From 9e8e764e8aa193779e79c73d32ccd06f79ce60d0 Mon Sep 17 00:00:00 2001 From: Varnika Budati Date: Tue, 30 Apr 2024 15:32:13 -0700 Subject: [PATCH] SNOW-1320543 Merge modin docs into snowpark-python repo (#1461) Please answer these questions before submitting your pull requests. Thanks! 1. What GitHub issue is this PR addressing? Make sure that there is an accompanying issue to your PR. Fixes SNOW-1320543 2. Fill out the following pre-review checklist: - [ ] I am adding a new automated test(s) to verify correctness of my new code - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency 3. Please describe how your code solves the related issue. Adding the modin/Snowpark pandas docs under `docs/source/modin`. The autogenerated files should reside under `docs/source/modin/pandas-api`. Added a section for Snowpark pandas in `docs/source/index.rst`. --------- Co-authored-by: Naren Krishna --- .github/CODEOWNERS | 3 +- .gitignore | 1 + docs/Makefile | 5 + docs/README.md | 3 + docs/source/conf.py | 7 +- docs/source/doc_gen.py | 39 +- docs/source/index.rst | 1 + docs/source/modin/dataframe.rst | 209 ++++++++ docs/source/modin/general_functions.rst | 46 ++ docs/source/modin/groupby.rst | 78 +++ docs/source/modin/index.rst | 21 + docs/source/modin/io.rst | 29 ++ docs/source/modin/numpy.rst | 66 +++ docs/source/modin/resampling.rst | 62 +++ docs/source/modin/series.rst | 223 +++++++++ docs/source/modin/session.rst | 95 ++++ .../modin/supported/dataframe_supported.rst | 472 ++++++++++++++++++ .../modin/supported/general_supported.rst | 209 ++++++++ .../modin/supported/groupby_supported.rst | 184 +++++++ docs/source/modin/supported/index.rst | 21 + .../modin/supported/resampling_supported.rst | 101 ++++ .../modin/supported/series_dt_supported.rst | 124 +++++ .../modin/supported/series_str_supported.rst | 139 ++++++ .../modin/supported/series_supported.rst | 460 +++++++++++++++++ 
.../modin/supported/window_supported.rst | 146 ++++++ docs/source/modin/window.rst | 29 ++ 26 files changed, 2751 insertions(+), 22 deletions(-) create mode 100644 docs/source/modin/dataframe.rst create mode 100644 docs/source/modin/general_functions.rst create mode 100644 docs/source/modin/groupby.rst create mode 100644 docs/source/modin/index.rst create mode 100644 docs/source/modin/io.rst create mode 100644 docs/source/modin/numpy.rst create mode 100644 docs/source/modin/resampling.rst create mode 100644 docs/source/modin/series.rst create mode 100644 docs/source/modin/session.rst create mode 100644 docs/source/modin/supported/dataframe_supported.rst create mode 100644 docs/source/modin/supported/general_supported.rst create mode 100644 docs/source/modin/supported/groupby_supported.rst create mode 100644 docs/source/modin/supported/index.rst create mode 100644 docs/source/modin/supported/resampling_supported.rst create mode 100644 docs/source/modin/supported/series_dt_supported.rst create mode 100644 docs/source/modin/supported/series_str_supported.rst create mode 100644 docs/source/modin/supported/series_supported.rst create mode 100644 docs/source/modin/supported/window_supported.rst create mode 100644 docs/source/modin/window.rst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5d0abdd4a71..d135c6ea497 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,8 +2,9 @@ /src/snowflake/snowpark/modin/ @snowflakedb/snowpandas /tests/integ/modin/ @snowflakedb/snowpandas /tests/unit/modin/ @snowflakedb/snowpandas +/docs/modin_api_coverage/ @snowflakedb/snowpandas +/docs/source/modin/ @snowflakedb/snowpandas /.github/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers -/docs/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers /scripts/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers setup.py @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers tox.ini @snowflakedb/snowpandas 
@snowflakedb/snowpark-python-api-reviewers diff --git a/.gitignore b/.gitignore index a903587ece7..1315f6b09a8 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,7 @@ whitesource/ docs/_build/ # Ignore generated autosummary files created by Sphinx docs when you run make html in the docs directory. docs/source/snowpark/api/ +docs/source/modin/pandas_api/ # Editor specific .idea/ diff --git a/docs/Makefile b/docs/Makefile index d0c3cbf1020..f82e631a109 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -14,6 +14,11 @@ help: .PHONY: help Makefile +view: + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + open build/html/index.html + # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile diff --git a/docs/README.md b/docs/README.md index d8738812839..b33f1948ff6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,9 @@ python -m pip install sphinx Open the documentation: `open -a "Google Chrome" build/html/index.html` +As a convenience, you can also use `make view` after activating your virtual environment, which runs `make clean`, `make html`, and opens the documentation with +either your default browser, or the application you set as default for opening HTML files. + Important files and directories: `docs/source/index.rst`: Specify which rst to include in the `index.html` landing page. 
diff --git a/docs/source/conf.py b/docs/source/conf.py index e184657ea14..04425224ca6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,7 +40,7 @@ "sphinx.ext.autosummary", "sphinx.ext.napoleon", "sphinx.ext.coverage", - "sphinx.ext.linkcode" + "sphinx.ext.linkcode", ] # -- Options for autodoc -------------------------------------------------- @@ -95,6 +95,7 @@ def linkcode_resolve(domain, info): import warnings, inspect, pkg_resources import snowflake.snowpark + if domain != "py": return None @@ -126,10 +127,8 @@ def linkcode_resolve(domain, info): source, lineno = inspect.getsourcelines(obj) linespec = f"#L{lineno}-L{lineno + len(source) - 1}" except TypeError: - linespec = "" + linespec = "" return ( f"https://github.com/snowflakedb/snowpark-python/blob/" f"v{release}/{os.path.relpath(fn, start=os.pardir)}{linespec}" ) - - diff --git a/docs/source/doc_gen.py b/docs/source/doc_gen.py index c24d31d9efc..d64c2c0d175 100755 --- a/docs/source/doc_gen.py +++ b/docs/source/doc_gen.py @@ -12,10 +12,7 @@ import tempfile import itertools -Class = namedtuple( - "Class", - ["module", "methods", "attributes"] -) +Class = namedtuple("Class", ["module", "methods", "attributes"]) Module = namedtuple( "Module", ["name", "attributes", "functions", "classes", "exceptions"] ) @@ -33,10 +30,12 @@ TAB = " " NEWLINE_TAB = f"\n{TAB}" RUBRIC_HEADER = ".. rubric::" -AUTOSUMMARY_HEADER=".. autosummary::" +AUTOSUMMARY_HEADER = ".. autosummary::" -def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = None) -> Union[Module, Class]: +def autogen_and_parse_for_info( + module_name: str, class_name: Optional[str] = None +) -> Union[Module, Class]: if class_name: res = Class(module_name, [], []) name = f"{module_name}.{class_name}" @@ -45,7 +44,6 @@ def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = Non name = module_name with tempfile.TemporaryDirectory() as tmpdir: - rst_content = f""" .. 
currentmodule:: snowflake.snowpark @@ -62,12 +60,11 @@ def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = Non with open(fname, "w") as fp: fp.write(rst_content) - output_dir = os.path.join(tmpdir, 'output') + output_dir = os.path.join(tmpdir, "output") subprocess.run(["sphinx-autogen", fname, "-o", output_dir, "-t", "_templates"]) section = "" - with open(f"{output_dir}/{name}.rst") as fp: for line in fp: line = line.strip() @@ -102,11 +99,15 @@ def generate_autosummary_section(section: str, content: str) -> str: return "" -def generate_module_header(title:str, module:str) -> str: - automodule_text = "" if module=="snowflake.snowpark" else f""" +def generate_module_header(title: str, module: str) -> str: + automodule_text = ( + "" + if module == "snowflake.snowpark" + else f""" .. automodule:: {module} :noindex: """ + ) return f""" {'='*(len(title)+5)} {title} @@ -118,10 +119,12 @@ def generate_module_header(title:str, module:str) -> str: """ -def generate_classes(title:str, module:str, classes: Iterable[str]) -> str: +def generate_classes(title: str, module: str, classes: Iterable[str]) -> str: results = [autogen_and_parse_for_info(module, c) for c in classes] names = NEWLINE_TAB.join(classes) - methods = NEWLINE_TAB.join(itertools.chain.from_iterable(c.methods for c in results)) + methods = NEWLINE_TAB.join( + itertools.chain.from_iterable(c.methods for c in results) + ) attributes = NEWLINE_TAB.join( itertools.chain.from_iterable(c.attributes for c in results) ) @@ -135,7 +138,7 @@ def generate_classes(title:str, module:str, classes: Iterable[str]) -> str: """ -def generate_module(title:str, module: str) -> str: +def generate_module(title: str, module: str) -> str: mod = autogen_and_parse_for_info(module) attributes = NEWLINE_TAB.join(mod.attributes) functions = NEWLINE_TAB.join(mod.functions) @@ -161,9 +164,12 @@ def generate_module(title:str, module: str) -> str: "module", help="The module or the parent module of the classes to be 
documented" ) parser.add_argument("-c", "--classes", nargs="*", help="Classes to be documented") - parser.add_argument("-t", "--title", help="Title of the rst file generated", default="PLACEHOLDER") parser.add_argument( - "-f", "--filename", help="File to write the generated content to") + "-t", "--title", help="Title of the rst file generated", default="PLACEHOLDER" + ) + parser.add_argument( + "-f", "--filename", help="File to write the generated content to" + ) args = parser.parse_args() if args.classes: @@ -176,4 +182,3 @@ def generate_module(title:str, module: str) -> str: fp.write(content) else: print(content) - diff --git a/docs/source/index.rst b/docs/source/index.rst index 2190fea8f43..fe10b28a009 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,5 +11,6 @@ information, see the `Snowpark Developer Guide for Python ` + +.. rubric:: Constructor + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame + +.. rubric:: Attributes + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.index + DataFrame.columns + DataFrame.dtypes + DataFrame.info + DataFrame.select_dtypes + DataFrame.values + DataFrame.axes + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.empty + +.. rubric:: Conversion + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.astype + DataFrame.convert_dtypes + DataFrame.copy + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.head + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.tail + DataFrame.isin + DataFrame.where + DataFrame.mask + +.. rubric:: Binary operator functions + +.. 
autosummary:: + :toctree: pandas_api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.round + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + +.. rubric:: Function application, GroupBy & window + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.apply + DataFrame.applymap + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + +.. rubric:: Computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.count + DataFrame.cummax + DataFrame.cummin + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + DataFrame.value_counts + + +.. rubric:: Reindexing / selection / label manipulation + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.first + DataFrame.get + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + +.. rubric:: Missing data handling + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.dropna + DataFrame.ffill + DataFrame.fillna + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.pad + DataFrame.replace + +.. rubric:: Reshaping, sorting, transposing + +.. 
autosummary:: + :toctree: pandas_api/ + + DataFrame.pivot_table + DataFrame.sort_values + DataFrame.sort_index + DataFrame.melt + DataFrame.squeeze + DataFrame.T + DataFrame.transpose + +.. rubric:: Combining / comparing / joining / merging + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.join + DataFrame.merge + DataFrame.update + +.. rubric:: Time Series-related + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.shift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + +.. rubric:: Serialization / IO / conversion + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.to_pandas + DataFrame.to_snowflake + DataFrame.to_snowpark diff --git a/docs/source/modin/general_functions.rst b/docs/source/modin/general_functions.rst new file mode 100644 index 00000000000..737033e2190 --- /dev/null +++ b/docs/source/modin/general_functions.rst @@ -0,0 +1,46 @@ +============================= +General functions +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.general +.. rubric:: :doc:`All supported general functions ` + +.. rubric:: Data manipulations + +.. autosummary:: + :toctree: pandas_api/ + + melt + pivot_table + cut + qcut + concat + get_dummies + merge + unique + +.. rubric:: Top-level missing data + +.. autosummary:: + :toctree: pandas_api/ + + isna + isnull + notna + notnull + + +.. rubric:: Top-level dealing with numeric data + +.. autosummary:: + :toctree: pandas_api/ + + to_numeric + +.. rubric:: Top-level dealing with datetimelike data + +.. autosummary:: + :toctree: pandas_api/ + + date_range + to_datetime diff --git a/docs/source/modin/groupby.rst b/docs/source/modin/groupby.rst new file mode 100644 index 00000000000..14b93571f2e --- /dev/null +++ b/docs/source/modin/groupby.rst @@ -0,0 +1,78 @@ +============================= +GroupBy +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.groupby +.. rubric:: :doc:`All supported groupby APIs ` + +.. 
rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.__iter__ + SeriesGroupBy.__iter__ + DataFrameGroupBy.groups + SeriesGroupBy.groups + DataFrameGroupBy.indices + SeriesGroupBy.indices + +.. rubric:: Function application + +.. autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.apply + SeriesGroupBy.apply + DataFrameGroupBy.agg + SeriesGroupBy.agg + DataFrameGroupBy.aggregate + SeriesGroupBy.aggregate + DataFrameGroupBy.transform + +.. rubric:: `DataFrameGroupBy` computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.count + DataFrameGroupBy.cumcount + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumsum + DataFrameGroupBy.head + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.max + DataFrameGroupBy.mean + DataFrameGroupBy.median + DataFrameGroupBy.min + DataFrameGroupBy.nunique + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.shift + DataFrameGroupBy.std + DataFrameGroupBy.sum + DataFrameGroupBy.tail + DataFrameGroupBy.var + +.. rubric:: `SeriesGroupBy` computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + SeriesGroupBy.count + SeriesGroupBy.cumcount + SeriesGroupBy.cummax + SeriesGroupBy.cummin + SeriesGroupBy.cumsum + SeriesGroupBy.max + SeriesGroupBy.mean + SeriesGroupBy.median + SeriesGroupBy.min + SeriesGroupBy.quantile + SeriesGroupBy.rank + SeriesGroupBy.shift + SeriesGroupBy.std + SeriesGroupBy.sum + SeriesGroupBy.var diff --git a/docs/source/modin/index.rst b/docs/source/modin/index.rst new file mode 100644 index 00000000000..1e17d3e1ace --- /dev/null +++ b/docs/source/modin/index.rst @@ -0,0 +1,21 @@ +=================== +Snowpark pandas API +=================== + +This page gives an overview of all public Snowpark pandas objects, functions and methods. +For your convenience, here is all the :doc:`Supported APIs ` + + +.. 
toctree:: + :maxdepth: 2 + + session + io + general_functions + series + dataframe + window + groupby + resampling + numpy + All supported APIs \ No newline at end of file diff --git a/docs/source/modin/io.rst b/docs/source/modin/io.rst new file mode 100644 index 00000000000..5e3ae605b4b --- /dev/null +++ b/docs/source/modin/io.rst @@ -0,0 +1,29 @@ +============================= +Input/Output +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas + +.. rubric:: Flat file + +.. autosummary:: + :toctree: pandas_api/ + + read_csv + read_json + read_parquet + +.. rubric:: SQL + +.. autosummary:: + :toctree: pandas_api/ + + read_snowflake + to_snowpark + +.. rubric:: pandas + +.. autosummary:: + :toctree: pandas_api/ + + to_pandas \ No newline at end of file diff --git a/docs/source/modin/numpy.rst b/docs/source/modin/numpy.rst new file mode 100644 index 00000000000..2884b2126ed --- /dev/null +++ b/docs/source/modin/numpy.rst @@ -0,0 +1,66 @@ +NumPy Interoperability +====================== + +Snowpark pandas provides limited interoperability with NumPy functions through the NumPy +NEP13 and NEP18 specifications, which define `__array_ufunc__` and `__array_function__`. +A limited number of NumPy APIs are translated to distributed Snowpark pandas functions. + ++-----------------------------+----------------------------------------------------+ +| NumPy method | Notes for current implementation | ++-----------------------------+----------------------------------------------------+ +| ``np.where`` | Maps np.where(cond, x, y) to x.where(cond, y) | +| | cond, x, and y should have the same shapes or be | +| | scalars. The result is always a Snowpark pandas | +| | DataFrame. | +| | | +| | Since this function maps to df.where the | +| | column and index labels are considered, as opposed | +| | to strict positional indexing in NumPy.
| +| | | +| | cond, x, and y can either be all non-scalars or a | +| | mix of scalars and non-scalars, such that | +| | non-scalars have the same shape. (If cond, x, and | +| | y are all scalars, NumPy will not call the | +| | dispatcher at all, and the normal NumPy behavior | +| | will occur.) | ++-----------------------------+----------------------------------------------------+ +| ``np.add`` | Mapped to df.__add__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_and`` | Mapped to df.__and__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_or`` | Mapped to df.__or__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_xor`` | Mapped to df.__xor__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_not`` | Mapped to ~df.astype(bool) | ++-----------------------------+----------------------------------------------------+ + +NEP18 Implementation Details +---------------------------- +NumPy differs from pandas and Snowpark pandas in several key respects. It is +important to understand that the interoperability provided is to support +common pandas use-cases, rather than matrix or linear algebra operations. NumPy +functions are mapped, with some transformation, to their pandas analogues. + +Return Value +-------------------- +NEP18 does not specify the return value when implementing a function like np.where, +but it suggests that the return value should match the input types. We follow +that suggestion here and return a Snowpark pandas DataFrame. + +Broadcasting +------------ +NumPy will "broadcast" all arguments into the same array shape so operations +can be vectorized on the CPU. Snowpark pandas does not do this because all +execution runs within Snowflake.
All input DataFrames or Series should be of +the same shape and will not be broadcast. Scalar values can also be used as +an input. + +Positional Operations +--------------------- +NumPy always performs positional operations on input datatypes, assuming they +are similarly shaped and meaningful arrays. Pandas can have DataFrames which +represent the same data but with different column ordering. Even when a NumPy +method is called on a Snowpark pandas DataFrame we continue to consider the labels +while performing the operation. + diff --git a/docs/source/modin/resampling.rst b/docs/source/modin/resampling.rst new file mode 100644 index 00000000000..c3069b06656 --- /dev/null +++ b/docs/source/modin/resampling.rst @@ -0,0 +1,62 @@ +============================= +Resampling +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.resample +.. rubric:: :doc:`All supported resampling APIs ` + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.groups + Resampler.indices + Resampler.get_group + + +.. rubric:: Function application + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.apply + Resampler.aggregate + Resampler.transform + + +.. rubric:: Upsampling + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.ffill + Resampler.bfill + Resampler.nearest + Resampler.fillna + Resampler.asfreq + +.. rubric:: Computations / descriptive stats + +.. 
autosummary:: + :toctree: pandas_api/ + + Resampler.count + Resampler.nunique + Resampler.first + Resampler.last + Resampler.max + Resampler.mean + Resampler.median + Resampler.min + Resampler.interpolate + Resampler.ohlc + Resampler.pad + Resampler.pipe + Resampler.prod + Resampler.quantile + Resampler.sem + Resampler.size + Resampler.std + Resampler.sum + Resampler.var diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst new file mode 100644 index 00000000000..c2ef907e829 --- /dev/null +++ b/docs/source/modin/series.rst @@ -0,0 +1,223 @@ +============================= +Series +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas +.. rubric:: :doc:`All supported Series APIs ` + +.. rubric:: Constructor + +.. autosummary:: + :toctree: pandas_api/ + + Series + +.. rubric:: Attributes + +.. autosummary:: + :toctree: pandas_api/ + + Series.index + Series.axes + Series.array + Series.dtype + Series.dtypes + Series.duplicated + Series.empty + Series.hasnans + Series.name + Series.ndim + Series.shape + Series.size + Series.T + Series.values + + + +.. rubric:: Conversion + +.. autosummary:: + :toctree: pandas_api/ + + Series.astype + Series.convert_dtypes + Series.copy + Series.to_dict + Series.to_list + Series.to_numpy + Series.to_pandas + Series.to_snowflake + Series.to_snowpark + Series.__array__ + + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + Series.iloc + Series.loc + Series.__iter__ + Series.keys + + + +.. rubric:: Binary operator functions + +.. autosummary:: + :toctree: pandas_api/ + + Series.add + Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow + Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq + +.. rubric:: Function application, GroupBy & window + +.. 
autosummary:: + :toctree: pandas_api/ + + Series.apply + Series.agg + Series.aggregate + Series.transform + Series.map + Series.groupby + Series.rolling + + + + + +.. rubric:: Computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + Series.abs + Series.all + Series.any + Series.count + Series.cummax + Series.cummin + Series.cumsum + Series.describe + Series.diff + Series.is_unique + Series.max + Series.mean + Series.median + Series.min + Series.quantile + Series.rank + Series.skew + Series.std + Series.sum + Series.var + Series.kurtosis + Series.unique + Series.nunique + Series.is_unique + Series.value_counts + + +.. rubric:: Reindexing / selection / label manipulation + +.. autosummary:: + :toctree: pandas_api/ + + Series.drop + Series.drop_duplicates + Series.duplicated + Series.get + Series.head + Series.idxmax + Series.idxmin + Series.isin + Series.last + Series.rename + Series.rename_axis + Series.reset_index + Series.sample + Series.set_axis + Series.take + Series.tail + Series.where + Series.mask + Series.add_prefix + Series.add_suffix + + + +.. rubric:: Missing data handling + +.. autosummary:: + :toctree: pandas_api/ + + Series.dropna + Series.ffill + Series.fillna + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad + Series.replace + +.. rubric:: Reshaping, sorting + +.. autosummary:: + :toctree: pandas_api/ + + Series.sort_values + Series.sort_index + Series.squeeze + +.. rubric:: Combining / comparing / joining / merging + +.. autosummary:: + :toctree: pandas_api/ + + Series.update + +.. rubric:: Time Series-related + +.. autosummary:: + :toctree: pandas_api/ + + Series.shift + Series.first_valid_index + Series.last_valid_index + Series.resample + + +.. rubric:: Accessors + +.. autosummary:: + :toctree: pandas_api/ + + Series.str + Series.dt + +.. rubric:: :doc:`All supported Series str APIs ` +.. 
rubric:: :doc:`All supported Series dt APIs ` diff --git a/docs/source/modin/session.rst b/docs/source/modin/session.rst new file mode 100644 index 00000000000..414a05b8407 --- /dev/null +++ b/docs/source/modin/session.rst @@ -0,0 +1,95 @@ +=========== +Session +=========== + +``modin.pandas.session`` is the Snowpark session that new +Snowpark pandas DataFrames and Series will use to execute queries. + +* ``session`` starts as ``None``. + +* When there is no active Snowpark session and ``session`` is ``None``, accessing + ``session`` or creating a Snowpark pandas DataFrame or Series will raise an + exception. You will need to create a Snowpark session to access ``session`` or + create a DataFrame or Series. + +* When there is a single active Snowpark session and ``session`` is ``None``, + Snowpark pandas automatically assigns that session to ``session``. + +* When there are multiple active Snowpark sessions and ``session`` is ``None``, + accessing ``session`` or creating a Snowpark pandas DataFrame or Series will + raise an exception. To make Snowpark pandas populate + ``modin.pandas.session``, you can + `close `_ + one of the sessions, or assign a particular session to ``session``. For + example, if you execute ``modin.pandas.session = session1``, + Snowpark pandas will use ``session1``. + +Examples +======== + +Creating and using the default Snowpark session +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you have set a `default Snowflake connection `_, +you can use that connection to create a Snowpark session for Snowpark pandas: + +.. code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + # Session.builder.create() will create a default Snowflake connection. + Session.builder.create() + df = pd.DataFrame([1, 2, 3]) + +Note that Snowpark pandas uses the unique active Snowpark session, even though +the code does not explicitly assign that session to Snowpark pandas. 
+ +Assigning one of multiple sessions to Snowpark pandas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can create multiple Snowpark sessions, then assign one of them to Snowpark +pandas. + +.. code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + pandas_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + other_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + pd.session = pandas_session + df = pd.DataFrame([1, 2, 3]) + +Trying to use Snowpark pandas when there is no active Snowpark session +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The code below will cause a :doc:`SnowparkSessionException <../snowpark/api/snowflake.snowpark.exceptions.SnowparkSessionException>` +with a message like ``Snowpark pandas requires an active snowpark session, but there is none.`` +Once you create a session, you can use Snowpark pandas. + +.. code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + + df = pd.DataFrame([1, 2, 3]) + +Trying to use Snowpark pandas when there are multiple active Snowpark sessions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The code below will cause a :doc:`SnowparkSessionException <../snowpark/api/snowflake.snowpark.exceptions.SnowparkSessionException>` +with a message like ``There are multiple active snowpark sessions, but you need +to choose one for Snowpark pandas.`` + +.. 
code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + pandas_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + other_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + df = pd.DataFrame([1, 2, 3]) diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst new file mode 100644 index 00000000000..2bec8b340e1 --- /dev/null +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -0,0 +1,472 @@ +``pd.DataFrame`` supported APIs +=============================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Attributes + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| DataFrame attribute | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``T`` | P | ``D`` if any column name is not str or tuple of str| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``at`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``attrs`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``columns`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtypes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``empty`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``flags`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iat`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iloc`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | D | This operation is not recommended since it pulls | +| | | the index into local memory. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``loc`` | P | ``N`` for set with MultiIndex | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ndim`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shape`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``style`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``values`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + + +Methods + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| DataFrame method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``abs`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_prefix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_suffix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | ``margins``, ``observed``, | if ``axis == 0``: ``Y`` when function is one of | +| | | ``sort`` | `count`, `mean`, `min`, `max`, `sum`, `median`; | +| | | | `std` and `var` supported with `ddof=0` or | +| | | | `ddof=1`; `quantile` is supported when `q` is | +| | | | the default value or a scalar. | +| | | | if ``axis == 1``: ``Y`` when function is `count`, | +| | | | `min`, `max`, or `sum` and the index is not a | +| | | | MultiIndex. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | ``margins``, ``observed``, | See ``agg`` | +| | | ``sort`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``align`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``all`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``any`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | | ``D`` if ``axis == 0`` or ``func`` is not callable | +| | | | or ``result_type`` is given or ``args`` and | +| | | | ``kwargs`` contain DataFrame or Series | +| | | | ``N`` if ``func`` maps to different column labels. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``applymap`` | P | | ``D`` if ``na_action == "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``assign`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``astype`` | P | | ``D``: from string to datetime or ``errors == | +| | | | "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``at_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bool`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``boxplot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``clip`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine_first`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``compare`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``convert_dtypes`` | N | | Not supported since Snowpark pandas is already | +| | | | using nullable datatypes internally. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``copy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corrwith`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``count`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cov`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumprod`` | D | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumsum`` | P | | ``Y`` if values are numeric | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``describe`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``diff`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``div`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divide`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop_duplicates`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``droplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dropna`` | P | | ``D`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``duplicated`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eq`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``equals`` | N | | |
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eval`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ewm`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``expanding`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``explode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | | ``D`` if param ``limit`` is set | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``floordiv`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_dict`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_records`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ge`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``groupby`` | P | ``observed`` is ignored since | ``Y``, support ``axis == 0`` and ``by`` is column | +| | | Categoricals are not implemented | label or Series from the current DataFrame; | +| | | yet | otherwise ``D``; | +| | | | Note that supported functions are agg, count, | +| | | | cumcount, cummax, cummin, cumsum, max, mean, | +| | | | median, min, quantile, shift, std, sum, and var. 
| +| | | | Otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``gt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``head`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``hist`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | P | | ``N`` for MultiIndex dataframes | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | P | | ``N`` for MultiIndex dataframes | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_objects`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``info`` | P | | Index is different, zero bytes reported for memory | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``insert`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isetitem`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``items`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``iterrows`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``itertuples`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``join`` | P | | ``D`` if given the ``validate`` param. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``keys`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurtosis`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``le`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``map`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mask`` | P | | ``D`` if given ``axis`` when ``other`` is a | +| | | | ``DataFrame`` or ``level`` parameters; | +| | | | ``N`` if ``cond`` or ``other`` is Callable | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``max`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mean`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``median`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``melt`` | P | ``col_level`` | ``N`` when columns are MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``memory_usage`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge`` | P | | ``D`` if param ``validate`` is given | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``min`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``multiply`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ne`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nlargest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nsmallest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nunique`` | P | | ``D`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pad`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pct_change`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``pivot_table`` | P | ``observed``, ``margins``, | ``N`` if ``index``, ``columns``, or ``values`` is | +| | | ``sort`` | not str; or MultiIndex; or any ``aggfunc`` is not | +| | | | "count", "mean", "min", "max", or "sum" | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pop`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``prod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``product`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``quantile`` | P | | ``Y`` if ``axis == 0``, and ``interpolation`` is | +| | | | ``"linear"`` or ``"nearest"``, and ``method`` is | +| | | | ``"single"``.
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``query`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``radd`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rank`` | P | | ``N`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdiv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex_like`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename`` | P | | ``D`` if ``mapper`` is callable or the DataFrame | +| | | | has a MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reorder_levels`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``copy`` is ignored, ``method``, | | +| | | ``limit`` | |
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``resample`` | P | | Only DatetimeIndex is supported and its ``freq`` | +| | | | will be lost. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reset_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rfloordiv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rolling`` | P | | Supports integer ``window``, ``min_periods >= 1``, | +| | | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``round`` | P | | ``N`` if ``decimals`` is Series | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rpow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rsub`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rtruediv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sample`` | P | | ``N`` if ``weights``, ``random_state``, or | +| | | | ``replace = True`` is specified when ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``select_dtypes`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sem`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_flags`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``shift`` | P | ``freq`` | No support for ``freq != None``. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``skew`` | P | | ``N`` if ``axis == 1`` or ``skipna == False`` | +| | | | or ``numeric_only=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_index`` | P | | ``D`` if given the ``key`` param. ``N`` if | +| | | | ``axis == 1``, ``inplace == True``, or MultiIndex. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_values`` | P | | ``D`` if given the ``key`` param or ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``squeeze`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``stack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``std`` | P | | ``D`` if ``ddof`` is not 0 or 1 | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``subtract`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sum`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swapaxes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swaplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``take`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_clipboard`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_csv`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_dict`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_excel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_feather`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_gbq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_hdf`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_html`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_json`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_latex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_markdown`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numpy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_orc`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_parquet`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_period`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pickle`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_records`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_sql`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_stata`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_string`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_timestamp`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xarray`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xml`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | P | | Only callable and string parameters are supported.| +| | | | list and dict parameters are not supported. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transpose`` | P | | See ``T`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truediv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truncate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_convert`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_localize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unstack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``update`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``value_counts`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``var`` | P | | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``where`` | P | | See ``mask`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``xs`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/general_supported.rst b/docs/source/modin/supported/general_supported.rst new file mode 100644 index 00000000000..d689a376872 --- /dev/null +++ b/docs/source/modin/supported/general_supported.rst @@ -0,0 +1,209 @@ +General utilities supported APIs +======================================= + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Data manipulations + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``concat`` | P | ``levels`` is not supported, | | +| | | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``crosstab`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cut`` | P | ``retbins``, ``labels`` | ``N`` if ``retbins=True`` or ``labels!=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``factorize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_dummies`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get_dummies`` | P | ``sparse`` is ignored | ``Y`` if params ``dummy_na``, ``drop_first`` | +| | | | and ``dtype`` are default, otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lreshape`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``melt`` | P | ``col_level``, ``ignore_index`` | ``N`` if df.columns is a MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge``
| P | ``validate`` | ``D`` if param ``validate`` is given | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge_asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge_ordered`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot_table`` | P | ``observed``, ``margins``, | ``N`` if ``index``, ``columns``, or ``values`` is | +| | | ``sort`` | not str; or MultiIndex; or any ``aggfunc`` is not | +| | | | "count", "mean", "min", "max", or "sum" | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``qcut`` | P | | ``N`` if ``labels`` is an array or | +| | | | ``retbins=True`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_csv`` | P | ``engine``, ``converters``, | Reads both local and staged file(s) into a Snowpark| +| | | ``true_values``, | pandas DataFrame. Note, the order of its rows | +| | | ``false_values``, ``skipfooter``,| may differ from the order of rows in the original | +| | | ``skipinitialspace``, | file(s).
| +| | | ``nrows``, ``keep_default_na``, | | +| | | ``na_filter``, ``date_parser``, | Supported parameters are ``sep``, ``header``, | +| | | ``parse_dates``, ``verbose``, | ``names``, ``index_col``, ``usecols``, ``dtype``, | +| | | ``infer_datetime_format``, | ``compression``, ``na_values``, ``escapechar``, | +| | | ``date_format``, | ``skiprows``, ``encoding``, ``quotechar``, and | +| | | ``keep_date_col``, ``dayfirst``, | ``skip_blank_lines``. | +| | | ``cache_dates``, ``iterator``, | | +| | | ``chunksize``, ``thousands``, | | +| | | ``decimal``, ``lineterminator``, | | +| | | ``quoting``, ``doublequote``, | | +| | | ``comment``, ``encoding_errors``,| | +| | | ``dialect``, ``on_bad_lines``, | | +| | | ``low_memory``, | | +| | | ``delim_whitespace``, | | +| | | ``memory_map``, | | +| | | ``float_precision``, | | +| | | ``storage_options``, | | +| | | ``dtype_backend`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_json`` | P | ``orient``, ``typ``, ``dtype``, | ``P``: | +| | | ``convert_axes``, ``lines``, | - if ndjson files are passed | +| | | ``convert_dates``, ``date_unit``,| - Supported parameters are ``compression`` and | +| | | ``keep_default_dates``, | ``encoding`` | +| | | ``encoding_errors``, ``nrows``, | | +| | | and ``chunksize`` will raise | | +| | | an error. | | +| | | ``precise_float``, ``engine``, | | +| | | ``dtype_backend``, and | | +| | | ``storage_options`` are ignored. | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_parquet`` | P | ``use_nullable_dtypes``, | Supported parameter(s) are: ``columns`` | +| | | ``filesystem``, and ``filters`` | | +| | | will raise an error if used. | | +| | | ``engine``, ``storage_options``, | | +| | | ``dtype_backend``, and | | +| | | ``**kwargs`` are ignored.
| | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_snowflake`` | Y | | Reading from tables as well as SELECT SQL Queries | +| | | | is supported, but ordering is not guaranteed for | +| | | | SQL Queries that contain ORDER BY clauses. More | +| | | | complex queries, including CTEs and CTEs with | +| | | | anonymous stored procedures are also supported. | +| | | | Obtaining results from stored procedures is also | +| | | | supported via CALL queries. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_table`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pandas`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_snowflake`` | Y | ``**kwargs`` are currently | | +| | | ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_snowpark`` | Y | | Convert the Snowpark pandas DataFrame or Series to | +| | | | a Snowpark DataFrame. Once converted to a Snowpark | +| | | | DataFrame, no ordering information will be | +| | | | preserved.
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``wide_to_long`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with missing data + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Top-level dealing with numeric data + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numeric`` | P | ``downcast`` is ignored | ``D`` if ``errors == "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with datetime-like data + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bdate_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``date_range`` | P | ``tz`` | ``N`` for business or custom frequencies | +| | | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_freq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``period_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``timedelta_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_datetime`` | P | ``cache`` is ignored | ``D``: | +| | | | - if ``format`` is None or not
supported in | +| | | | Snowflake | +| | | | - or if params ``exact``, ``infer_datetime_format``| +| | | | are given | +| | | | - or ``origin == "julian"`` | +| | | | - or ``errors == "ignore"`` | +| | | | - or ``arg`` is DataFrame and data type is not int | +| | | | - or ``arg`` is Series and data type is string | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_timedelta`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with Interval data + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``interval_range`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Top-level evaluation + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``eval`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Datetime formats + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented?
(Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``tseries.api.guess_datetime_format`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + + +Hashing + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``util.hash_array`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``util.hash_pandas_object`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Importing from other DataFrame libraries + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.interchange.from_dataframe`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/groupby_supported.rst b/docs/source/modin/supported/groupby_supported.rst new file mode 100644 index 00000000000..02b43df341f --- /dev/null +++ b/docs/source/modin/supported/groupby_supported.rst @@ -0,0 +1,184 @@ +GroupBy supported APIs +====================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Indexing, iteration + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_group`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``groups`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``indices`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``__iter__`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Function application + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | ``axis`` other than 0 is not | ``Y``, support functions are count, mean, min, max,| +| | | implemented. | sum, median, std, and var | +| | | | (including both Python and NumPy functions) | +| | | | otherwise ``D``. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | ``axis`` other than 0 is not | See ``agg`` | +| | | implemented. | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | ``axis`` other than 0 is not | ``Y`` if the following are true, otherwise ``N``: | +| | | implemented. 
| - ``func`` is a callable that always returns | +| | | | either a pandas DataFrame, a pandas Series, or | +| | | ``SeriesGroupBy.apply`` is not | objects that are neither DataFrame nor Series. | +| | | implemented. | - ``apply`` called on DataFrameGroupBy, not | +| | | | SeriesGroupBy | +| | | | - grouping on axis=0 | +| | | | - Not applying transform to a dataframe with a | +| | | | non-unique index | +| | | | - Not applying ``func`` that returns two | +| | | | dataframes that have different labels for the | +| | | | column at a given position | +| | | | - Not applying ``func`` that returns two | +| | | | dataframes that have different names for a | +| | | | given index label | +| | | | - Not applying ``func`` that returns two | +| | | | Series that have different labels for the | +| | | | row at a given position | +| | | | - Not applying ``func`` that returns two | +| | | | Series that have different names | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | P | ``SeriesGroupBy.transform`` is | ``Y`` when ``func`` is a string or callable. | +| | | not implemented. | A UDTF is created to run ``transform`` on every | +| | | | group via ``apply``. ``transform`` has the same | +| | | | limitations as ``apply`` except for string ``func``| +| | | | also being valid for ``transform``. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Computations/descriptive stats + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``all`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``any`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corrwith`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | Y | SeriesGroupBy does not implement ``numeric_only`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumcount`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumprod`` | 
N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumsum`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``describe`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``diff`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ffill`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``fillna`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``first`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``head`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``idxmax`` | P | When GroupBy axis is 1: | +| | | - if idxmax axis is 1, then ``N``; | +| | | - if idxmax axis is 0, then ``D``. | +| | | GroupBy axis = 0 is fully supported. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``idxmin`` | P | See ``idxmax`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``last`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ngroup`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nth`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nunique`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ohlc`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pct_change`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``prod`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | Y | See ``count`` | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rank`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``resample`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rolling`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sample`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shift`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | ``D`` if ``ddof`` is not 0 or 1 | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``take`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``value_counts`` | N | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Plotting and visualization + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``boxplot`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hist`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``plot`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/index.rst b/docs/source/modin/supported/index.rst new file mode 100644 index 00000000000..2b1c6c0b06e --- /dev/null +++ b/docs/source/modin/supported/index.rst @@ -0,0 +1,21 @@ +====================================== +Snowpark pandas Supported API +====================================== + +For your convenience, we have compiled a list of currently implemented APIs and methods available in Snowpark pandas. +This documentation is updated as new methods and APIs are merged into the release branch, so it may not exactly match +the most recent release. + +To view the docs for the most recent release, check that you’re viewing the stable version of the docs. + +.. 
toctree:: + :maxdepth: 2 + + general_supported + series_supported + dataframe_supported + window_supported + groupby_supported + resampling_supported + series_dt_supported + series_str_supported \ No newline at end of file diff --git a/docs/source/modin/supported/resampling_supported.rst b/docs/source/modin/supported/resampling_supported.rst new file mode 100644 index 00000000000..05bedd52257 --- /dev/null +++ b/docs/source/modin/supported/resampling_supported.rst @@ -0,0 +1,101 @@ +Resampler supported APIs +======================== + +The following tables are structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no, i.e., the API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Indexing, iteration + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_group`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``groups`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``indices`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``__iter__`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Function application + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``transform`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Upsampling + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | ``limit`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nearest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Computations / descriptive stats + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``first`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``last`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nunique`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ohlc`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pad`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``prod`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ + diff --git a/docs/source/modin/supported/series_dt_supported.rst b/docs/source/modin/supported/series_dt_supported.rst new file mode 100644 index 00000000000..e9316bc93b9 --- /dev/null +++ b/docs/source/modin/supported/series_dt_supported.rst @@ -0,0 +1,124 @@ +``DatetimeProperties`` supported APIs +====================================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| DatetimeProperties | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | +| (Series.dt) | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``date`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``timetz`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``year`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``month`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hour`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``minute`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``second`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``microsecond`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nanosecond`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_of_week`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``week`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``weekofyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dayofweek`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``weekday`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dayofyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_of_year`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quarter`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_month_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_month_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_quarter_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_quarter_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_year_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_year_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_leap_year`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``daysinmonth`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``days_in_month`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``freq`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_period`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_pydatetime`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz_localize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz_convert`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``normalize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``strftime`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``round`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``floor`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ceil`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| 
``month_name`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_name`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``total_seconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_pytimedelta`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``seconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``days`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``microseconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nanoseconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``components`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``qyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``start_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``end_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``end_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_timestamp`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``unit`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_str_supported.rst b/docs/source/modin/supported/series_str_supported.rst new file mode 100644 index 00000000000..4359151c74f --- /dev/null +++ b/docs/source/modin/supported/series_str_supported.rst @@ -0,0 +1,139 @@ +``StringMethods`` supported APIs +================================= + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| StringMethods | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | +| (Series.str) | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``capitalize`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``casefold`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cat`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``center`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``contains`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``decode`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``encode`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``endswith`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``extract`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``extractall`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``find`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``findall`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``fullmatch`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_dummies`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isalpha`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isalnum`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isdecimal`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isdigit`` | P | Does not check for special digits, like | +| | | superscripted and subscripted digits in unicode | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``islower`` | Y | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isnumeric`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isspace`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``istitle`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isupper`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``join`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``len`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ljust`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``lower`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``lstrip`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``match`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``normalize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pad`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``partition`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``removeprefix`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``removesuffix`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``repeat`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``N`` if `pat` is non-string, `repl` is a | +| | | non-string, or `n` is non-numeric or zero. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rfind`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rindex`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rjust`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rpartition`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rsplit`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rstrip`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``slice`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``slice_replace`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``split`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``N`` if `pat` is non-string, `repl` is | +| | 
| non-string, or `n` is non-numeric or zero. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``split`` | P | ``N`` if `pat` is non-string, `n` is non-numeric, | +| | | `expand` is set, or `regex` is set. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``startswith`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``strip`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``swapcase`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``title`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``translate`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``upper`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``wrap`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``zfill`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst new file mode 100644 index 00000000000..a00880094ef --- /dev/null +++ b/docs/source/modin/supported/series_supported.rst @@ -0,0 +1,460 @@ +``pd.Series`` supported APIs +============================ + +The following table is structured as follows: The 
first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Attributes + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Series attribute | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``T`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``array`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``at`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``attrs`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtype`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtypes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``empty`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``flags`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hasnans`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iat`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iloc`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_monotonic_decreasing`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_monotonic_increasing`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_unique`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``loc`` | P | ``N`` for set with MultiIndex | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``name`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nbytes`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ndim`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shape`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | Y | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``values`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + + +Methods + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Series method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``abs`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_prefix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_suffix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | | ``Y`` when function is one of `count`, `mean`, | +| | | | `min`, `max`, `sum`, `median`; `std` and `var` | +| | | | supported with `ddof=0` or `ddof=1`; `quantile` is | +| | | | supported when `q` is the default value or a | +| | | | scalar | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | | See ``agg`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``align`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``all`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``any`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | ``convert_dtype`` is ignored | ``D`` if ``func`` is not callable. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argmax`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argmin`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argsort`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``astype`` | P | | ``D``: from string to datetime or ``errors == | +| | | | "ignore"`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``at_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``autocorr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bool`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``case_when`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``clip`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine_first`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``compare`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``convert_dtypes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``copy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``count`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cov`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumprod`` | D | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumsum`` | P | | ``Y`` if values are 
numeric, otherwise fails. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``describe`` | Y | | | +| | | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``diff`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``div`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divide`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divmod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop_duplicates`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``droplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dropna`` | P | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``duplicated`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eq`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``equals`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ewm`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``expanding`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``explode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``factorize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | | ``D`` if parameter ``limit`` is set | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``floordiv`` | P | ``level`` | Raises division by zero exception when the right | +| | | | hand side contains at least one zero. pandas allows| +| | | | division by zero for non-object type Series and | +| | | | returns +/-inf. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ge`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``groupby`` | P | ``observed`` is ignored since | ``Y`` when ``axis == 0`` and ``by`` is column | +| | | Categoricals are not implemented | label or Series from the current DataFrame; | +| | | yet | otherwise ``D``; | +| | | | Note that supported functions are agg, count, | +| | | | cumcount, cummax, cummin, cumsum, max, mean, | +| | | | median, min, quantile, shift, std, sum, and var. 
| +| | | | Otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``gt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``head`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``hist`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_objects`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``info`` | D | | Different Index types are used in pandas but not | +| | | | in Snowpark pandas | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | |
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isin`` | Y | | Snowpark pandas deviates with respect to handling | +| | | | NA values | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``item`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``items`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``keys`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurtosis`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last_valid_index`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``le`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``map`` | P | | See ``apply`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mask`` | P | | ``D`` if given ``axis`` or ``level`` parameters; | +| | | | ``N`` if ``cond`` or ``other`` is Callable | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``max`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mean`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``median`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``memory_usage`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``min`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mod`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``multiply`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ne`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nlargest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nsmallest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nunique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pad`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``pct_change`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pop`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``prod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``product`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``quantile`` | P | | ``Y`` if values are numeric, and ``interpolation`` | +| | | | is ``"linear"`` or ``"nearest"``; | +| | | | ``N`` if ``q`` is a DataFrame or Series | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``radd`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rank`` | P | | ``N`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ravel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdiv`` | P | ``level`` | 
See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdivmod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex_like`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename`` | P | ``copy`` is ignored | ``D`` if ``mapper`` is callable or the series has | +| | | | MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reorder_levels`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``repeat`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``method``, ``limit`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``resample`` | P | | Only DatetimeIndex is supported and its ``freq`` | +| | | | will be lost | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``reset_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rfloordiv`` | P | ``level`` | See ``floordiv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rolling`` | P | | Supports integer ``window``, ``min_periods >= 1``, | +| | | | and ``center`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``round`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rpow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rsub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rtruediv`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sample`` | P | ``weights``, ``random_state`` | ``N`` if ``replace = True`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``searchsorted`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sem`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_axis`` | Y | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``shift`` | P | ``freq`` | No support for ``freq != None`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``skew`` | P | | ``N`` if ``axis == 1`` or ``skipna == False`` | +| | | | or ``numeric_only=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_index`` | P | | ``D`` if given the ``key`` param; | +| | | | ``N`` if ``inplace == True`` or MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_values`` | P | | ``D`` if given the ``key`` parameter | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``squeeze`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``std`` | P | | ``D`` if ``ddof`` is not 0 or 1 |
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``subtract`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sum`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swapaxes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swaplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``take`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_clipboard`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_csv`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_dict`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_frame`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_hdf`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_json`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_latex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_list`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_markdown`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numpy`` | Y | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_period`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pickle`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_sql`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_string`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``to_timestamp`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xarray`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tolist`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transpose`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truediv`` | P | ``level`` | Raises division by zero exception when the right | +| | | | hand side contains at least one zero. pandas allows| +| | | | division by zero for non-object type Series and | +| | | | returns +/-inf.
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truncate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_convert`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_localize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unstack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``update`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``value_counts`` | P | ``bins`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``var`` | P | | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``view`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``where`` | P | | See ``mask`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``xs`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/window_supported.rst b/docs/source/modin/supported/window_supported.rst new file mode 100644 index 00000000000..f86e9d242a2 --- /dev/null +++ b/docs/source/modin/supported/window_supported.rst @@ -0,0 +1,146 @@ +Window supported APIs +======================== + +The following tables are structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. The third column contains notes on the current implementation. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Rolling window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Rolling window functions | Snowpark implemented?
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| 
``rank`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Weighted window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Weighted window functions | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Expanding window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Expanding window functions | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | N | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | +| | | | +| | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rank`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | +| | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Exponentially-weighted window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Exponential moving window | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | +| functions | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Window indexer + ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| Window Functions | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.BaseIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.FixedForwardWindowIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.VariableOffsetWindowIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/window.rst b/docs/source/modin/window.rst new file mode 100644 index 00000000000..d173cf292f6 --- /dev/null +++ b/docs/source/modin/window.rst @@ -0,0 +1,29 @@ +============================= +Window +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.window +.. rubric:: :doc:`All supported window APIs <supported/window_supported>` + +.. rubric:: Rolling window functions + +.. autosummary:: + :toctree: pandas_api/ + + Rolling.aggregate + Rolling.apply + Rolling.corr + Rolling.count + Rolling.cov + Rolling.kurt + Rolling.max + Rolling.mean + Rolling.median + Rolling.min + Rolling.quantile + Rolling.rank + Rolling.sem + Rolling.skew + Rolling.std + Rolling.sum + Rolling.var