diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5d0abdd4a71..d135c6ea497 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,8 +2,9 @@ /src/snowflake/snowpark/modin/ @snowflakedb/snowpandas /tests/integ/modin/ @snowflakedb/snowpandas /tests/unit/modin/ @snowflakedb/snowpandas +/docs/modin_api_coverage/ @snowflakedb/snowpandas +/docs/source/modin/ @snowflakedb/snowpandas /.github/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers -/docs/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers /scripts/ @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers setup.py @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers tox.ini @snowflakedb/snowpandas @snowflakedb/snowpark-python-api-reviewers diff --git a/.gitignore b/.gitignore index a903587ece7..1315f6b09a8 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,7 @@ whitesource/ docs/_build/ # Ignore generated autosummary files created by Sphinx docs when you run make html in the docs directory. docs/source/snowpark/api/ +docs/source/modin/pandas_api/ # Editor specific .idea/ diff --git a/docs/Makefile b/docs/Makefile index d0c3cbf1020..f82e631a109 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -14,6 +14,11 @@ help: .PHONY: help Makefile +view: + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + open build/html/index.html + # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile diff --git a/docs/README.md b/docs/README.md index d8738812839..b33f1948ff6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,9 @@ python -m pip install sphinx Open the documentation: `open -a "Google Chrome" build/html/index.html` +As a convenience, you can also use `make view` after activating your virtual environment, which runs `make clean`, `make html`, and opens the documentation with +either your default browser, or the application you set as default for opening HTML files. + Important files and directories: `docs/source/index.rst`: Specify which rst to include in the `index.html` landing page. diff --git a/docs/source/conf.py b/docs/source/conf.py index e184657ea14..04425224ca6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,7 +40,7 @@ "sphinx.ext.autosummary", "sphinx.ext.napoleon", "sphinx.ext.coverage", - "sphinx.ext.linkcode" + "sphinx.ext.linkcode", ] # -- Options for autodoc -------------------------------------------------- @@ -95,6 +95,7 @@ def linkcode_resolve(domain, info): import warnings, inspect, pkg_resources import snowflake.snowpark + if domain != "py": return None @@ -126,10 +127,8 @@ def linkcode_resolve(domain, info): source, lineno = inspect.getsourcelines(obj) linespec = f"#L{lineno}-L{lineno + len(source) - 1}" except TypeError: - linespec = "" + linespec = "" return ( f"https://github.com/snowflakedb/snowpark-python/blob/" f"v{release}/{os.path.relpath(fn, start=os.pardir)}{linespec}" ) - - diff --git a/docs/source/doc_gen.py b/docs/source/doc_gen.py index c24d31d9efc..d64c2c0d175 100755 --- a/docs/source/doc_gen.py +++ b/docs/source/doc_gen.py @@ -12,10 +12,7 @@ import tempfile import itertools -Class = namedtuple( - "Class", - ["module", "methods", "attributes"] -) +Class = namedtuple("Class", ["module", "methods", "attributes"]) Module = namedtuple( "Module", ["name", "attributes", "functions", "classes", "exceptions"] ) @@ -33,10 +30,12 @@ TAB = " " NEWLINE_TAB = f"\n{TAB}" RUBRIC_HEADER = 
".. rubric::" -AUTOSUMMARY_HEADER=".. autosummary::" +AUTOSUMMARY_HEADER = ".. autosummary::" -def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = None) -> Union[Module, Class]: +def autogen_and_parse_for_info( + module_name: str, class_name: Optional[str] = None +) -> Union[Module, Class]: if class_name: res = Class(module_name, [], []) name = f"{module_name}.{class_name}" @@ -45,7 +44,6 @@ def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = Non name = module_name with tempfile.TemporaryDirectory() as tmpdir: - rst_content = f""" .. currentmodule:: snowflake.snowpark @@ -62,12 +60,11 @@ def autogen_and_parse_for_info(module_name: str, class_name: Optional[str] = Non with open(fname, "w") as fp: fp.write(rst_content) - output_dir = os.path.join(tmpdir, 'output') + output_dir = os.path.join(tmpdir, "output") subprocess.run(["sphinx-autogen", fname, "-o", output_dir, "-t", "_templates"]) section = "" - with open(f"{output_dir}/{name}.rst") as fp: for line in fp: line = line.strip() @@ -102,11 +99,15 @@ def generate_autosummary_section(section: str, content: str) -> str: return "" -def generate_module_header(title:str, module:str) -> str: - automodule_text = "" if module=="snowflake.snowpark" else f""" +def generate_module_header(title: str, module: str) -> str: + automodule_text = ( + "" + if module == "snowflake.snowpark" + else f""" .. 
automodule:: {module} :noindex: """ + ) return f""" {'='*(len(title)+5)} {title} @@ -118,10 +119,12 @@ def generate_module_header(title:str, module:str) -> str: """ -def generate_classes(title:str, module:str, classes: Iterable[str]) -> str: +def generate_classes(title: str, module: str, classes: Iterable[str]) -> str: results = [autogen_and_parse_for_info(module, c) for c in classes] names = NEWLINE_TAB.join(classes) - methods = NEWLINE_TAB.join(itertools.chain.from_iterable(c.methods for c in results)) + methods = NEWLINE_TAB.join( + itertools.chain.from_iterable(c.methods for c in results) + ) attributes = NEWLINE_TAB.join( itertools.chain.from_iterable(c.attributes for c in results) ) @@ -135,7 +138,7 @@ def generate_classes(title:str, module:str, classes: Iterable[str]) -> str: """ -def generate_module(title:str, module: str) -> str: +def generate_module(title: str, module: str) -> str: mod = autogen_and_parse_for_info(module) attributes = NEWLINE_TAB.join(mod.attributes) functions = NEWLINE_TAB.join(mod.functions) @@ -161,9 +164,12 @@ def generate_module(title:str, module: str) -> str: "module", help="The module or the parent module of the classes to be documented" ) parser.add_argument("-c", "--classes", nargs="*", help="Classes to be documented") - parser.add_argument("-t", "--title", help="Title of the rst file generated", default="PLACEHOLDER") parser.add_argument( - "-f", "--filename", help="File to write the generated content to") + "-t", "--title", help="Title of the rst file generated", default="PLACEHOLDER" + ) + parser.add_argument( + "-f", "--filename", help="File to write the generated content to" + ) args = parser.parse_args() if args.classes: @@ -176,4 +182,3 @@ def generate_module(title:str, module: str) -> str: fp.write(content) else: print(content) - diff --git a/docs/source/index.rst b/docs/source/index.rst index 2190fea8f43..fe10b28a009 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,5 +11,6 @@ information, see the 
`Snowpark Developer Guide for Python ` + +.. rubric:: Constructor + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame + +.. rubric:: Attributes + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.index + DataFrame.columns + DataFrame.dtypes + DataFrame.info + DataFrame.select_dtypes + DataFrame.values + DataFrame.axes + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.empty + +.. rubric:: Conversion + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.astype + DataFrame.convert_dtypes + DataFrame.copy + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.head + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.tail + DataFrame.isin + DataFrame.where + DataFrame.mask + +.. rubric:: Binary operator functions + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.round + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + +.. rubric:: Function application, GroupBy & window + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.apply + DataFrame.applymap + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + +.. rubric:: Computations / descriptive stats + +.. 
autosummary:: + :toctree: pandas_api/ + + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.count + DataFrame.cummax + DataFrame.cummin + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + DataFrame.value_counts + + +.. rubric:: Reindexing / selection / label manipulation + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.first + DataFrame.get + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + +.. rubric:: Missing data handling + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.dropna + DataFrame.ffill + DataFrame.fillna + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.pad + DataFrame.replace + +.. rubric:: Reshaping, sorting, transposing + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.pivot_table + DataFrame.sort_values + DataFrame.sort_index + DataFrame.melt + DataFrame.squeeze + DataFrame.T + DataFrame.transpose + +.. rubric:: Combining / comparing / joining / merging + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.join + DataFrame.merge + DataFrame.update + +.. rubric:: Time Series-related + +.. autosummary:: + :toctree: pandas_api/ + + DataFrame.shift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + +.. rubric:: Serialization / IO / conversion + +.. 
autosummary:: + :toctree: pandas_api/ + + DataFrame.to_pandas + DataFrame.to_snowflake + DataFrame.to_snowpark diff --git a/docs/source/modin/general_functions.rst b/docs/source/modin/general_functions.rst new file mode 100644 index 00000000000..737033e2190 --- /dev/null +++ b/docs/source/modin/general_functions.rst @@ -0,0 +1,46 @@ +============================= +General functions +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.general +.. rubric:: :doc:`All supported general functions ` + +.. rubric:: Data manipulations + +.. autosummary:: + :toctree: pandas_api/ + + melt + pivot_table + cut + qcut + concat + get_dummies + merge + unique + +.. rubric:: Top-level missing data + +.. autosummary:: + :toctree: pandas_api/ + + isna + isnull + notna + notnull + + +.. rubric:: Top-level dealing with numeric data + +.. autosummary:: + :toctree: pandas_api/ + + to_numeric + +.. rubric:: Top-level dealing with datetimelike data + +.. autosummary:: + :toctree: pandas_api/ + + date_range + to_datetime diff --git a/docs/source/modin/groupby.rst b/docs/source/modin/groupby.rst new file mode 100644 index 00000000000..14b93571f2e --- /dev/null +++ b/docs/source/modin/groupby.rst @@ -0,0 +1,78 @@ +============================= +GroupBy +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.groupby +.. rubric:: :doc:`All supported groupby APIs ` + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.__iter__ + SeriesGroupBy.__iter__ + DataFrameGroupBy.groups + SeriesGroupBy.groups + DataFrameGroupBy.indices + SeriesGroupBy.indices + +.. rubric:: Function application + +.. autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.apply + SeriesGroupBy.apply + DataFrameGroupBy.agg + SeriesGroupBy.agg + DataFrameGroupBy.aggregate + SeriesGroupBy.aggregate + DataFrameGroupBy.transform + +.. rubric:: `DataFrameGroupBy` computations / descriptive stats + +.. 
autosummary:: + :toctree: pandas_api/ + + DataFrameGroupBy.count + DataFrameGroupBy.cumcount + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumsum + DataFrameGroupBy.head + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.max + DataFrameGroupBy.mean + DataFrameGroupBy.median + DataFrameGroupBy.min + DataFrameGroupBy.nunique + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.shift + DataFrameGroupBy.std + DataFrameGroupBy.sum + DataFrameGroupBy.tail + DataFrameGroupBy.var + +.. rubric:: `SeriesGroupBy` computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + SeriesGroupBy.count + SeriesGroupBy.cumcount + SeriesGroupBy.cummax + SeriesGroupBy.cummin + SeriesGroupBy.cumsum + SeriesGroupBy.max + SeriesGroupBy.mean + SeriesGroupBy.median + SeriesGroupBy.min + SeriesGroupBy.quantile + SeriesGroupBy.rank + SeriesGroupBy.shift + SeriesGroupBy.std + SeriesGroupBy.sum + SeriesGroupBy.var diff --git a/docs/source/modin/index.rst b/docs/source/modin/index.rst new file mode 100644 index 00000000000..1e17d3e1ace --- /dev/null +++ b/docs/source/modin/index.rst @@ -0,0 +1,21 @@ +=================== +Snowpark pandas API +=================== + +This page gives an overview of all public Snowpark pandas objects, functions and methods. +For your convenience, here is a list of all the :doc:`Supported APIs ` + + +.. toctree:: + :maxdepth: 2 + + session + io + general_functions + series + dataframe + window + groupby + resampling + numpy + All supported APIs \ No newline at end of file diff --git a/docs/source/modin/io.rst b/docs/source/modin/io.rst new file mode 100644 index 00000000000..5e3ae605b4b --- /dev/null +++ b/docs/source/modin/io.rst @@ -0,0 +1,29 @@ +============================= +Input/Output +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas + +.. rubric:: Flat file + +.. autosummary:: + :toctree: pandas_api/ + + read_csv + read_json + read_parquet + +.. 
rubric:: SQL + +.. autosummary:: + :toctree: pandas_api/ + + read_snowflake + to_snowpark + +.. rubric:: pandas + +.. autosummary:: + :toctree: pandas_api/ + + to_pandas \ No newline at end of file diff --git a/docs/source/modin/numpy.rst b/docs/source/modin/numpy.rst new file mode 100644 index 00000000000..2884b2126ed --- /dev/null +++ b/docs/source/modin/numpy.rst @@ -0,0 +1,66 @@ +NumPy Interoperability +====================== + +Snowpark pandas provides limited interoperability with NumPy functions through the NumPy +NEP13 and NEP18 specifications, which define `__array_ufunc__` and `__array_function__`. +A limited set of NumPy APIs is translated to distributed Snowpark pandas functions. + ++-----------------------------+----------------------------------------------------+ +| NumPy method | Notes for current implementation | ++-----------------------------+----------------------------------------------------+ +| ``np.where`` | Maps np.where(cond, x, y) to x.where(cond, y). | +| | cond, x, and y should have the same shapes or be | +| | scalars. The result is always a Snowpark pandas | +| | DataFrame. | +| | | +| | Since this function maps to df.where the | +| | column and index labels are considered, as opposed | +| | to strict positional indexing in NumPy. | +| | | +| | cond, x, and y can either be all non-scalars or a | +| | mix of scalars and non-scalars, such that | +| | non-scalars have the same shape. (If cond, x, and | +| | y are all scalars, NumPy will not call the | +| | dispatcher at all, and the normal NumPy behavior | +| | will occur.) 
| ++-----------------------------+----------------------------------------------------+ +| ``np.add`` | Mapped to df.__add__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_and`` | Mapped to df.__and__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_or`` | Mapped to df.__or__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_xor`` | Mapped to df.__xor__(df2) | ++-----------------------------+----------------------------------------------------+ +| ``np.logical_not`` | Mapped to ~df.astype(bool) | ++-----------------------------+----------------------------------------------------+ + +NEP18 Implementation Details +---------------------------- +NumPy differs from pandas and Snowflake pandas in several key respects. It is +important to understand that the interoperability provided is to support +common pandas use-cases, rather than matrix or linear algebra operations. NumPy +functions are mapped, with some transformation, to their pandas analogues. + +Return Value +-------------------- +NEP18 does not specify the return value when implementing a function like np.where, +but they suggest that the return value should match the input types. We follow +that suggestion here and return a Snowpark pandas DataFrame. + +Broadcasting +------------ +NumPy will "broadcast" all arguments into the same array shape so operations +can be vectorized on the CPU. Snowpark pandas should not do this because all +execution runs within Snowflake. All input DataFrames or Series should be of +the same shape and will not be broadcast. Scalar values can also be used as +an input. + +Positional Operations +--------------------- +NumPy always performs positional operations on input datatypes, assuming they +are similarly shaped and meaningful arrays. 
Pandas can have DataFrames which +represent the same data but with different column ordering. Even when a NumPy +method is called on a Snowpark pandas DataFrame, we continue to consider the labels +while performing the operation. + diff --git a/docs/source/modin/resampling.rst b/docs/source/modin/resampling.rst new file mode 100644 index 00000000000..c3069b06656 --- /dev/null +++ b/docs/source/modin/resampling.rst @@ -0,0 +1,62 @@ +============================= +Resampling +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.resample +.. rubric:: :doc:`All supported resampling APIs ` + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.groups + Resampler.indices + Resampler.get_group + + +.. rubric:: Function application + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.apply + Resampler.aggregate + Resampler.transform + + +.. rubric:: Upsampling + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.ffill + Resampler.bfill + Resampler.nearest + Resampler.fillna + Resampler.asfreq + +.. rubric:: Computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + Resampler.count + Resampler.nunique + Resampler.first + Resampler.last + Resampler.max + Resampler.mean + Resampler.median + Resampler.min + Resampler.interpolate + Resampler.ohlc + Resampler.pad + Resampler.pipe + Resampler.prod + Resampler.quantile + Resampler.sem + Resampler.size + Resampler.std + Resampler.sum + Resampler.var diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst new file mode 100644 index 00000000000..c2ef907e829 --- /dev/null +++ b/docs/source/modin/series.rst @@ -0,0 +1,223 @@ +============================= +Series +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas +.. rubric:: :doc:`All supported Series APIs ` + +.. rubric:: Constructor + +.. autosummary:: + :toctree: pandas_api/ + + Series + +.. rubric:: Attributes + +.. 
autosummary:: + :toctree: pandas_api/ + + Series.index + Series.axes + Series.array + Series.dtype + Series.dtypes + Series.duplicated + Series.empty + Series.hasnans + Series.name + Series.ndim + Series.shape + Series.size + Series.T + Series.values + + + +.. rubric:: Conversion + +.. autosummary:: + :toctree: pandas_api/ + + Series.astype + Series.convert_dtypes + Series.copy + Series.to_dict + Series.to_list + Series.to_numpy + Series.to_pandas + Series.to_snowflake + Series.to_snowpark + Series.__array__ + + +.. rubric:: Indexing, iteration + +.. autosummary:: + :toctree: pandas_api/ + + Series.iloc + Series.loc + Series.__iter__ + Series.keys + + + +.. rubric:: Binary operator functions + +.. autosummary:: + :toctree: pandas_api/ + + Series.add + Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow + Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq + +.. rubric:: Function application, GroupBy & window + +.. autosummary:: + :toctree: pandas_api/ + + Series.apply + Series.agg + Series.aggregate + Series.transform + Series.map + Series.groupby + Series.rolling + + + + + +.. rubric:: Computations / descriptive stats + +.. autosummary:: + :toctree: pandas_api/ + + Series.abs + Series.all + Series.any + Series.count + Series.cummax + Series.cummin + Series.cumsum + Series.describe + Series.diff + Series.is_unique + Series.max + Series.mean + Series.median + Series.min + Series.quantile + Series.rank + Series.skew + Series.std + Series.sum + Series.var + Series.kurtosis + Series.unique + Series.nunique + Series.is_unique + Series.value_counts + + +.. rubric:: Reindexing / selection / label manipulation + +.. 
autosummary:: + :toctree: pandas_api/ + + Series.drop + Series.drop_duplicates + Series.duplicated + Series.get + Series.head + Series.idxmax + Series.idxmin + Series.isin + Series.last + Series.rename + Series.rename_axis + Series.reset_index + Series.sample + Series.set_axis + Series.take + Series.tail + Series.where + Series.mask + Series.add_prefix + Series.add_suffix + + + +.. rubric:: Missing data handling + +.. autosummary:: + :toctree: pandas_api/ + + Series.dropna + Series.ffill + Series.fillna + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad + Series.replace + +.. rubric:: Reshaping, sorting + +.. autosummary:: + :toctree: pandas_api/ + + Series.sort_values + Series.sort_index + Series.squeeze + +.. rubric:: Combining / comparing / joining / merging + +.. autosummary:: + :toctree: pandas_api/ + + Series.update + +.. rubric:: Time Series-related + +.. autosummary:: + :toctree: pandas_api/ + + Series.shift + Series.first_valid_index + Series.last_valid_index + Series.resample + + +.. rubric:: Accessors + +.. autosummary:: + :toctree: pandas_api/ + + Series.str + Series.dt + +.. rubric:: :doc:`All supported Series str APIs ` +.. rubric:: :doc:`All supported Series dt APIs ` diff --git a/docs/source/modin/session.rst b/docs/source/modin/session.rst new file mode 100644 index 00000000000..414a05b8407 --- /dev/null +++ b/docs/source/modin/session.rst @@ -0,0 +1,95 @@ +=========== +Session +=========== + +``modin.pandas.session`` is the Snowpark session that new +Snowpark pandas DataFrames and Series will use to execute queries. + +* ``session`` starts as ``None``. + +* When there is no active Snowpark session and ``session`` is ``None``, accessing + ``session`` or creating a Snowpark pandas DataFrame or Series will raise an + exception. You will need to create a Snowpark session to access ``session`` or + create a DataFrame or Series. 
+ +* When there is a single active Snowpark session and ``session`` is ``None``, + Snowpark pandas automatically assigns that session to ``session``. + +* When there are multiple active Snowpark sessions and ``session`` is ``None``, + accessing ``session`` or creating a Snowpark pandas DataFrame or Series will + raise an exception. To make Snowpark pandas populate + ``modin.pandas.session``, you can + `close `_ + one of the sessions, or assign a particular session to ``session``. For + example, if you execute ``modin.pandas.session = session1``, + Snowpark pandas will use ``session1``. + +Examples +======== + +Creating and using the default Snowpark session +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you have set a `default Snowflake connection `_, +you can use that connection to create a Snowpark session for Snowpark pandas: + +.. code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + # Session.builder.create() creates a Snowpark session from the default Snowflake connection. + Session.builder.create() + df = pd.DataFrame([1, 2, 3]) + +Note that Snowpark pandas uses the unique active Snowpark session, even though +the code does not explicitly assign that session to Snowpark pandas. + +Assigning one of multiple sessions to Snowpark pandas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can create multiple Snowpark sessions, then assign one of them to Snowpark +pandas. + +.. 
code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + pandas_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + other_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + pd.session = pandas_session + df = pd.DataFrame([1, 2, 3]) + +Trying to use Snowpark pandas when there is no active Snowpark session +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The code below will cause a :doc:`SnowparkSessionException <../snowpark/api/snowflake.snowpark.exceptions.SnowparkSessionException>` +with a message like ``Snowpark pandas requires an active snowpark session, but there is none.`` +Once you create a session, you can use Snowpark pandas. + +.. code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + + df = pd.DataFrame([1, 2, 3]) + +Trying to use Snowpark pandas when there are multiple active Snowpark sessions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The code below will cause a :doc:`SnowparkSessionException <../snowpark/api/snowflake.snowpark.exceptions.SnowparkSessionException>` +with a message like ``There are multiple active snowpark sessions, but you need +to choose one for Snowpark pandas.`` + +.. 
code-block:: python + + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + from snowflake.snowpark import Session + + pandas_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + other_session = Session.builder.configs({"user": "", "password": "", "account": ""}).create() + df = pd.DataFrame([1, 2, 3]) diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst new file mode 100644 index 00000000000..2bec8b340e1 --- /dev/null +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -0,0 +1,472 @@ +``pd.DataFrame`` supported APIs +=============================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Attributes + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| DataFrame attribute | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``T`` | P | ``D`` if any column name is not str or tuple of str| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``at`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``attrs`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``columns`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtypes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``empty`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``flags`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iat`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iloc`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | D | This operation is not recommended since it pulls | +| | | the index into local memory. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``loc`` | P | ``N`` for set with MultiIndex | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ndim`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shape`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``style`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``values`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + + +Methods + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| DataFrame method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``abs`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_prefix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_suffix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | ``margins``, ``observed``, | if ``axis == 0``: ``Y`` when function is one of | +| | | ``sort`` | `count`, `mean`, `min`, `max`, `sum`, `median`; | +| | | | `std` and `var` supported with `ddof=0` or | +| | | | `ddof=1`; `quantile` is supported when `q` is | +| | | | the default value or a scalar. | +| | | | if ``axis == 1``: ``Y`` when function is `count`, | +| | | | `min`, `max`, or `sum` and the index is not a | +| | | | MultiIndex. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | ``margins``, ``observed``, | See ``agg`` | +| | | ``sort`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``align`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``all`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``any`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | | ``D`` if ``axis == 0`` or ``func`` is not callable | +| | | | or ``result_type`` is given or ``args`` and | +| | | | ``kwargs`` contain DataFrame or Series | +| | | | ``N`` if ``func`` maps to different column labels. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``applymap`` | P | | ``D`` if ``na_action == "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``assign`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``astype`` | P | | ``D``: from string to datetime or ``errors == | +| | | | "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``at_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bool`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``boxplot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``clip`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine_first`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``compare`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``convert_dtypes`` | N | | Not supported since Snowpark pandas is already | +| | | | using nullable datatypes internally. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``copy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corrwith`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``count`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cov`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumprod`` | D | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumsum`` | P | | ``Y`` if values are numeric | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``describe`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``diff`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``div`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divide`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop_duplicates`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``droplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dropna`` | P | | ``D`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``duplicated`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eq`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``equals`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eval`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ewm`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``expanding`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``explode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | | ``D`` if param ``limit`` is set | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``floordiv`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_dict`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_records`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ge`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``groupby`` | P | ``observed`` is ignored since | ``Y``, support ``axis == 0`` and ``by`` is column | +| | | Categoricals are not implemented | label or Series from the current DataFrame; | +| | | yet | otherwise ``D``; | +| | | | Note that supported functions are agg, count, | +| | | | cumcount, cummax, cummin, cumsum, max, mean, | +| | | | median, min, quantile, shift, std, sum, and var. 
| +| | | | Otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``gt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``head`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``hist`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | P | | ``N`` for MultiIndex dataframes | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | P | | ``N`` for MultiIndex dataframes | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_objects`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``info`` | P | | Index is different, zero bytes reported for memory | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``insert`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isetitem`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``items`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``iterrows`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``itertuples`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``join`` | P | | ``D`` if given the ``validate`` param. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``keys`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurtosis`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``le`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``map`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mask`` | P | | ``D`` if given ``axis`` when ``other`` is a | +| | | | ``DataFrame`` or ``level`` parameters; | +| | | | ``N`` if ``cond`` or ``other`` is Callable | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``max`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mean`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``median`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``melt`` | P | ``col_level`` | ``N`` when columns are MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``memory_usage`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge`` | P | | ``D`` if param ``validate`` is given | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``min`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``multiply`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ne`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nlargest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nsmallest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nunique`` | P | | ``D`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pad`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pct_change`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``pivot_table`` | P | ``observed``, ``margins``, | ``N`` if ``index``, ``columns``, or ``values`` is | +| | | ``sort`` | not str; or MultiIndex; or any ``aggfunc`` is not | +| | | | "count", "mean", "min", "max", or "sum" | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pop`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``prod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``product`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``quantile`` | P | | ``Y`` if ``axis == 0``, and ``interpolation`` is | +| | | | ``"linear"`` or ``"nearest"``, and ``method`` is | +| | | | ``"single"``. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``query`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``radd`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rank`` | P | | ``N`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdiv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex_like`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename`` | P | | ``D`` if ``mapper`` is callable or the DataFrame | +| | | | has a MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reorder_levels`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``copy`` is ignored, ``method``, | | +| | | ``limit`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``resample`` | P | | Only DatetimeIndex is supported and its ``freq`` | +| | | | will be lost. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reset_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rfloordiv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rolling`` | P | | Supports integer ``window``, ``min_periods >= 1``, | +| | | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``round`` | P | | ``N`` if ``decimals`` is Series | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rpow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rsub`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rtruediv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sample`` | P | | ``N`` if ``weights``, ``random_state``, or | +| | | | ``replace = True`` is specified when ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``select_dtypes`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sem`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_flags`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``shift`` | P | ``freq`` | No support for ``freq != None``. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``skew`` | P | | ``N`` if ``axis == 1`` or ``skipna == False`` | +| | | | or ``numeric_only=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_index`` | P | | ``D`` if given the ``key`` param. ``N`` if | +| | | | ``axis == 1``, ``inplace == True``, or MultiIndex. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_values`` | P | | ``D`` if given the ``key`` param or ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``squeeze`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``stack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``std`` | P | | ``D`` if ``ddof`` is not 0 or 1 | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``subtract`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sum`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swapaxes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swaplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``take`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_clipboard`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_csv`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_dict`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_excel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_feather`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_gbq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_hdf`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_html`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_json`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_latex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_markdown`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numpy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_orc`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_parquet`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_period`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pickle`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_records`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_sql`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_stata`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_string`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_timestamp`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xarray`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xml`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | P | | Only callable and string parameters are supported.| +| | | | list and dict parameters are not supported. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transpose`` | P | | See ``T`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truediv`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truncate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_convert`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_localize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unstack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``update`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``value_counts`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``var`` | P | | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``where`` | P | | See ``mask`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``xs`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/general_supported.rst b/docs/source/modin/supported/general_supported.rst new file mode 100644 index 00000000000..d689a376872 --- /dev/null +++ b/docs/source/modin/supported/general_supported.rst @@ -0,0 +1,209 @@ +General utilities supported APIs +======================================= + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Data manipulations + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``concat`` | P | ``levels`` is not supported, | | +| | | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``crosstab`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cut`` | P | ``retbins``, ``labels`` | ``N`` if ``retbins=True`` or ``labels!=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``factorize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``from_dummies`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get_dummies`` | P | ``sparse`` is ignored | ``Y`` if params ``dummy_na``, ``drop_first`` | +| | | | and ``dtype`` are default, otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lreshape`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``melt`` | P | ``col_level``, ``ignore_index`` | ``N`` if df.columns is a MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge`` 
| P | ``validate`` | ``D`` if param ``validate`` is given | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge_asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``merge_ordered`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pivot_table`` | P | ``observed``, ``margins``, | ``N`` if ``index``, ``columns``, or ``values`` is | +| | | ``sort`` | not str; or MultiIndex; or any ``aggfunc`` is not | +| | | | "count", "mean", "min", "max", or "sum" | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``qcut`` | P | | ``N`` if ``labels`` is an array or | +| | | | ``retbins=True`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_csv`` | P | ``engine``, ``converters``, | Reads both local and staged file(s) into a Snowpark| +| | | ``true_values``, | pandas DataFrame. Note, the resulting row order | +| | | ``false_values``, ``skipfooter``,| may differ from the order of rows in the original | +| | | ``skipinitialspace``, | file(s). 
| +| | | ``nrows``, ``keep_default_na``, | | +| | | ``na_filter``, ``date_parser``, | Supported parameters are ``sep``, ``header``, | +| | | ``parse_dates``, ``verbose``, | ``names``, ``index_col``, ``usecols``, ``dtype``, | +| | | ``infer_datetime_format``, | ``compression``, ``na_values``, ``escapechar``, | +| | | ``date_format``, | ``skiprows``, ``encoding``, ``quotechar``, and | +| | | ``keep_date_col``, ``dayfirst``, | ``skip_blank_lines``. | +| | | ``cache_dates``, ``iterator``, | | +| | | ``chunksize``, ``thousands``, | | +| | | ``decimal``, ``lineterminator``, | | +| | | ``quoting``, ``doublequote``, | | +| | | ``comment``, ``encoding_errors``,| | +| | | ``dialect``, ``on_bad_lines``, | | +| | | ``low_memory``, | | +| | | ``delim_whitespace``, | | +| | | ``memory_map``, | | +| | | ``float_precision``, | | +| | | ``storage_options``, | | +| | | ``dtype_backend`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_json`` | P | ``orient``, ``typ``, ``dtype``, | ``P``: | +| | | ``convert_axes``, ``lines``, | - if ndjson files are passed | +| | | ``convert_dates``, ``date_unit``,| - Supported parameters are ``compression`` and | +| | | ``keep_default_dates``, | ``encoding`` | +| | | ``encoding_errors``, ``nrows``, | | +| | | and ``chunksize`` will raise | | +| | | an error. | | +| | | ``precise_float``, ``engine``, | | +| | | ``dtype_backend``, and | | +| | | ``storage_options`` are ignored. | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_parquet`` | P | ``use_nullable_dtypes``, | Supported parameter(s) are: ``columns`` | +| | | ``filesystem``, and ``filters`` | | +| | | will raise an error if used. | | +| | | ``engine``, ``storage_options``, | | +| | | ``dtype_backend``, and | | +| | | ``**kwargs`` are ignored. 
| | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_snowflake`` | Y | | Reading from tables as well as SELECT SQL Queries | +| | | | supported, but ordering is not guaranteed for | +| | | | SQL Queries that contain ORDER BY clauses. More | +| | | | complex queries, including CTEs and CTEs with | +| | | | anonymous stored procedures are also supported. | +| | | | Obtaining results from stored procedures is also | +| | | | supported via CALL queries. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``read_table`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pandas`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_snowflake`` | Y | ``**kwargs`` are currently | | +| | | ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_snowpark`` | Y | | Convert the Snowpark pandas DataFrame or Series to | +| | | | a Snowpark DataFrame. Once converted to a Snowpark | +| | | | DataFrame, no ordering information will be | +| | | | preserved. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``wide_to_long`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with missing data + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Top-level dealing with numeric data + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numeric`` | P | ``downcast`` is ignored | ``D`` if ``errors == "ignore"`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with datetime-like data + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bdate_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``date_range`` | P | ``tz`` | ``N`` for business or custom frequencies | +| | | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_freq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``period_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``timedelta_range`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_datetime`` | P | ``cache`` is ignored | ``D``: | +| | | | - if ``format`` is None or not 
supported in | +| | | | Snowflake | +| | | | - or if params ``exact``, ``infer_datetime_format``| +| | | | are given | +| | | | - or ``origin == "julian"`` | +| | | | - or ``errors == "ignore"`` | +| | | | - or ``arg`` is DataFrame and data type is not int | +| | | | - or ``arg`` is Series and data type is string | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_timedelta`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Top-level dealing with Interval data + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``interval_range`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Top-level evaluation + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``eval`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Datetime formats + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``tseries.api.guess_datetime_format`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + + +Hashing + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``util.hash_array`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``util.hash_pandas_object`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ + +Importing from other DataFrame libraries + ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| Method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++---------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.interchange.from_dataframe`` | N | | ++---------------------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/groupby_supported.rst b/docs/source/modin/supported/groupby_supported.rst new file mode 100644 index 00000000000..02b43df341f --- /dev/null +++ b/docs/source/modin/supported/groupby_supported.rst @@ -0,0 +1,184 @@ +GroupBy supported APIs +====================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Indexing, iteration + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_group`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``groups`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``indices`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``__iter__`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Function application + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | ``axis`` other than 0 is not | ``Y``, support functions are count, mean, min, max,| +| | | implemented. | sum, median, std, and var | +| | | | (including both Python and NumPy functions) | +| | | | otherwise ``D``. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | ``axis`` other than 0 is not | See ``agg`` | +| | | implemented. | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | ``axis`` other than 0 is not | ``Y`` if the following are true, otherwise ``N``: | +| | | implemented. 
| - ``func`` is a callable that always returns | +| | | | either a pandas DataFrame, a pandas Series, or | +| | | ``SeriesGroupBy.apply`` is not | objects that are neither DataFrame nor Series. | +| | | implemented. | - ``apply`` called on DataFrameGroupBy, not | +| | | | SeriesGroupBy | +| | | | - grouping on axis=0 | +| | | | - Not applying transform to a dataframe with a | +| | | | non-unique index | +| | | | - Not applying ``func`` that returns two | +| | | | dataframes that have different labels for the | +| | | | column at a given position | +| | | | - Not applying ``func`` that returns two | +| | | | dataframes that have different names for a | +| | | | given index label | +| | | | - Not applying ``func`` that returns two | +| | | | Series that have different labels for the | +| | | | row at a given position | +| | | | - Not applying ``func`` that returns two | +| | | | Series that have different names | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | P | ``SeriesGroupBy.transform`` is | ``Y`` when ``func`` is a string or callable. | +| | | not implemented. | A UDTF is created to run ``transform`` on every | +| | | | group via ``apply``. ``transform`` has the same | +| | | | limitations as ``apply`` except for string ``func``| +| | | | also being valid for ``transform``. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Computations/descriptive stats + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``all`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``any`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corrwith`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | Y | SeriesGroupBy does not implement ``numeric_only`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumcount`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumprod`` | 
N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cumsum`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``describe`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``diff`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ffill`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``fillna`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``first`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``head`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``idxmax`` | P | When GroupBy axis is 1: | +| | | - if idxmax axis is 1, then ``N``; | +| | | - if idxmax axis is 0, then ``D``. | +| | | GroupBy axis = 0 is fully supported. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``idxmin`` | P | See ``idxmax`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``last`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ngroup`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nth`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nunique`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ohlc`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pct_change`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``prod`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | Y | See ``count`` | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rank`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``resample`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rolling`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sample`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shift`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | ``D`` if ``ddof`` is not 0 or 1 | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | Y | See ``count`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``take`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``value_counts`` | N | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Plotting and visualization + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| GroupBy method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``boxplot`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hist`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``plot`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/index.rst b/docs/source/modin/supported/index.rst new file mode 100644 index 00000000000..2b1c6c0b06e --- /dev/null +++ b/docs/source/modin/supported/index.rst @@ -0,0 +1,21 @@ +====================================== +Snowpark pandas Supported API +====================================== + +For your convenience, we have compiled a list of currently implemented APIs and methods available in Snowpark pandas. +This documentation is updated as new methods and APIs are merged into the release branch, and not necessarily correct as +of the most recent release. + +To view the docs for the most recent release, check that you’re viewing the stable version of the docs. + +.. 
toctree:: + :maxdepth: 2 + + general_supported + series_supported + dataframe_supported + window_supported + groupby_supported + resampling_supported + series_dt_supported + series_str_supported \ No newline at end of file diff --git a/docs/source/modin/supported/resampling_supported.rst b/docs/source/modin/supported/resampling_supported.rst new file mode 100644 index 00000000000..05bedd52257 --- /dev/null +++ b/docs/source/modin/supported/resampling_supported.rst @@ -0,0 +1,101 @@ +Resampler supported APIs +======================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no, i.e., the API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Indexing, iteration + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented?
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_group`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``groups`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``indices`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``__iter__`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Function application + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``transform`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Upsampling + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? 
(Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | ``limit`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nearest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ + +Computations / descriptive stats + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Resampler method | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``first`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``last`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nunique`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ohlc`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pad`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``prod`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | P | Supports frequencies 's', 'min', 'h', 'D'. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | Supports frequencies 's', 'min', 'h', 'D'. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ + diff --git a/docs/source/modin/supported/series_dt_supported.rst b/docs/source/modin/supported/series_dt_supported.rst new file mode 100644 index 00000000000..e9316bc93b9 --- /dev/null +++ b/docs/source/modin/supported/series_dt_supported.rst @@ -0,0 +1,124 @@ +``DatetimeProperties`` supported APIs +====================================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no, i.e., the API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| DatetimeProperties | Snowpark implemented?
(Y/N/P/D) | Notes for current implementation | +| (Series.dt) | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``date`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``timetz`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``year`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``month`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hour`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``minute`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``second`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``microsecond`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nanosecond`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_of_week`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``week`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``weekofyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dayofweek`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``weekday`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dayofyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_of_year`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quarter`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_month_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_month_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_quarter_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_quarter_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_year_start`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_year_end`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_leap_year`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``daysinmonth`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``days_in_month`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``freq`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_period`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_pydatetime`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz_localize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``tz_convert`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``normalize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``strftime`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``round`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``floor`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ceil`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| 
``month_name`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``day_name`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``total_seconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_pytimedelta`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``seconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``days`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``microseconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nanoseconds`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``components`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``qyear`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``start_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``end_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``end_time`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``to_timestamp`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``unit`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_str_supported.rst b/docs/source/modin/supported/series_str_supported.rst new file mode 100644 index 00000000000..4359151c74f --- /dev/null +++ b/docs/source/modin/supported/series_str_supported.rst @@ -0,0 +1,139 @@ +``StringMethods`` supported APIs +================================= + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no, i.e., the API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| StringMethods | Snowpark implemented?
(Y/N/P/D) | Notes for current implementation | +| (Series.str) | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``capitalize`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``casefold`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cat`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``center`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``contains`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``decode`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``encode`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``endswith`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. 
| ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``extract`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``extractall`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``find`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``findall`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``fullmatch`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``get_dummies`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isalpha`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isalnum`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isdecimal`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isdigit`` | P | Does not check for special digits, like | +| | | superscripted and subscripted digits in unicode | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``islower`` | Y | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isnumeric`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isspace`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``istitle`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``isupper`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``join`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``len`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ljust`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``lower`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``lstrip`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``match`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``normalize`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``pad`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``partition`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``removeprefix`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``removesuffix`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``repeat`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``N`` if `pat` is non-string, `repl` is a | +| | | non-string, or `n` is non-numeric or zero. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rfind`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rindex`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rjust`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rpartition`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rsplit`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rstrip`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``slice`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``slice_replace`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``split`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``N`` if `pat` is non-string, `repl` is | +| | 
| non-string, or `n` is non-numeric or zero. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``split`` | P | ``N`` if `pat` is non-string, `n` is non-numeric, | +| | | `expand` is set, or `regex` is set. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``startswith`` | P | ``N`` if the `na` parameter is set to a non-bool | +| | | value. | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``strip`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``swapcase`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``title`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``translate`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``upper`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``wrap`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``zfill`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ \ No newline at end of file diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst new file mode 100644 index 00000000000..a00880094ef --- /dev/null +++ b/docs/source/modin/supported/series_supported.rst @@ -0,0 +1,460 @@ +``pd.Series`` supported APIs +============================ + +The following table is structured as follows: The 
first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + +Attributes + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Series attribute | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``T`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``array`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``at`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``attrs`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtype`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``dtypes`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``empty`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``flags`` | D | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``hasnans`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iat`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``iloc`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``index`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_monotonic_decreasing`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_monotonic_increasing`` | D | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``is_unique`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``loc`` | P | ``N`` for set with MultiIndex | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``name`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``nbytes`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``ndim`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``shape`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``size`` | Y | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``values`` | Y | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + + +Methods + ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| Series method | Snowpark implemented? (Y/N/P/D) | Missing parameters | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``abs`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_prefix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``add_suffix`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``agg`` | P | | ``Y`` when function is one of `count`, `mean`, | +| | | | `min`, `max`, `sum`, `median`; `std` and `var` | +| | | | supported with `ddof=0` or `ddof=1`; `quantile` is | +| | | | supported when `q` is the default value or a | +| | | | scalar | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``aggregate`` | P | | See ``agg`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``align`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``all`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``any`` | P | | ``D`` for non-integer/boolean types | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``apply`` | P | ``convert_dtype`` is ignored | ``D`` if ``func`` is not callable. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argmax`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argmin`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``argsort`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asfreq`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``asof`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``astype`` | P | | ``D``: from string to datetime or ``errors == | +| | | | "ignore"`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``at_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``autocorr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``axes`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``backfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``between_time`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bfill`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``bool`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``case_when`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``clip`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``combine_first`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``compare`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``convert_dtypes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``copy`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``corr`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``count`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cov`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cummin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumprod`` | D | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``cumsum`` | P | | ``Y`` if values are 
numeric, otherwise fails. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``describe`` | Y | | | +| | | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``diff`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``div`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divide`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``divmod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dot`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``drop_duplicates`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``droplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``dropna`` | P | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``duplicated`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``eq`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``equals`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ewm`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``expanding`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``explode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``factorize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ffill`` | P | | ``D`` if parameter ``limit`` is set | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``fillna`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``filter`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``first_valid_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``floordiv`` | P | ``level`` | Raises division by zero exception when the right | +| | | | hand side contains at least one zero. pandas allows| +| | | | division by zero for non-object type Series and | +| | | | returns +/-inf. | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ge`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``get`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``groupby`` | P | ``observed`` is ignored since | ``Y`` when ``axis == 0`` and ``by`` is column | +| | | Categoricals are not implemented | label or Series from the current DataFrame; | +| | | yet | otherwise ``D``; | +| | | | Note that supported functions are agg, count, | +| | | | cumcount, cummax, cummin, cumsum, max, mean, | +| | | | median, min, quantile, shift, std, sum, and var. 
| +| | | | Otherwise ``N`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``gt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``head`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``hist`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmax`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``idxmin`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``infer_objects`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``info`` | D | | Different Index types are used in pandas but not | +| | | | in Snowpark pandas | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``interpolate`` | N | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isin`` | Y | | Snowpark pandas deviates with respect to handling | +| | | | NA values | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``isnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``item`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``items`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``keys`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``kurtosis`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``last_valid_index`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``le`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``lt`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``map`` | P | | See ``apply`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mask`` | P | | ``D`` if given ``axis`` or ``level`` parameters; | +| | | | ``N`` if ``cond`` or ``other`` is Callable | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``max`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mean`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``median`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``memory_usage`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``min`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mod`` | P | ``level`` | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mode`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``mul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``multiply`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ne`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nlargest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notna`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``notnull`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nsmallest`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``nunique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pad`` | P | | See ``ffill`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``pct_change`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pipe`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pop`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``pow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``prod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``product`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``quantile`` | P | | ``Y`` if values are numeric, and ``interpolation`` | +| | | | is ``"linear"`` or ``"nearest"``; | +| | | | ``N`` if ``q`` is a DataFrame or Series | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``radd`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rank`` | P | | ``N`` if ``axis == 1`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``ravel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdiv`` | P | ``level`` | 
See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rdivmod`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reindex_like`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename`` | P | ``copy`` is ignored | ``D`` if ``mapper`` is callable or the series has | +| | | | MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rename_axis`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``reorder_levels`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``repeat`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``replace`` | P | ``method``, ``limit`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``resample`` | P | | Only DatetimeIndex is supported and its ``freq`` | +| | | | will be lost | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``reset_index`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rfloordiv`` | P | ``level`` | See ``floordiv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmod`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rmul`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rolling`` | P | | Supports integer ``window``, ``min_periods >= 1``, | +| | | | and ``center`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``round`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rpow`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rsub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``rtruediv`` | P | ``level`` | See ``truediv`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sample`` | P | ``weights``, ``random_state`` | ``N`` if ``replace = True`` | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``searchsorted`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sem`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``set_axis`` | Y | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``shift`` | P | ``freq`` | No support for ``freq != None`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``skew`` | P | | ``N`` if ``axis == 1`` or ``skipna == False`` | +| | | | or ``numeric_only=False`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_index`` | P | | ``D`` if given the ``key`` parameter; | +| | | | ``N`` if ``inplace == True`` or MultiIndex | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sort_values`` | P | | ``D`` if given the ``key`` parameter | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``squeeze`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``std`` | P | | ``D`` if ``ddof`` is not 0 or 1 | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sub`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``subtract`` | P | ``level`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``sum`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swapaxes`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``swaplevel`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tail`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``take`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_clipboard`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_csv`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_dict`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_frame`` | Y | | | 
++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_hdf`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_json`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_latex`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_list`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_markdown`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_numpy`` | Y | ``copy`` is ignored | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_period`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_pickle`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_sql`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_string`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``to_timestamp`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``to_xarray`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tolist`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transform`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``transpose`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truediv`` | P | ``level`` | Raises division by zero exception when the right | +| | | | hand side contains at least one zero. pandas allows| +| | | | division by zero for non-object type Series and | +| | | | returns +/-inf. 
| ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``truncate`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_convert`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``tz_localize`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unique`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``unstack`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``update`` | Y | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``value_counts`` | P | ``bins`` | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``var`` | P | | See ``std`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``view`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| ``where`` | P | | See ``mask`` | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ +| 
``xs`` | N | | | ++-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/window_supported.rst b/docs/source/modin/supported/window_supported.rst new file mode 100644 index 00000000000..f86e9d242a2 --- /dev/null +++ b/docs/source/modin/supported/window_supported.rst @@ -0,0 +1,146 @@ +Window supported APIs +======================== + +The following table is structured as follows: The first column contains the method name. +The second column is a flag for whether or not there is an implementation in Snowpark for +the method in the left column. + +.. note:: + ``Y`` stands for yes, i.e., supports distributed implementation, ``N`` stands for no and API simply errors out, + ``P`` stands for partial (meaning some parameters may not be supported yet), and ``D`` stands for defaults to single + node pandas execution via UDF/Sproc. + ``engine`` and ``engine_kwargs`` are always ignored in Snowpark pandas. The execution engine will always be Snowflake. + +Rolling window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Rolling window functions | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| 
``rank`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | P | Supports integer ``window``, ``min_periods >= 1``, | +| | | and ``center`` for ``axis = 0`` | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Weighted window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Weighted window functions | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Expanding window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Expanding window functions | Snowpark implemented? (Y/N/P/D) | Notes for current implementation | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``aggregate`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``apply`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``count`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``kurt`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``max`` | N | | 
++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``median`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``min`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``quantile`` | N | | +| | | | +| | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``rank`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sem`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``skew`` | N | | +| | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Exponentially-weighted window functions + ++-----------------------------+---------------------------------+----------------------------------------------------+ +| Exponential moving window | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | +| functions | | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``corr`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``cov`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``mean`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``std`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``sum`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ +| ``var`` | N | | ++-----------------------------+---------------------------------+----------------------------------------------------+ + +Window indexer + ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| Window Functions | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.BaseIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.FixedForwardWindowIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ +| ``api.indexers.VariableOffsetWindowIndexer`` | N | | ++----------------------------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/window.rst b/docs/source/modin/window.rst new file mode 100644 index 00000000000..d173cf292f6 --- /dev/null +++ b/docs/source/modin/window.rst @@ -0,0 +1,29 @@ +============================= +Window +============================= + +.. currentmodule:: snowflake.snowpark.modin.pandas.window +.. rubric:: :doc:`All supported window APIs <supported/window_supported>` + +.. rubric:: Rolling window functions + +.. autosummary:: + :toctree: pandas_api/ + + Rolling.aggregate + Rolling.apply + Rolling.corr + Rolling.count + Rolling.cov + Rolling.kurt + Rolling.max + Rolling.mean + Rolling.median + Rolling.min + Rolling.quantile + Rolling.rank + Rolling.sem + Rolling.skew + Rolling.std + Rolling.sum + Rolling.var