Skip to content

Commit

Permalink
SNOW-1545615: Change snowflake.snowpark.modin.pandas to modin.pandas …
Browse files Browse the repository at this point in the history
…in DF/Series doc generation (#1940)
  • Loading branch information
sfc-gh-joshi authored Jul 23, 2024
1 parent 66cbf5c commit b2cd19c
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 10 deletions.
6 changes: 6 additions & 0 deletions docs/source/_templates/autosummary/modin_accessor.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ fullname }}
{{ underline }}

.. currentmodule:: {{ module }}

.. automodinaccessor:: {{ objname }}
75 changes: 74 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,59 @@
AttributeDocumenter,
Documenter,
MethodDocumenter,
PropertyDocumenter,
)
from sphinx.ext.autosummary import Autosummary # isort:skip


class ModinAccessorDocumenter(PropertyDocumenter):
    """
    Generates documentation for properties of Modin objects like Series.str and Series.dt that
    are themselves accessor classes.

    This class is necessary because we need to monkeypatch the Series.str/dt property objects
    with the actual classes (StringMethods/DatetimeProperties) in order for autosummary-generate
    to produce stubs for them. We override sphinx's `import_object` hook here to ensure it can
    resolve these classes correctly.

    TODO SNOW-1063347: check whether this is still needed after removing series.py since upstream
    modin uses CachedAccessor wrapper for str/dt

    This class is not responsible for properties of those accessors like Series.str.capitalize.

    See sphinx source for PropertyDocumenter:
    https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L2691
    """

    objtype = "modinaccessor"
    directivetype = "attribute"

    # lower priority than the default PropertyDocumenter so it is not chosen for normal properties
    priority = 0.6

    def import_object(self, raiseerror=False):
        """
        Resolve the accessor named by ``self.objpath`` directly on ``modin.pandas.Series``.

        Sets `self.module`, `self.parent`, `self.object_name`, `self.object` and
        `self.isclassmethod` by hand, since sphinx has difficulty trying to import the
        top-level Series.str and Series.dt objects.

        Parameters
        ----------
        raiseerror : bool, default False
            If True, a failed lookup raises ImportError (the exception type sphinx's own
            `import_object` implementations raise). If False, a failed lookup is reported
            as an autodoc warning and the method returns False.

        Returns
        -------
        bool
            True if the object was successfully resolved, False otherwise.

        See definition on parent classes:
        https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L2714
        https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L400
        """
        import modin.pandas as pd

        self.module = pd
        self.parent = pd.Series
        # objpath is an array like ["Series", "str"]
        # object_name should be the name of the property (in this case "str")
        self.object_name = self.objpath[-1]
        try:
            self.object = getattr(pd.Series, self.object_name)
        except AttributeError as exc:
            # Follow the base Documenter.import_object contract instead of letting a raw
            # AttributeError abort the build with an unrelated-looking traceback.
            message = (
                f"autodoc: failed to resolve modin accessor "
                f"{'.'.join(self.objpath)!r} on modin.pandas.Series: {exc}"
            )
            if raiseerror:
                raise ImportError(message) from exc
            from sphinx.util import logging as sphinx_logging

            sphinx_logging.getLogger(__name__).warning(message, type="autodoc")
            # Ask sphinx to re-read this document on the next build, as the base class does.
            self.env.note_reread()
            return False
        self.isclassmethod = False
        return True


class ModinAccessorLevelDocumenter(Documenter):
"""
Performs name resolution and formatting for modin Accessor classes like Series.str and Series.dt.
Performs name resolution and formatting for properties of Modin Accessor classes like
Series.str.capitalize and Series.dt.date.
This class is not responsible for the top-level object like Series.str or Series.dt.
"""

def format_name(self):
Expand Down Expand Up @@ -243,9 +289,36 @@ def process_modin_accessors(args):


def setup(app):
# Make sure modin.pandas namespace is properly set up
import modin.pandas as pd
import snowflake.snowpark.modin.plugin
# Monkeypatch dt/str to make sure their children are resolvable by autosummary-generate.
# Without this monkeypatch, the autosummary-generate (which runs before any custom documenter
# classes can take effect) will report an error like the following for every child of Series.str
# and Series.dt:
#
# WARNING: [autosummary] failed to import modin.pandas.Series.str.slice.
# Possible hints:
# * AttributeError: 'property' object has no attribute 'slice'
# * ImportError:
# * ModuleNotFoundError: No module named 'modin.pandas.Series'
#
# Because we're replacing the `property` object, we also need to set the __doc__ of the new
# values of Series.str/dt to make sure autodoc can pick them up. The custom ModinAccessorDocumenter
# class allows the top-level Series.str/dt objects to be properly documented.
#
# TODO SNOW-1063347: check whether this is still needed after removing series.py since upstream
# modin uses CachedAccessor wrapper for str/dt rather than a property
old_series_dt = pd.Series.dt
old_series_str = pd.Series.str
pd.Series.dt = pd.series_utils.DatetimeProperties
pd.Series.str = pd.series_utils.StringMethods
pd.Series.dt.__doc__ = old_series_dt.__doc__
pd.Series.str.__doc__ = old_series_str.__doc__
# Like pandas, we need to do some pre-processing for accessor methods/properties like
# pd.Series.str.replace and pd.Series.dt.date in order to resolve the parent class correctly.
# https://github.com/pandas-dev/pandas/blob/bbe0e531383358b44e94131482e122bda43b33d7/doc/source/conf.py#L792
app.add_autodocumenter(ModinAccessorDocumenter)
app.add_autodocumenter(ModinAccessorMethodDocumenter)
app.add_autodocumenter(ModinAccessorAttributeDocumenter)
app.add_directive("autosummary", ModinAutosummary)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
DataFrame
=============================

.. currentmodule:: snowflake.snowpark.modin.pandas
.. currentmodule:: modin.pandas
.. rubric:: :doc:`All supported DataFrame APIs <supported/dataframe_supported>`

.. rubric:: Constructor
Expand Down
9 changes: 2 additions & 7 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Series
=============================

.. currentmodule:: snowflake.snowpark.modin.pandas
.. currentmodule:: modin.pandas
.. rubric:: :doc:`All supported Series APIs <supported/series_supported>`

.. rubric:: Constructor
Expand Down Expand Up @@ -226,17 +226,12 @@ Series

.. autosummary::
:toctree: pandas_api/
:template: autosummary/modin_accessor.rst

Series.str
Series.dt


.. Series.str and Series.dt are imported from upstream modin.pandas, so we need to swap
.. the current module here.
.. currentmodule:: modin.pandas


.. rubric:: Datetime accessor properties

:doc:`All supported Series dt APIs <supported/series_dt_supported>`
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ def read_excel(
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend | NoDefault = no_default,
engine_kwargs: dict | None = None,
) -> DataFrame | dict[IntStrT, DataFrame]: # pragma: no cover
) -> pd.DataFrame | dict[IntStrT, pd.DataFrame]: # pragma: no cover
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import (
FactoryDispatcher,
Expand Down

0 comments on commit b2cd19c

Please sign in to comment.