diff --git a/docs/source/_templates/autosummary/modin_accessor.rst b/docs/source/_templates/autosummary/modin_accessor.rst new file mode 100644 index 00000000000..5016886946a --- /dev/null +++ b/docs/source/_templates/autosummary/modin_accessor.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. automodinaccessor:: {{ objname }} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index c9ff1656f7b..1a8e059d7a3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -123,13 +123,59 @@ AttributeDocumenter, Documenter, MethodDocumenter, + PropertyDocumenter, ) from sphinx.ext.autosummary import Autosummary # isort:skip +class ModinAccessorDocumenter(PropertyDocumenter): + """ + Generates documentation for properties of Modin objects like Series.str and Series.dt that + are themselves accessor classes. + This class is necessary because we need to monkeypatch the Series.str/dt property objects + with the actual classes (StringMethods/DatetimeProperties) in order for autosummary-generate + to produce stubs for them. We override sphinx's `import_object` hook here to ensure it can + resolve these classes correctly. + + TODO SNOW-1063347: check whether this is still needed after removing series.py since upstream + modin uses CachedAccessor wrapper for str/dt + + This class is not responsible for properties of those accessors like Series.str.capitalize. 
+ + See sphinx source for PropertyDocumenter: + https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L2691 + """ + + objtype = "modinaccessor" + directivetype = "attribute" + + # lower priority than the default PropertyDocumenter so it is not chosen for normal properties + priority = 0.6 + + def import_object(self, raiseerror=False): + # Set `self.object` and related fields after importing the object, since sphinx has difficulty + # trying to import the top-level Series.str and Series.dt objects. + # Returns True if the object was successfully imported. + # See definition on parent classes: + # https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L2714 + # https://github.com/sphinx-doc/sphinx/blob/907d27dc6506c542c11a7dd16b560eb4be7da5fc/sphinx/ext/autodoc/__init__.py#L400 + import modin.pandas as pd + self.module = pd + self.parent = pd.Series + # objpath is an array like ["Series", "str"] + # object_name should be the name of the property (in this case "str") + self.object_name = self.objpath[-1] + self.object = getattr(pd.Series, self.object_name) + self.isclassmethod = False + return True + + class ModinAccessorLevelDocumenter(Documenter): """ - Performs name resolution and formatting for modin Accessor classes like Series.str and Series.dt. + Performs name resolution and formatting for properties of Modin Accessor classes like + Series.str.capitalize and Series.dt.date. + + This class is not responsible for the top-level object like Series.str or Series.dt. """ def format_name(self): @@ -243,9 +289,36 @@ def process_modin_accessors(args): def setup(app): + # Make sure modin.pandas namespace is properly set up + import modin.pandas as pd + import snowflake.snowpark.modin.plugin + # Monkeypatch dt/str to make sure their children are resolvable by autosummary-generate. 
+ # Without this monkeypatch, the autosummary-generate (which runs before any custom documenter + # classes can take effect) will report an error like the following for every child of Series.str + # and Series.dt: + # + # WARNING: [autosummary] failed to import modin.pandas.Series.str.slice. + # Possible hints: + # * AttributeError: 'property' object has no attribute 'slice' + # * ImportError: + # * ModuleNotFoundError: No module named 'modin.pandas.Series' + # + # Because we're replacing the `property` object, we also need to set the __doc__ of the new + # values of Series.str/dt to make sure autodoc can pick them up. The custom ModinAccessorDocumenter + # class allows the top-level Series.str/dt objects to be properly documented. + # + # TODO SNOW-1063347: check whether this is still needed after removing series.py since upstream + # modin uses CachedAccessor wrapper for str/dt rather than a property + old_series_dt = pd.Series.dt + old_series_str = pd.Series.str + pd.Series.dt = pd.series_utils.DatetimeProperties + pd.Series.str = pd.series_utils.StringMethods + pd.Series.dt.__doc__ = old_series_dt.__doc__ + pd.Series.str.__doc__ = old_series_str.__doc__ # Like pandas, we need to do some pre-processing for accessor methods/properties like # pd.Series.str.replace and pd.Series.dt.date in order to resolve the parent class correctly. # https://github.com/pandas-dev/pandas/blob/bbe0e531383358b44e94131482e122bda43b33d7/doc/source/conf.py#L792 + app.add_autodocumenter(ModinAccessorDocumenter) app.add_autodocumenter(ModinAccessorMethodDocumenter) app.add_autodocumenter(ModinAccessorAttributeDocumenter) app.add_directive("autosummary", ModinAutosummary) diff --git a/docs/source/modin/dataframe.rst b/docs/source/modin/dataframe.rst index c993e0bec10..ef55db3145c 100644 --- a/docs/source/modin/dataframe.rst +++ b/docs/source/modin/dataframe.rst @@ -2,7 +2,7 @@ DataFrame ============================= -.. currentmodule:: snowflake.snowpark.modin.pandas +.. 
currentmodule:: modin.pandas .. rubric:: :doc:`All supported DataFrame APIs ` .. rubric:: Constructor diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 21c68ca74ca..4928162ff3b 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -2,7 +2,7 @@ Series ============================= -.. currentmodule:: snowflake.snowpark.modin.pandas +.. currentmodule:: modin.pandas .. rubric:: :doc:`All supported Series APIs ` .. rubric:: Constructor @@ -226,17 +226,12 @@ Series .. autosummary:: :toctree: pandas_api/ + :template: autosummary/modin_accessor.rst Series.str Series.dt -.. Series.str and Series.dt are imported from upstream modin.pandas, so we need to swap -.. the current module here. - -.. currentmodule:: modin.pandas - - .. rubric:: Datetime accessor properties :doc:`All supported Series dt APIs ` diff --git a/src/snowflake/snowpark/modin/pandas/io.py b/src/snowflake/snowpark/modin/pandas/io.py index fe68d55346a..366dff06a31 100644 --- a/src/snowflake/snowpark/modin/pandas/io.py +++ b/src/snowflake/snowpark/modin/pandas/io.py @@ -514,7 +514,7 @@ def read_excel( storage_options: StorageOptions = None, dtype_backend: DtypeBackend | NoDefault = no_default, engine_kwargs: dict | None = None, -) -> DataFrame | dict[IntStrT, DataFrame]: # pragma: no cover +) -> pd.DataFrame | dict[IntStrT, pd.DataFrame]: # pragma: no cover _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( FactoryDispatcher,