Skip to content

Commit

Permalink
SNOW-1462359/SNOW-1462481: Add Series.str and Series.dt to docs (#1784)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-joshi authored Jun 18, 2024
1 parent a34cb79 commit fcf8455
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ fullname }}
{{ underline }}

.. currentmodule:: {{ module }}

.. automodinaccessorattribute:: {{ objname }}
6 changes: 6 additions & 0 deletions docs/source/_templates/autosummary/modin_accessor_method.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{{ fullname }}
{{ underline }}

.. currentmodule:: {{ module }}

.. automodinaccessormethod:: {{ objname }}
166 changes: 166 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,172 @@
# Disable footer message "Built with Sphinx using a theme provided by Read the Docs."
html_show_sphinx = False

# Custom Documenter and AutoSummary classes to render friendly names for accessor attributes/methods
# like pd.Series.str.replace and pd.Series.dt.date, rather than pd.series_utils.StringMethods.replace
# and pd.series_utils.DatetimeProperties.date.
# See implementation from pandas:
# https://github.com/pandas-dev/pandas/blob/bbe0e531383358b44e94131482e122bda43b33d7/doc/source/conf.py#L484-L485
# The rough control flow of autosummary/autodoc, and the manner in which our custom classes interact
# it, is as follows:
# (0.) The autosummary-generate directive creates rst files for each method detected within an "autosummary"
# block. I'm not completely sure how this resolution is performed, but it will only pick up elements
# defined in an explicit "autosummary" block, and does not find those declared within custom directives
# that inherit the Autosummary class. See comments on `ModinAutosummary` below.
# 1. `Autosummary` reads the :toctree: elements specified in modin/series.rst.
# These elements are listed by the names we want displayed ("Series.str.replace", "Series.dt.date", etc.),
# as autosummary does not provide simple mechanisms for replacing a path with a different label.
# 2. Our custom autosummary class (`ModinAutosummary`) replaces vanity names (e.g. "Series.str.replace")
# with the canonical import path ("series_utils.StringMethods.replace") before passing it to the base
# `Autosummary` class, which imports the appropriate module and class to parse type signatures and
# docstrings. After the base `Autosummary` import is done, `ModinAutosummary` restores the vanity names
# so they are properly rendered within the output of Series.html.
# 3. Individual method/attribute stubs are generated from our custom files in _templates/autosummary/.
# The subclasses of `ModinAccessorLevelDocumenter` define the custom "automodinaccessormethod" and
# "automodinaccessorattribute" directives, which are necessary to perform formatting and avoid certain
# autodoc warnings. This is similar to the approach taken by native pandas. See comments on these
# classes for more details.
# (4.) Sphinx or autodoc or some other thing out of our control resolves path names and turns them into
# links. In order for these links to properly generate, the `real_name` parsed in our custom
# `ModinAutosummary` class must match the `format_name` value generated from autodoc in our
# custom `ModinAccessorLevelDocumenter` class and its children.
import sphinx # isort:skip
from sphinx.ext.autodoc import ( # isort:skip
AttributeDocumenter,
Documenter,
MethodDocumenter,
)
from sphinx.ext.autosummary import Autosummary # isort:skip


class ModinAccessorLevelDocumenter(Documenter):
"""
Performs name resolution and formatting for modin Accessor classes like Series.str and Series.dt.
"""

def format_name(self):
# format_name determines the name displayed as part of a member's signature (below the heading)
# on every individual method/attribute page.
# This also determines the name of the anchor element that links to the member's signature,
# for example
# modin/pandas_api/modin.pandas.Series.dt.date.html#modin.pandas.Series.dt.date
# Changing this line may cause the links from Series.rst to break, since autosummary
# needs the anchor name to match the name listed in the toctree in order to link properly.
# Note that if this link is generated incorrectly, `make html` may still silently pass without
# generating any warnings.
return (
".".join(self.objpath)
.replace("series_utils.DatetimeProperties.", "Series.dt.")
.replace("series_utils.StringMethods.", "Series.str.")
)

def resolve_name(self, modname, parents, path, base):
if modname is None:
if path == "Series.str.":
modname = "modin.pandas"
parents = ["series_utils", "StringMethods"]
elif path == "Series.dt.":
modname = "modin.pandas"
parents = ["series_utils", "DatetimeProperties"]
return modname, parents + [base]


class ModinAccessorMethodDocumenter(ModinAccessorLevelDocumenter, MethodDocumenter):
objtype = "modinaccessormethod"
directivetype = "method"

# lower than MethodDocumenter (1) so this is not chosen for normal methods
priority = 0.6


class ModinAccessorAttributeDocumenter(ModinAccessorLevelDocumenter, MethodDocumenter):
objtype = "modinaccessorattribute"
directivetype = "attribute"

# lower than AttributeDocumenter (1) so this is not chosen for normal attributes
priority = 0.6

def format_signature(self) -> str:
# Avoids this warning:
# WARNING: Failed to get a method signature for modin.pandas.series_utils.DatetimeProperties.date: <property object at 0x130c33c20> is not a callable object
return ""


class ModinAutosummary(Autosummary):
"""
Overrides the default autosummary directive to properly resolve and display modin accessor
methods like Series.str.replace.
"""

def get_items(self, names):
if self.env.ref_context.get("py:module") == "modin.pandas":
# Within rst files, we will specify paths as `Series.str.replace`, `Series.dt.date`, etc.
# for readability and because autosummary is very inflexible about replacing these displayed
# strings with some other label.
# Even though we want the displayed names to contain `Series.str` instead of `StringMethods`,
# we need to replace them with their canonical import paths in order for Autosummary.get_items
# to correctly import them.
names = list(
map(
lambda name:
# The trailing . is important here so we don't replace the path for the `str` property
# of the pd.Series class by mistake.
name.replace("Series.str.", "series_utils.StringMethods.").replace(
"Series.dt.", "series_utils.DatetimeProperties."
),
names,
)
)

items = Autosummary.get_items(self, names)

if self.env.ref_context.get("py:module") == "modin.pandas":

def process_modin_accessors(args):
display_name, sig, summary, real_name = args
# Before calling Autosummary.get_items, we replaced `Series.str`/`Series.dt` with
# the canonical path from which to import these methods (`modin.pandas.series_utils.StringMethods`
# and `modin.pandas.series_utils.DatetimeProperties`) in order for Autosummary to
# be able to import the correct classes. These paths are then passed here as the
# parsed `display_name` value. However, we still want to render the name as
# `Series.str` or `Series.dt`, so we re-replace those values here.
display_name = display_name.replace(
"series_utils.StringMethods.",
"Series.str.",
).replace(
"series_utils.DatetimeProperties.",
"Series.dt.",
)
# We also need to replace `real_name` to properly resolve links from Series.rst.
# If we don't do this replacement here, autosummary will raise
# WARNING: document isn't included in any toctree
# for all the generated accessor members.
real_name = real_name.replace(
"series_utils.StringMethods.",
"Series.str.",
).replace(
"series_utils.DatetimeProperties.",
"Series.dt.",
)
return display_name, sig, summary, real_name

return list(map(process_modin_accessors, items))
return items


def setup(app):
# Like pandas, we need to do some pre-processing for accessor methods/properties like
# pd.Series.str.replace and pd.Series.dt.date in order to resolve the parent class correctly.
# https://github.com/pandas-dev/pandas/blob/bbe0e531383358b44e94131482e122bda43b33d7/doc/source/conf.py#L792
app.add_autodocumenter(ModinAccessorMethodDocumenter)
app.add_autodocumenter(ModinAccessorAttributeDocumenter)
app.add_directive("autosummary", ModinAutosummary)


# We overwrite the existing "autosummary" directive in order to properly resolve names for modin
# accessor classes. We cannot simply declare an alternative directive like "automodinsummary" to use
# in rst files because their children would not be picked up by the autosummary-generate flag.
suppress_warnings = ["app.add_directive"]


# Construct URL to the corresponding section in the snowpark-python repo
def linkcode_resolve(domain, info):
Expand Down
61 changes: 59 additions & 2 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -223,5 +223,62 @@ Series
Series.str
Series.dt

.. rubric:: :doc:`All supported Series str APIs <supported/series_str_supported>`
.. rubric:: :doc:`All supported Series dt APIs <supported/series_dt_supported>`

.. Series.str and Series.dt are imported from upstream modin.pandas, so we need to swap
.. the current module here.
.. currentmodule:: modin.pandas


.. rubric:: Datetime accessor properties

:doc:`All supported Series dt APIs <supported/series_dt_supported>`

.. autosummary::
:toctree: pandas_api/
:template: autosummary/modin_accessor_attribute.rst

Series.dt.date
Series.dt.year
Series.dt.month
Series.dt.day
Series.dt.hour
Series.dt.minute
Series.dt.second
Series.dt.dayofweek
Series.dt.day_of_week
Series.dt.dayofyear
Series.dt.day_of_year
Series.dt.quarter


.. rubric:: String accessor methods

:doc:`All supported Series str APIs <supported/series_str_supported>`

.. autosummary::
:toctree: pandas_api/
:template: autosummary/modin_accessor_method.rst

Series.str.capitalize
Series.str.casefold
Series.str.center
Series.str.contains
Series.str.count
Series.str.endswith
Series.str.get
Series.str.isdigit
Series.str.islower
Series.str.istitle
Series.str.isupper
Series.str.len
Series.str.lower
Series.str.lstrip
Series.str.match
Series.str.replace
Series.str.rstrip
Series.str.slice
Series.str.split
Series.str.startswith
Series.str.strip
Series.str.upper
3 changes: 0 additions & 3 deletions docs/source/modin/supported/series_str_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``slice_replace`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``replace`` | P | ``N`` if `pat` is non-string, `repl` is |
| | | non-string, or `n` is non-numeric or zero. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``split`` | P | ``N`` if `pat` is non-string, `n` is non-numeric, |
| | | `expand` is set, or `regex` is set. |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def split():
Returns
-------
Series, Index, DataFrame or MultiIndex
:class:`~snowflake.snowpark.modin.pandas.Series`, Index, :class:`~snowflake.snowpark.modin.pandas.DataFrame` or MultiIndex
Type matches caller unless expand=True (see Notes).
See also
Expand Down

0 comments on commit fcf8455

Please sign in to comment.