Skip to content

Commit

Permalink
Merge pull request #35 from reichlab/bsweger/add-readthedocs/30
Browse files Browse the repository at this point in the history
Add readthedocs and sphinx to cladetime
  • Loading branch information
bsweger authored Oct 17, 2024
2 parents 92f9e46 + 412d26e commit c3095bf
Show file tree
Hide file tree
Showing 12 changed files with 415 additions and 40 deletions.
21 changes: 21 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.12"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/conf.py
  fail_on_warning: true

python:
  install:
    - method: pip
      path: .
      extra_requirements:
        - docs
Binary file added docs/_static/cladetime_logo_dark_mode.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/cladetime_logo_light_mode.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/reichlab.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/reichlab_favicon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
137 changes: 137 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os
import sys
from datetime import date

# Configuration file for the Sphinx documentation builder.

# -- Project information

project = "Cladetime"
# The copyright year is computed when the docs are built, not hard-coded.
project_copyright = f"{date.today().year}, Reich Lab @ The University of Massachusetts Amherst"
author = "Reich Lab"

# Add cladetime location to the path, so we can use autodoc to
# generate API documentation from docstrings.
# root_path is the parent of the directory that holds this conf.py.
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, root_path)

# Sphinx convention: ``release`` is the full version string and ``version``
# is the short "major.minor" form. The two values were previously swapped.
# ``version`` is derived from ``release`` so they cannot drift apart.
# FIXME: get the version dynamically
release = "0.1.0"
version = ".".join(release.split(".")[:2])

# -- General configuration

# Sphinx extensions to load, kept in their original order.
extensions = [
    "myst_parser",
    "sphinx.ext.autodoc",
    "sphinx_copybutton",
    "sphinx.ext.doctest",
    "sphinx.ext.intersphinx",
    "sphinx_github_style",
    "sphinxext.opengraph",
    "sphinx.ext.napoleon",
]

# External documentation sets that cross-references may point at.
# Each name maps to a (base URL, None) pair.
intersphinx_mapping = {
    name: (base_url, None)
    for name, base_url in (
        ("python", "https://docs.python.org/3/"),
        ("sphinx", "https://www.sphinx-doc.org/en/master/"),
        ("polars", "https://docs.pola.rs/api/python/stable"),
    )
}
intersphinx_disabled_domains = ["std"]

# Copied these settings from the copybutton's config
# https://github.com/executablebooks/sphinx-copybutton/blob/master/docs/conf.py
# The prompt pattern is an alternation of the prompt prefixes listed here.
copybutton_prompt_text = "|".join(
    (
        r">>> ",
        r"\.\.\. ",
        r"\$ ",
        r"In \[\d*\]: ",
        r" {2,5}\.\.\.: ",
        r" {5,8}: ",
    )
)
copybutton_prompt_is_regexp = True
copybutton_line_continuation_character = "\\"
copybutton_here_doc_delimiter = "EOT"
copybutton_selector = "div:not(.no-copybutton) > div.highlight > pre"

templates_path = ["_templates"]

# The root toctree document.
root_doc = "index"

# Test code blocks only when explicitly specified
# (the setting is deliberately an empty string).
doctest_test_doctest_blocks = ""

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "furo"
html_title = "Cladetime"
html_favicon = "_static/reichlab_favicon.png"
html_last_updated_fmt = "%Y-%m-%d"

# These folders are copied to the documentation's HTML output
html_static_path = ["_static"]

# Settings for the GitHub link extension
linkcode_url = "https://github.com/reichlab/cladetime"

# Options passed to the HTML theme. The multi-line HTML snippets are built
# from adjacent string literals so the code can be indented without
# changing the resulting strings.
html_theme_options = {
    "announcement": (
        "\n"
        '<a style="text-decoration: none; color: white;"\n'
        'href="https://github.com/reichlab/cladetime/issues">\n'
        "Cladetime is a work in progress. Please feel free to file issues on GitHub.\n"
        "</a>\n"
    ),
    "sidebar_hide_name": True,
    "light_logo": "cladetime_logo_light_mode.png",
    "dark_logo": "cladetime_logo_dark_mode.png",
    "navigation_with_keys": True,
    "source_repository": "https://github.com/reichlab/cladetime/",
    # source for GitHub footer icon:
    # https://pradyunsg.me/furo/customisation/footer/#using-embedded-svgs
    "footer_icons": [
        {
            "name": "GitHub",
            "url": "https://github.com/reichlab/cladetime",
            "html": (
                "\n"
                '<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">\n'
                '<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path>\n'
                "</svg>\n"
            ),
            "class": "",
        },
    ],
}

# MyST syntax extensions to enable; see
# https://myst-parser.readthedocs.io/en/latest/syntax/optional.html
myst_enable_extensions = [
    "amsmath",
    "deflist",
    "dollarmath",
    "fieldlist",
    "substitution",
    "tasklist",
    "colon_fence",
    "attrs_inline",
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "friendly"

# Show typehints as content of the function or method
autodoc_typehints = "description"
autodoc_member_order = "bysource"

# Open Graph metadata
ogp_site_url = "https://cladetime.readthedocs.io"
ogp_title = "cladetime documentation"
ogp_type = "website"
ogp_image = "https://cladetime.readthedocs.io/en/latest/_static/cladetime_logo_light_mode.png"
# The social card reuses the same logo URL as ogp_image.
ogp_social_cards = {
    "image": ogp_image,
    "line_color": "#5d9c9c",
}

# Warn about all references to unknown targets
nitpicky = True
# Class references that are excluded from the nitpicky warnings.
nitpick_ignore = [
    ("py:class", class_name)
    for class_name in (
        "datetime",
        "polars.LazyFrame",
        "polars.lazyframe.frame.LazyFrame",
    )
]


# -- Options for EPUB output
epub_show_urls = "footnote"
68 changes: 68 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
:og:description: Cladetime is a Python interface for accessing SARS-CoV-2 sequence and clade data provided by Nextstrain.

===============
Cladetime
===============

Cladetime is a Python interface for accessing `Nextstrain <https://nextstrain.org/>`_ SARS-CoV-2 sequence and clade data.

.. toctree::
   :titlesonly:
   :hidden:

   Home <self>
   user-guide
   reference/index

Installation
------------

Cladetime can be installed with `pip <https://pip.pypa.io/>`_:

.. code-block:: bash

    $ pip install git+https://github.com/reichlab/cladetime.git

Usage
-----

The :any:`CladeTime` class provides a lightweight wrapper around historical and current
SARS-CoV-2 GenBank sequence and sequence metadata created by `nextstrain.org's <https://nextstrain.org/>`_
daily workflow pipeline.

.. code-block:: python

    >>> import polars as pl
    >>> from cladetime import CladeTime
    >>> ct = CladeTime()
    >>> filtered_sequence_metadata = (
    ...     ct.sequence_metadata.select(["country", "division", "date", "host", "clade_nextstrain"])
    ...     .filter(
    ...         pl.col("country") == "USA",
    ...         pl.col("date").is_not_null(),
    ...         pl.col("host") == "Homo sapiens",
    ...     )
    ...     .cast({"date": pl.Date}, strict=False)
    ... )
    >>> filtered_sequence_metadata.head(5).collect()
    shape: (5, 5)
    ┌─────────┬──────────┬────────────┬──────────────┬──────────────────┐
    │ country ┆ division ┆ date       ┆ host         ┆ clade_nextstrain │
    │ ---     ┆ ---      ┆ ---        ┆ ---          ┆ ---              │
    │ str     ┆ str      ┆ date       ┆ str          ┆ str              │
    ╞═════════╪══════════╪════════════╪══════════════╪══════════════════╡
    │ USA     ┆ Alabama  ┆ 2022-07-07 ┆ Homo sapiens ┆ 22A              │
    │ USA     ┆ Arizona  ┆ 2022-07-02 ┆ Homo sapiens ┆ 22B              │
    │ USA     ┆ Arizona  ┆ 2022-07-19 ┆ Homo sapiens ┆ 22B              │
    │ USA     ┆ Arizona  ┆ 2022-07-15 ┆ Homo sapiens ┆ 22B              │
    │ USA     ┆ Arizona  ┆ 2022-07-20 ┆ Homo sapiens ┆ 22B              │
    └─────────┴──────────┴────────────┴──────────────┴──────────────────┘

See the :doc:`user-guide` for more details about working with Cladetime.

The :doc:`reference/index` documentation provides API-level documentation.

8 changes: 8 additions & 0 deletions docs/reference/cladetime.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
:og:description: Cladetime is a Python interface for accessing SARS-CoV-2 sequence and clade data provided by Nextstrain.

==========
CladeTime
==========

.. autoclass:: cladetime.CladeTime
   :members:
6 changes: 6 additions & 0 deletions docs/reference/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
API Reference
=============

.. toctree::

   cladetime
94 changes: 94 additions & 0 deletions docs/user-guide.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
===============
User Guide
===============



Finding Nextstrain SARS-CoV-2 sequences and sequence metadata
--------------------------------------------------------------

Cladetime provides a CladeTime class that offers a lightweight interface to nextstrain.org files.

.. code-block:: python

    >>> from cladetime import CladeTime

    # Instantiating a CladeTime object with no parameters will use the
    # latest available data from nextstrain.org.
    >>> ct = CladeTime()

    # URL to the most recent SARS-CoV-2 sequence file (.fasta)
    >>> ct.url_sequence
    'https://nextstrain-data.s3.amazonaws.com/files/ncov/open/sequences.fasta.zst?versionId=d66Hn1T0eFMAg8osEh8Yrod.QEUBRxvu'

    # URL to the metadata that describes the sequences in the above file
    >>> ct.url_sequence_metadata
    'https://nextstrain-data.s3.amazonaws.com/files/ncov/open/metadata.tsv.zst?versionId=JTXXFlKyyvt9AerxKMwoZflhFYQFrDek'

    # Metadata about the nextstrain data pipeline that generated the sequence file and its metadata
    >>> ct.ncov_metadata
    {'schema_version': 'v1',
     'nextclade_version': 'nextclade 3.8.2',
     'nextclade_dataset_name': 'SARS-CoV-2',
     'nextclade_dataset_version': '2024-09-25--21-50-30Z',
     'nextclade_tsv_sha256sum': '5b0f2b64bfe694a3c96bd5a116de8fae23b144bfd3d22da774d4bfe9a84399c3',
     'metadata_tsv_sha256sum': '1dc6a4204039e5c69eed84583faf75bbec1629e531dc99aab5bd566d3fb28295'}

Working with SARS-CoV-2 sequence metadata
------------------------------------------

The CladeTime class also provides a Polars LazyFrame object that points to Nextstrain's sequence metadata file.
This file is in .tsv format and contains information about the sequences, such as their collection date,
host, and location.

The metadata also includes a clade assignment for each sequence. Nextstrain assigns clades based on a reference tree,
and the reference tree varies over time.

.. code-block:: python

    >>> import polars as pl
    >>> from cladetime import CladeTime
    >>> ct = CladeTime()

    # ct contains a Polars LazyFrame that references the sequence metadata .tsv file on AWS S3
    >>> lf = ct.sequence_metadata
    >>> lf
    <LazyFrame at 0x105341190>

Getting historical SARS-CoV-2 sequence metadata
------------------------------------------------

A CladeTime instance created without parameters will reference the most
recent data available from Nextstrain.

To access sequence metadata at a specific point in time, pass a date string
in the format 'YYYY-MM-DD' to the CladeTime constructor. Alternatively, you can pass
a Python datetime object. Both will be treated as UTC dates/times. If a date string
is specified, the datetime will be set to 00:00:00 hours:minutes:seconds on that
date, meaning that the CladeTime object will retrieve the sequence metadata that
was available at the start of that day.

.. code-block:: python

    >>> import polars as pl
    >>> from cladetime import CladeTime
    >>> ct = CladeTime(sequence_as_of="2024-08-02")

    # ct operations now reference the version of the sequence metadata
    # that was available at midnight UTC on August 2, 2024.
    >>> ct.sequence_metadata \
    ...     .cast({"date": pl.Date}, strict=False) \
    ...     .select(pl.max("date")).collect()
    shape: (1, 1)
    ┌────────────┐
    │ date       │
    │ ---        │
    │ date       │
    ╞════════════╡
    │ 2024-07-23 │
    └────────────┘
17 changes: 16 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ authors = [

requires-python = ">=3.11"
readme = "README.md"
license = {text = "MIT"}

classifiers = [
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: MIT License",
]

dependencies = [
"awscli>=1.32.92",
Expand Down Expand Up @@ -41,9 +45,20 @@ dev = [
"types-python-dateutil",
"types-requests",
]
docs = [
"furo",
"matplotlib",
"myst-parser",
"sphinx>=5.0,<6.0",
"sphinx-copybutton",
"sphinx-github-style",
"sphinxext-opengraph",
]

[project.urls]
Repository = "https://github.com/reichlab/cladetime.git"
Documentation = "https://cladetime.readthedocs.io/"
Issues = "https://github.com/reichlab/cladetime/issues"

[project.entry-points."console_scripts"]
assign_clades = "cladetime.assign_clades:main"
Expand Down
Loading

0 comments on commit c3095bf

Please sign in to comment.