-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #35 from reichlab/bsweger/add-readthedocs/30
Add readthedocs and sphinx to cladetime
- Loading branch information
Showing
12 changed files
with
415 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Read the Docs configuration file for Sphinx projects | ||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details | ||
|
||
version: 2 | ||
|
||
build: | ||
os: ubuntu-22.04 | ||
tools: | ||
python: "3.12" | ||
|
||
# Build documentation in the "docs/" directory with Sphinx | ||
sphinx: | ||
configuration: docs/conf.py | ||
fail_on_warning: true | ||
|
||
python: | ||
install: | ||
- method: pip | ||
path: . | ||
extra_requirements: | ||
- docs |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
import os | ||
import sys | ||
from datetime import date | ||
|
||
# Configuration file for the Sphinx documentation builder. | ||
|
||
# -- Project information | ||
|
||
project = "Cladetime" | ||
project_copyright = f"{date.today().year}, Reich Lab @ The University of Massachusetts Amherst" | ||
author = "Reich Lab" | ||
|
||
# Add cladetime location to the path, so we can use autodoc to | ||
# generate API documentation from docstrings. | ||
root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) | ||
sys.path.insert(0, root_path) | ||
|
||
release = "0.1" | ||
# FIXME: get the version dynamically | ||
version = "0.1.0" | ||
|
||
# -- General configuration | ||
|
||
extensions = [ | ||
"myst_parser", | ||
"sphinx.ext.autodoc", | ||
"sphinx_copybutton", | ||
"sphinx.ext.doctest", | ||
"sphinx.ext.intersphinx", | ||
"sphinx_github_style", | ||
"sphinxext.opengraph", | ||
"sphinx.ext.napoleon", | ||
] | ||
|
||
intersphinx_mapping = { | ||
"python": ("https://docs.python.org/3/", None), | ||
"sphinx": ("https://www.sphinx-doc.org/en/master/", None), | ||
"polars": ("https://docs.pola.rs/api/python/stable", None), | ||
} | ||
intersphinx_disabled_domains = ["std"] | ||
|
||
# Copied these settings from the copybutton's config | ||
# https://github.com/executablebooks/sphinx-copybutton/blob/master/docs/conf.py | ||
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " | ||
copybutton_prompt_is_regexp = True | ||
copybutton_line_continuation_character = "\\" | ||
copybutton_here_doc_delimiter = "EOT" | ||
copybutton_selector = "div:not(.no-copybutton) > div.highlight > pre" | ||
|
||
templates_path = ["_templates"] | ||
|
||
# The root toctree document. | ||
root_doc = "index" | ||
|
||
# Test code blocks only when explicitly specified | ||
doctest_test_doctest_blocks = "" | ||
|
||
# The theme to use for HTML and HTML Help pages. See the documentation for | ||
# a list of builtin themes. | ||
html_static_path = ["_static"] | ||
html_theme = "furo" | ||
html_favicon = "_static/reichlab_favicon.png" | ||
html_title = "Cladetime" | ||
html_last_updated_fmt = "%Y-%m-%d" | ||
|
||
# Settings for the GitHub link extension | ||
linkcode_url = "https://github.com/reichlab/cladetime" | ||
|
||
# Options passed to the furo HTML theme (announcement banner, logos, footer icons) | ||
html_theme_options = { | ||
"announcement": """ | ||
<a style=\"text-decoration: none; color: white;\" | ||
href=\"https://github.com/reichlab/cladetime/issues"> | ||
Cladetime is a work in progress. Please feel free to file issues on GitHub. | ||
</a> | ||
""", | ||
"sidebar_hide_name": True, | ||
"light_logo": "cladetime_logo_light_mode.png", | ||
"dark_logo": "cladetime_logo_dark_mode.png", | ||
"navigation_with_keys": True, | ||
"source_repository": "https://github.com/reichlab/cladetime/", | ||
# source for GitHub footer icon: | ||
# https://pradyunsg.me/furo/customisation/footer/#using-embedded-svgs | ||
"footer_icons": [ | ||
{ | ||
"name": "GitHub", | ||
"url": "https://github.com/reichlab/cladetime", | ||
"html": """ | ||
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16"> | ||
<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path> | ||
</svg> | ||
""", | ||
"class": "", | ||
}, | ||
], | ||
} | ||
|
||
# from https://myst-parser.readthedocs.io/en/latest/syntax/optional.html | ||
myst_enable_extensions = [ | ||
"amsmath", | ||
"deflist", | ||
"dollarmath", | ||
"fieldlist", | ||
"substitution", | ||
"tasklist", | ||
"colon_fence", | ||
"attrs_inline", | ||
] | ||
|
||
# The name of the Pygments (syntax highlighting) style to use. | ||
pygments_style = "friendly" | ||
|
||
# Show typehints as content of the function or method | ||
autodoc_typehints = "description" | ||
autodoc_member_order = "bysource" | ||
|
||
# Open Graph metadata | ||
ogp_site_url = "https://cladetime.readthedocs.io" | ||
ogp_title = "cladetime documentation" | ||
ogp_type = "website" | ||
ogp_image = "https://cladetime.readthedocs.io/en/latest/_static/cladetime_logo_light_mode.png" | ||
ogp_social_cards = { | ||
"image": "https://cladetime.readthedocs.io/en/latest/_static/cladetime_logo_light_mode.png", | ||
"line_color": "#5d9c9c", | ||
} | ||
|
||
# Warn about all references to unknown targets | ||
nitpicky = True | ||
nitpick_ignore = [ | ||
("py:class", "datetime"), | ||
("py:class", "polars.LazyFrame"), | ||
("py:class", "polars.lazyframe.frame.LazyFrame"), | ||
] | ||
|
||
|
||
# -- Options for EPUB output | ||
epub_show_urls = "footnote" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
:og:description: Cladetime is a Python interface for accessing SARS-CoV-2 sequence and clade data provided by Nextstrain. | ||
|
||
=============== | ||
Cladetime | ||
=============== | ||
|
||
Cladetime is a Python interface for accessing `Nextstrain <https://nextstrain.org/>`_ SARS-CoV-2 sequence and clade data. | ||
|
||
.. toctree:: | ||
:titlesonly: | ||
:hidden: | ||
|
||
Home <self> | ||
user-guide | ||
reference/index | ||
|
||
Installation | ||
------------ | ||
|
||
Cladetime can be installed with `pip <https://pip.pypa.io/>`_: | ||
|
||
.. code-block:: bash | ||
$ pip install git+https://github.com/reichlab/cladetime.git | ||
Usage | ||
----- | ||
|
||
The :any:`CladeTime` class provides a lightweight wrapper around historical and current | ||
SARS-CoV-2 GenBank sequence and sequence metadata created by `nextstrain.org's <https://nextstrain.org/>`_ | ||
daily workflow pipeline. | ||
|
||
.. code-block:: python | ||
>>> import polars as pl | ||
>>> from cladetime import CladeTime | ||
>>> ct = CladeTime() | ||
>>> filtered_sequence_metadata = ( | ||
... ct.sequence_metadata.select(["country", "division", "date", "host", "clade_nextstrain"]) | ||
... .filter( | ||
... pl.col("country") == "USA", | ||
... pl.col("date").is_not_null(), | ||
... pl.col("host") == "Homo sapiens", | ||
... ) | ||
... .cast({"date": pl.Date}, strict=False) | ||
... ) | ||
>>> filtered_sequence_metadata.head(5).collect() | ||
shape: (5, 5) | ||
┌─────────┬──────────┬────────────┬──────────────┬──────────────────┐ | ||
│ country ┆ division ┆ date ┆ host ┆ clade_nextstrain │ | ||
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ | ||
│ str ┆ str ┆ date ┆ str ┆ str │ | ||
╞═════════╪══════════╪════════════╪══════════════╪══════════════════╡ | ||
│ USA ┆ Alabama ┆ 2022-07-07 ┆ Homo sapiens ┆ 22A │ | ||
│ USA ┆ Arizona ┆ 2022-07-02 ┆ Homo sapiens ┆ 22B │ | ||
│ USA ┆ Arizona ┆ 2022-07-19 ┆ Homo sapiens ┆ 22B │ | ||
│ USA ┆ Arizona ┆ 2022-07-15 ┆ Homo sapiens ┆ 22B │ | ||
│ USA ┆ Arizona ┆ 2022-07-20 ┆ Homo sapiens ┆ 22B │ | ||
└─────────┴──────────┴────────────┴──────────────┴──────────────────┘ | ||
See the :doc:`user-guide` for more details about working with Cladetime. | ||
|
||
The :doc:`reference/index` documentation provides API-level documentation. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
:og:description: Cladetime is a Python interface for accessing SARS-CoV-2 sequence and clade data provided by Nextstrain. | ||
|
||
========== | ||
CladeTime | ||
========== | ||
|
||
.. autoclass:: cladetime.CladeTime | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
API Reference | ||
============= | ||
|
||
.. toctree:: | ||
|
||
cladetime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
=============== | ||
User Guide | ||
=============== | ||
|
||
|
||
|
||
Finding Nextstrain SARS-CoV-2 sequences and sequence metadata | ||
-------------------------------------------------------------- | ||
|
||
Cladetime's CladeTime class provides a lightweight interface to nextstrain.org files. | ||
|
||
.. code-block:: python | ||
>>> from cladetime import CladeTime | ||
# Instantiating a CladeTime object with no parameters will use the | ||
# latest available data from nextstrain.org. | ||
>>> ct = CladeTime() | ||
# URL to the most recent SARS-CoV-2 sequence file (.fasta) | ||
>>> ct.url_sequence | ||
'https://nextstrain-data.s3.amazonaws.com/files/ncov/open/sequences.fasta.zst?versionId=d66Hn1T0eFMAg8osEh8Yrod.QEUBRxvu' | ||
# URL to the metadata that describes the sequences in the above file | ||
>>> ct.url_sequence_metadata | ||
'https://nextstrain-data.s3.amazonaws.com/files/ncov/open/metadata.tsv.zst?versionId=JTXXFlKyyvt9AerxKMwoZflhFYQFrDek' | ||
# Metadata about the nextstrain data pipeline that generated the sequence file and its metadata | ||
>>> ct.ncov_metadata | ||
{'schema_version': 'v1', | ||
'nextclade_version': 'nextclade 3.8.2', | ||
'nextclade_dataset_name': 'SARS-CoV-2', | ||
'nextclade_dataset_version': '2024-09-25--21-50-30Z', | ||
'nextclade_tsv_sha256sum': '5b0f2b64bfe694a3c96bd5a116de8fae23b144bfd3d22da774d4bfe9a84399c3', | ||
'metadata_tsv_sha256sum': '1dc6a4204039e5c69eed84583faf75bbec1629e531dc99aab5bd566d3fb28295'} | ||
Working with SARS-CoV-2 sequence metadata | ||
------------------------------------------ | ||
|
||
The CladeTime class also provides a Polars LazyFrame object that points to Nextstrain's sequence metadata file. | ||
This file is in .tsv format and contains information about the sequences, such as their collection date, | ||
host, and location. | ||
|
||
The metadata also includes a clade assignment for each sequence. Nextstrain assigns clades based on a reference tree, | ||
and the reference tree varies over time. | ||
|
||
.. code-block:: python | ||
>>> import polars as pl | ||
>>> from cladetime import CladeTime | ||
>>> ct = CladeTime() | ||
# ct contains a Polars LazyFrame that references the sequence metadata .tsv file on AWS S3 | ||
>>> lf = ct.sequence_metadata | ||
>>> lf | ||
<LazyFrame at 0x105341190> | ||
Getting historical SARS-CoV-2 sequence metadata | ||
------------------------------------------------ | ||
|
||
A CladeTime instance created without parameters will reference the most | ||
recent data available from Nextstrain. | ||
|
||
To access sequence metadata at a specific point in time, pass a date string | ||
in the format 'YYYY-MM-DD' to the CladeTime constructor. Alternatively, you can pass | ||
a Python datetime object. Both will be treated as UTC dates/times. If a date string | ||
is specified, the datetime will be set to 00:00:00 hours:minutes:seconds on that | ||
date, meaning that the CladeTime object will retrieve the sequence metadata that | ||
was available at the start of that day. | ||
|
||
.. code-block:: python | ||
>>> import polars as pl | ||
>>> from cladetime import CladeTime | ||
>>> ct = CladeTime(sequence_as_of="2024-08-02") | ||
# ct operations now reference the version of the sequence metadata | ||
# that was available at midnight UTC on August 2, 2024. | ||
>>> ct.sequence_metadata \ | ||
... .cast({"date": pl.Date}, strict=False) \ | ||
... .select(pl.max("date")).collect() | ||
shape: (1, 1) | ||
┌────────────┐ | ||
│ date │ | ||
│ --- │ | ||
│ date │ | ||
╞════════════╡ | ||
│ 2024-07-23 │ | ||
└────────────┘ | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.