Merge pull request #12 from sanjaymjoshi/quantile

Documentation using sphinx
sanjaymjoshi · Mar 3, 2024 · 472e3b3 · 472e3b3
2 parents f709291 + 42de96a
commit 472e3b3
Show file tree

Hide file tree

Showing 9 changed files with 142 additions and 46 deletions.
diff --git a/Pipfile b/Pipfile
@@ -22,3 +22,4 @@ pytest-cov = "*"
 build = "*"
 twine = "*"
 flake8-pyproject = "*"
+sphinx = "*"
diff --git a/cspell.json b/cspell.json
@@ -5,8 +5,12 @@
     "language": "en",
     // words - list of words to be always considered correct
     "words": [
+        "automodule",
+        "autosummary",
         "barh",
         "expon",
+        "genindex",
+        "intersphinx",
         "isort",
         "kdeplot",
         "labelbottom",
@@ -15,15 +19,21 @@
         "lineplot",
         "linewidth",
         "Loosers",
+        "maxdepth",
+        "modindex",
         "mypy",
         "pipenv",
         "pyproject",
         "pytest",
         "quantile",
+        "quickstart",
+        "Sanjay",
         "Tighters",
         "tmean",
+        "toctree",
         "triang",
         "tstd",
+        "undoc",
         "xaxis",
         "xlabel",
         "xlim",

diff --git a/docs/README.md b/docs/README.md
@@ -0,0 +1,7 @@
+# Documentation
+
+This folder contains infrastructure to build documentation using `sphinx`. To build
+using this infrastructure, install the dependencies using `pipenv`.
+
+- On Windows, execute `main.bat` from this folder.
+- On Linux/Mac, execute `sphinx-build source build` from this folder.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -13,7 +13,7 @@
 sys.path.insert(0, os.path.abspath(".."))
 
 
-project = "Reliability Statistics"
+project = "Reliable Statistics"
 copyright = "2023, Sanjay M Joshi"
 author = "Sanjay M Joshi"
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -1,12 +1,13 @@
-.. Reliability Statistics documentation master file, created by
+.. ReliableStatistics documentation master file, created by
    sphinx-quickstart on Sun Feb 26 11:41:33 2023.
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to Reliability Statistics's documentation!
+Welcome to Reliable Statistics's documentation!
 ==================================================
-**relistats** is a Python library for computing reliability engineering
-statistics, such as reliability, confidence, and assurance. 
+**relistats** is a Python library for computing reliable
+statistics, such as reliability, confidence, and assurance, as well as
+tolerance and assurance intervals.
 
 .. toctree::
    :maxdepth: 2

diff --git a/docs/source/relistats.rst b/docs/source/relistats.rst
@@ -22,6 +22,22 @@ relistats.binom_fin module
    :undoc-members:
    :show-inheritance:
 
+relistats.intervals module
+--------------------------
+
+.. automodule:: relistats.intervals
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+relistats.percentile module
+---------------------------
+
+.. automodule:: relistats.percentile
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Module contents
 ---------------
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,7 +32,7 @@ Repository = "https://github.com/sanjaymjoshi/relistats"
 Changelog = "https://github.com/sanjaymjoshi/relistats/blob/main/CHANGELOG.md"
 
 [tool.flake8]
-ignore = ["E203", "E266", "E501", "W503"]
+ignore = ["E203", "E266", "E501", "W503", "W605"]
 max-line-length = 88
 max-complexity = 18
 select = ["B", "C", "E", "F", "W", "T4"]

diff --git a/relistats/intervals.py b/relistats/intervals.py
@@ -15,26 +15,48 @@
 
 
 def confidence_interval_of_mean(c: float, *args) -> tuple[Any, Any]:
-    """Returns confidence interval of mean from args at confidence of c, 0 < c < 1"""
+    """Confidence interval of mean of `args` at confidence level `c`.
+
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :param args: array of values
+    :type args: array_like of type that supports computation of mean
+    :return: confidence interval
+    :rtype: tuple of same type as `args`
+    """
     mean, sem = stats.tmean(*args), stats.sem(*args)
     return stats.norm.interval(c, loc=mean, scale=sem)
 
 
 def confidence_interval_of_median(c: float, *args) -> Optional[tuple[Any, Any]]:
-    """Returns median interval from args at confidence of at least c, if possible.
-    Returns None if not possible.
-    args is any iterable (list, tuple, set)
+    """Confidence interval of median of `args` at confidence level `c`.
+
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :param args: array of values
+    :type args: array_like of type that supports computation of mean
+    :return: confidence interval or None
+    :rtype: tuple of same type as `args` or None
     """
     return confidence_interval_of_percentile(0.5, c, *args)
 
 
 def confidence_interval_of_percentile(
     p: float, c: float, *args
 ) -> Optional[tuple[Any, Any]]:
-    """Returns p'th percentile/quantile interval from args at confidence of at least c, if possible.
-    Use this method if you data is not sorted already, else you can use quantile_interval_places.
-    Returns None if not possible.
-    args is any iterable (list, tuple, set)
+    """`p`'th percentile/quantile interval of `args` at confidence level `c`.
+
+    Use this method if your data is not sorted already, else you can use
+    :meth:`relistats.intervals.percentile_interval_locs`.
+
+    :param p: percentile/quantile level
+    :type p: float, `0 < p < 1`
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :param args: array of values
+    :type args: array_like of type that supports computation of mean
+    :return: confidence interval, or None if it cannot be computed
+    :rtype: tuple of same type as `args` or None
     """
     n = len(*args)
     ii = percentile_interval_locs(n, p, c)
@@ -45,17 +67,25 @@ def confidence_interval_of_percentile(
 
 
 def percentile_interval_locs(n: int, p: float, c: float) -> Optional[tuple[int, int]]:
-    """Returns tuple of two locations (1..n) such that percentile/quantile p
-    (0 < p < 1) lies within these two locations of n sorted samples with confidence
-    of at least c (0 < c < 1).
+    """Tuple of two locations `(1...n)` such that percentile/quantile `p`
+    lies within these two locations of `n` sorted samples with confidence
+    of at least `c`. Return `None` if such a tuple cannot be computed. If that happens,
+    try to increase `n`, reduce `p`, or reduce `c`.
 
     Note that the locations are indexed at 1 and not zero!
 
-    Use this method if you plan to sort samples yourself, else you can use
-    confidence_interval_of_quantile method.
+    Use this method if you plan to sort samples yourself or you need only the locations.
+    If your array is not sorted already, you can use
+    :meth:`relistats.intervals.confidence_interval_of_percentile`.
 
-    Return None if such a tuple cannot be computed. If that happens, try to increase n,
-    reduce p, or reduce c.
+    :param n: number of samples
+    :type n: int
+    :param p: percentile/quantile level
+    :type p: float, `0 < p < 1`
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :return: percentile interval locations (1-based)
+    :rtype: tuple of int or None
     """
     if _percentile_invalid(p) or _confidence_invalid(c) or _num_samples_invalid(n):
         return None
@@ -129,11 +159,19 @@ def _percentile_interval_locs_candidates(
 
 
 def tolerance_interval(t: float, c: float, *args) -> Optional[tuple[Any, Any]]:
-    """Returns tolerance interval for middle t (0<t<1) fraction of samples,
-    with confidence c (0<c<1), if possible.
-    Use this method if you data is not sorted already, else you can use tolerance_interval_places.
-    Returns None if not possible.
-    args is any iterable (list, tuple, set)
+    """Tolerance interval for middle `t` fraction of samples, with confidence `c`.
+
+    Use this method if your data is not sorted already, else you can use
+    :meth:`relistats.intervals.tolerance_interval_locs`.
+
+    :param t: tolerance level
+    :type t: float, `0 < t < 1`
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :param args: array of values
+    :type args: array_like of type that supports computation of mean
+    :return: tolerance interval, or None if it cannot be computed
+    :rtype: tuple of same type as `args` or None
     """
     n = len(*args)
     ii = tolerance_interval_locs(n, t, c)
@@ -144,12 +182,20 @@ def tolerance_interval(t: float, c: float, *args) -> Optional[tuple[Any, Any]]:
 
 
 def tolerance_interval_locs(n: int, t: float, c: float) -> Optional[tuple[int, int]]:
-    """Returns tolerance interval locations. Out of n sorted samples, a fraction of t samples
-    (0 < t < 1) are expected to be within these two places, with a probability of at least c,
-    0 < c < 1.
+    """Tolerance interval locations. Out of `n` sorted samples, a fraction of `t` samples
+    are expected to be within these two places, with a probability of at least `c`.
 
-    Returns None if such tuple cannot be calculated. If that happens, try to increase n,
-    reduce t, or reduce c.
+    Returns `None` if such tuple cannot be calculated. If that happens, try to increase `n`,
+    reduce `t`, or reduce `c`.
+
+    :param n: number of samples
+    :type n: int
+    :param t: tolerance interval level
+    :type t: float, `0 < t < 1`
+    :param c: confidence level
+    :type c: float, `0 < c < 1`
+    :return: tolerance interval locations (1-based)
+    :rtype: tuple of int or None
     """
     if _percentile_invalid(t) or _confidence_invalid(c) or _num_samples_invalid(n):
         return None
@@ -198,11 +244,18 @@ def tolerance_interval_locs(n: int, t: float, c: float) -> Optional[tuple[int, i
 
 
 def assurance_interval(a: float, *args) -> Optional[tuple[Any, Any]]:
-    """Returns assurance interval for middle a (0<a<1) fraction of samples, if possible.
-    Same as tolerance interval for fraction a with confidence a.
-    Use this method if you data is not sorted already, else you can use assurance_interval_places.
-    Returns None if not possible.
-    args is any iterable (list, tuple, set)
+    """Assurance interval for middle `a` fraction of samples, if possible.
+    Same as tolerance interval for fraction `a` with confidence `a`.
+
+    Use this method if your data is not sorted already, else you can use
+    :meth:`relistats.intervals.assurance_interval_locs`.
+
+    :param a: assurance level
+    :type a: float, `0 < a < 1`
+    :param args: array of values
+    :type args: array_like of type that supports computation of mean
+    :return: assurance interval, or None if it cannot be computed
+    :rtype: tuple of same type as `args` or None
     """
     n = len(*args)
     ii = assurance_interval_locs(n, a)
@@ -213,30 +266,36 @@ def assurance_interval(a: float, *args) -> Optional[tuple[Any, Any]]:
 
 
 def assurance_interval_locs(n: int, a: float) -> Optional[tuple[int, int]]:
-    """Returns assurance interval locations. Out of n sorted samples, a fraction of a samples
-    are expected to be within these two locations, with a probability of at least a.
+    """Assurance interval locations. Out of `n` sorted samples a fraction of `a` samples
+    are expected to be within these two locations, with a probability of at least `a`.
 
-    Returns None if such tuple cannot be calculated. If that happens, try to increase n
-    or reduce a.
+    Returns `None` if such tuple cannot be calculated. If that happens, try to increase `n`
+    or reduce `a`.
+
+    :param n: number of samples
+    :type n: int
+    :param a: assurance level
+    :type a: float, `0 < a < 1`
+    :return: assurance interval locations (1-based)
+    :rtype: tuple of int or None
     """
     return tolerance_interval_locs(n, a, a)
 
 
 def assurance_in_interval(j_lo: int, j_hi: int, n: int, tol=0.001) -> Optional[float]:
-    """Assurance level for interval [j_lo, j_hi] out of n sorted samples. Assurance
-    level of a means a% of samples will be within this interval with a% confidence.
+    """Assurance level for interval [`j_lo`, `j_hi`] out of `n` sorted samples. Assurance
+    level of `a` means `a%` of samples will be within this interval with `a%` confidence.
     Example: Out of 16 ordered samples, we can be 80% confident that 80% samples will
     be between 1st and 15th place.
 
     :param j_lo: sample place at lower end
     :type j_lo: int, >0
     :param j_hi: sample place at upper end
-    :type j_hi: int, n > j_hi > j_lo
+    :type j_hi: int, `n > j_hi > j_lo`
     :param n: number of samples
     :type n: int, >=0
     :param tol: accuracy tolerance
     :type tol: float, optional
-
     :return: Assurance or None if it could not be computed
     :rtype: float, optional
     """

diff --git a/relistats/percentile.py b/relistats/percentile.py
@@ -18,12 +18,14 @@ def confidence_in_percentile(j: int, n: int, p: float) -> float:
     pp^th percentile/quantile (0 < p < 1) is greater than j samples, 1 <= j <= n.
 
     From https://online.stat.psu.edu/stat415/lesson/19/19.2
-    c = sum_{k=0}^{j-1} nCk * p^k * (1-p)^(n-k)
+
+    .. math::
+        c = \sum_{k=0}^{j-1} {n\choose k}  p^k  (1-p)^{n-k}
 
     This is the same as the cumulative distribution function of a binomial
     distribution, evaluated at j-1 out of n samples.
 
-    Note that j=n+1 will return 1.
+    Note that :math:`j=n+1` will return 1.
     """
     return stats.binom.cdf(j - 1, n, p)