diff --git a/Pipfile b/Pipfile index b3ac02e..10f1aed 100644 --- a/Pipfile +++ b/Pipfile @@ -22,3 +22,4 @@ pytest-cov = "*" build = "*" twine = "*" flake8-pyproject = "*" +sphinx = "*" diff --git a/cspell.json b/cspell.json index 7bea27c..5a6d731 100644 --- a/cspell.json +++ b/cspell.json @@ -5,8 +5,12 @@ "language": "en", // words - list of words to be always considered correct "words": [ + "automodule", + "autosummary", "barh", "expon", + "genindex", + "intersphinx", "isort", "kdeplot", "labelbottom", @@ -15,15 +19,21 @@ "lineplot", "linewidth", "Loosers", + "maxdepth", + "modindex", "mypy", "pipenv", "pyproject", "pytest", "quantile", + "quickstart", + "Sanjay", "Tighters", "tmean", + "toctree", "triang", "tstd", + "undoc", "xaxis", "xlabel", "xlim", diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d83caa4 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,7 @@ +# Documentation + +This folder contains infrastructure to build documentation using `sphinx`. To build +using this infrastructure, install the dependencies using `pipenv`. + +- On Windows, execute `main.bat` from this folder. +- On Linux/Mac, execute `sphinx-build source build` diff --git a/docs/source/conf.py b/docs/source/conf.py index da4b805..7626a36 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ sys.path.insert(0, os.path.abspath("..")) -project = "Reliability Statistics" +project = "Reliable Statistics" copyright = "2023, Sanjay M Joshi" author = "Sanjay M Joshi" diff --git a/docs/source/index.rst b/docs/source/index.rst index 1d58786..a0665bf 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,12 +1,13 @@ -.. Reliability Statistics documentation master file, created by +.. ReliableStatistics documentation master file, created by sphinx-quickstart on Sun Feb 26 11:41:33 2023. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. 
-Welcome to Reliability Statistics's documentation! +Welcome to Reliable Statistics's documentation! ================================================== -**relistats** is a Python library for computing reliability engineering -statistics, such as reliability, confidence, and assurance. +**relistats** is a Python library for computing reliable +statistics, such as reliability, confidence, and assurance as well as +tolerance and assurance intervals. .. toctree:: :maxdepth: 2 diff --git a/docs/source/relistats.rst b/docs/source/relistats.rst index 885f049..378a50e 100644 --- a/docs/source/relistats.rst +++ b/docs/source/relistats.rst @@ -22,6 +22,22 @@ relistats.binom_fin module :undoc-members: :show-inheritance: +relistats.intervals module +-------------------------- + +.. automodule:: relistats.intervals + :members: + :undoc-members: + :show-inheritance: + +relistats.percentile module +--------------------------- + +.. automodule:: relistats.percentile + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/pyproject.toml b/pyproject.toml index 14dc6e3..546b0df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ Repository = "https://github.com/sanjaymjoshi/relistats" Changelog = "https://github.com/sanjaymjoshi/relistats/blob/main/CHANGELOG.md" [tool.flake8] -ignore = ["E203", "E266", "E501", "W503"] +ignore = ["E203", "E266", "E501", "W503", "W605"] max-line-length = 88 max-complexity = 18 select = ["B", "C", "E", "F", "W", "T4"] diff --git a/relistats/intervals.py b/relistats/intervals.py index bab6395..63b8eb6 100644 --- a/relistats/intervals.py +++ b/relistats/intervals.py @@ -15,15 +15,28 @@ def confidence_interval_of_mean(c: float, *args) -> tuple[Any, Any]: - """Returns confidence interval of mean from args at confidence of c, 0 < c < 1""" + """Confidence interval of mean of `args` at confidence level `c`. 
+ + :param c: confidence level + :type c: float, `0 < c < 1` + :param args: array of values + :type args: array_like of type that supports computation of mean + :return: confidence interval + :rtype: tuple of same type as `args` + """ mean, sem = stats.tmean(*args), stats.sem(*args) return stats.norm.interval(c, loc=mean, scale=sem) def confidence_interval_of_median(c: float, *args) -> Optional[tuple[Any, Any]]: - """Returns median interval from args at confidence of at least c, if possible. - Returns None if not possible. - args is any iterable (list, tuple, set) + """Confidence interval of median of `args` at confidence level `c`. + + :param c: confidence level + :type c: float, `0 < c < 1` + :param args: array of values + :type args: array_like of type that supports computation of mean + :return: confidence interval or None + :rtype: tuple of same type as `args` or None """ return confidence_interval_of_percentile(0.5, c, *args) @@ -31,10 +44,19 @@ def confidence_interval_of_median(c: float, *args) -> Optional[tuple[Any, Any]]: def confidence_interval_of_percentile( p: float, c: float, *args ) -> Optional[tuple[Any, Any]]: - """Returns p'th percentile/quantile interval from args at confidence of at least c, if possible. - Use this method if you data is not sorted already, else you can use quantile_interval_places. - Returns None if not possible. - args is any iterable (list, tuple, set) + """`p`'th percentile/quantile interval of `args` at confidence level `c`. + + Use this method if your data is not sorted already, else you can use + :meth:`relistats.intervals.percentile_interval_locs`. 
+ + :param p: percentile/quantile level + :type p: float, `0 < p < 1` + :param c: confidence level + :type c: float, `0 < c < 1` + :param args: array of values + :type args: array_like of type that supports computation of mean + :return: confidence interval or None + :rtype: tuple of same type as `args` or None """ n = len(*args) ii = percentile_interval_locs(n, p, c) @@ -45,17 +67,25 @@ def confidence_interval_of_percentile( def percentile_interval_locs(n: int, p: float, c: float) -> Optional[tuple[int, int]]: - """Returns tuple of two locations (1..n) such that percentile/quantile p - (0 < p < 1) lies within these two locations of n sorted samples with confidence - of at least c (0 < c < 1). + """Tuple of two locations `(1...n)` such that percentile/quantile `p` + lies within these two locations of `n` sorted samples with confidence + of at least `c`. Return `None` if such a tuple cannot be computed. If that happens, + try to increase `n`, reduce `p`, or reduce `c`. Note that the locations are indexed at 1 and not zero! - Use this method if you plan to sort samples yourself, else you can use - confidence_interval_of_quantile method. + Use this method if you plan to sort samples yourself or you need only the locations. + If your array is not sorted already, you can use + :meth:`relistats.intervals.confidence_interval_of_percentile`. - Return None if such a tuple cannot be computed. If that happens, try to increase n, - reduce p, or reduce c. 
+ :param n: number of samples + :type n: int + :param p: percentile/quantile level + :type p: float, `0 < p < 1` + :param c: confidence level + :type c: float, `0 < c < 1` + :return: percentile interval locations (1-based) + :rtype: tuple of int or None """ if _percentile_invalid(p) or _confidence_invalid(c) or _num_samples_invalid(n): return None @@ -129,11 +159,19 @@ def _percentile_interval_locs_candidates( def tolerance_interval(t: float, c: float, *args) -> Optional[tuple[Any, Any]]: - """Returns tolerance interval for middle t (0 Optional[tuple[Any, Any]]: def tolerance_interval_locs(n: int, t: float, c: float) -> Optional[tuple[int, int]]: - """Returns tolerance interval locations. Out of n sorted samples, a fraction of t samples - (0 < t < 1) are expected to be within these two places, with a probability of at least c, - 0 < c < 1. + """Tolerance interval locations. Out of `n` sorted samples, a fraction of `t` samples + are expected to be within these two places, with a probability of at least `c`. - Returns None if such tuple cannot be calculated. If that happens, try to increase n, - reduce t, or reduce c. + Returns `None` if such tuple cannot be calculated. If that happens, try to increase `n`, + reduce `t`, or reduce `c`. + + :param n: number of samples + :type n: int + :param t: tolerance interval level + :type t: float, `0 < t < 1` + :param c: confidence level + :type c: float, `0 < c < 1` + :return: tolerance interval locations (1-based) + :rtype: tuple of int or None """ if _percentile_invalid(t) or _confidence_invalid(c) or _num_samples_invalid(n): return None @@ -198,11 +244,18 @@ def tolerance_interval_locs(n: int, t: float, c: float) -> Optional[tuple[int, i def assurance_interval(a: float, *args) -> Optional[tuple[Any, Any]]: - """Returns assurance interval for middle a (0 Optional[tuple[Any, Any]]: def assurance_interval_locs(n: int, a: float) -> Optional[tuple[int, int]]: - """Returns assurance interval locations. 
Out of n sorted samples, a fraction of a samples - are expected to be within these two locations, with a probability of at least a. + """Assurance interval locations. Out of `n` sorted samples, a fraction of `a` samples + are expected to be within these two locations, with a probability of at least `a`. - Returns None if such tuple cannot be calculated. If that happens, try to increase n - or reduce a. + Returns `None` if such tuple cannot be calculated. If that happens, try to increase `n` + or reduce `a`. + + :param n: number of samples + :type n: int + :param a: assurance level + :type a: float, `0 < a < 1` + :return: assurance interval locations (1-based) + :rtype: tuple of int or None """ return tolerance_interval_locs(n, a, a) def assurance_in_interval(j_lo: int, j_hi: int, n: int, tol=0.001) -> Optional[float]: - """Assurance level for interval [j_lo, j_hi] out of n sorted samples. Assurance - level of a means a% of samples will be within this interval with a% confidence. + """Assurance level for interval [`j_lo`, `j_hi`] out of `n` sorted samples. Assurance + level of `a` means `a%` of samples will be within this interval with `a%` confidence. Example: Out of 16 ordered samples, we can be 80% confident that 80% samples will be between 1st and 15th place. :param j_lo: sample place at lower end :type j_lo: int, >0 :param j_hi: sample place at upper end - :type j_hi: int, n > j_hi > j_lo + :type j_hi: int, `n > j_hi > j_lo` :param n: number of samples :type n: int, >=0 :param tol: accuracy tolerance :type tol: float, optional - :return: Assurance or None if it could not be computed :rtype: float, optional """ diff --git a/relistats/percentile.py b/relistats/percentile.py index 7faa5a9..a87f96b 100644 --- a/relistats/percentile.py +++ b/relistats/percentile.py @@ -18,12 +18,14 @@ def confidence_in_percentile(j: int, n: int, p: float) -> float: pp^th percentile/quantile (0 < p < 1) is greater than j samples, 1 <= j <= n. 
From https://online.stat.psu.edu/stat415/lesson/19/19.2 - c = sum_{k=0}^{j-1} nCk * p^k * (1-p)^(n-k) + + .. math:: + c = \sum_{k=0}^{j-1} {n\choose k} p^k (1-p)^{n-k} This is same as cumulative density function for a binomial distribution, evaluated at j-1 out of n samples. - Note that j=n+1 will return 1. + Note that :math:`j=n+1` will return 1. """ return stats.binom.cdf(j - 1, n, p)