diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 419d1a7..3e81f23 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -45,13 +45,13 @@ jobs: - name: Build documentation run: | - python -m sphinx docs/ docs/_build/ -b html + python -m sphinx docs/ build/html -b html - name: Deploy documentation to Github pages uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./docs/_build + publish_dir: ./build/html # Github release - name: Read CHANGELOG diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3c08a24..3e3b888 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,9 +59,9 @@ jobs: if: matrix.tasks == 'docs' - name: Test building documentation - run: python -m sphinx docs/ docs/_build/ -b html -W + run: python -m sphinx docs/ build/html -b html -W if: matrix.tasks == 'docs' - name: Check links in documentation - run: python -m sphinx docs/ docs/_build/ -b linkcheck -W + run: python -m sphinx docs/ build/html -b linkcheck -W if: matrix.tasks == 'docs' diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 54bd517..f059a3f 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -88,14 +88,14 @@ You can install it and a few other necessary packages with:: To create the HTML pages, use:: - python -m sphinx docs/ build/sphinx/html -b html + python -m sphinx docs/ build/html -b html -The generated files will be available in the directory :file:`build/sphinx/html/`. +The generated files will be available in the directory :file:`build/html/`. It is also possible to automatically check if all links are still valid:: - python -m sphinx docs/ build/sphinx/html -b linkcheck + python -m sphinx docs/ build/html -b linkcheck ..
_Sphinx: http://sphinx-doc.org diff --git a/audpsychometric/__init__.py b/audpsychometric/__init__.py index 22e55ae..40cd060 100644 --- a/audpsychometric/__init__.py +++ b/audpsychometric/__init__.py @@ -1,5 +1,3 @@ -import audpsychometric.core -from audpsychometric.core import datasets from audpsychometric.core.datasets import list_datasets from audpsychometric.core.datasets import read_dataset from audpsychometric.core.gold_standard import agreement_categorical @@ -7,7 +5,6 @@ from audpsychometric.core.gold_standard import evaluator_weighted_estimator from audpsychometric.core.gold_standard import mode from audpsychometric.core.gold_standard import rater_agreement_pearson -import audpsychometric.core.reliability from audpsychometric.core.reliability import congeneric_reliability from audpsychometric.core.reliability import cronbachs_alpha from audpsychometric.core.reliability import intra_class_correlation diff --git a/audpsychometric/core/datasets/__init__.py b/audpsychometric/core/datasets/__init__.py index faa1be1..3581490 100644 --- a/audpsychometric/core/datasets/__init__.py +++ b/audpsychometric/core/datasets/__init__.py @@ -1,6 +1,3 @@ -"""Provide example datasets for package.""" - - -__all__ = ["read_dataset", "list_dataset"] +__all__ = ["read_dataset", "list_datasets"] import os @@ -19,12 +16,20 @@ def read_dataset(data_set_name: str) -> pd.DataFrame: retrieves a test dataset from within the package. Args: - data_set_name(str): string identifier of the dataset. - This does not need not be identical with the filename + data_set_name(str): dataset name Returns: - table containing dataset - + dataframe containing dataset + + Examples: + >>> df = read_dataset("wine") + >>> df.head() + Wine Judge Scores + 0 1 A 1 + 1 2 A 1 + 2 3 A 3 + 3 4 A 6 + 4 5 A 6 """ ds = data_sets.loc[data_sets["dataset"] == data_set_name] @@ -38,11 +43,22 @@ def list_datasets(): -r"""List tests datasets available in package. +r"""List test datasets available in package.
- Args: - None Returns: - table listing available datasets - - """ + dataframe listing available datasets + + Examples: + >>> list_datasets() + fname ... description + dataset ... + statology statology.csv ... icc sample from web page + hallgren-table5 Hallgren-Table-05.csv ... icc table from publication + hallgren-table3 Hallgren-Table-03.csv ... kappa table from publication + HolzingerSwineford1939 HolzingerSwineford1939.csv ... lavaan + Shrout_Fleiss Shrout_Fleiss_1979.csv ... Dataset from paper + wine wine.csv ... online source + + [6 rows x 4 columns] + + """ # noqa: E501 df_data_sets = data_sets.set_index("dataset") return df_data_sets diff --git a/audpsychometric/core/reliability.py b/audpsychometric/core/reliability.py index d6ab942..099b5b8 100644 --- a/audpsychometric/core/reliability.py +++ b/audpsychometric/core/reliability.py @@ -126,6 +126,9 @@ def intra_class_correlation( The model is based on analysis of variance, and ratings must at least be ordinally scaled. + CCC_ is conceptually and numerically related to the ICC. + For an implementation see :func:`audmetric.concordance_cc`. + Args: ratings: ratings. When given as a 1-dimensional array, @@ -137,6 +140,8 @@ def intra_class_correlation( anova_method: method for ANOVA calculation, can be ``"pingouin"`` or ``"statsmodels"`` + .. _CCC: https://en.wikipedia.org/wiki/Concordance_correlation_coefficient + Returns: icc and additional results lumped into dict @@ -288,20 +293,6 @@ def intra_class_correlation( - :math:`k` is the number of raters - :math:`n` is the number of items - **Implementation Details** - - For doing the analysis, - the :class:`pd.DataFrame` is preprocessed: - The first step will melt - and the data into a long format - for checking incomplete cases. - In this process, - the index will be renamed to a column item - to mimic classical test theory conventions. - The raters will end up in a separate column - containing the ratings. - Ratings will be available under the column rating. 
- """ # noqa: E501 if not isinstance(ratings, pd.DataFrame): df = pd.DataFrame(np.atleast_2d(np.array(ratings))) diff --git a/docs/api-src/audpsychometric.rst b/docs/api-src/audpsychometric.rst index f58da5d..5d49bf9 100644 --- a/docs/api-src/audpsychometric.rst +++ b/docs/api-src/audpsychometric.rst @@ -3,84 +3,17 @@ audpsychometric .. automodule:: audpsychometric -Library to facilitate evaluation and processing of annotated speech. - -Pychometric Analysis --------------------- - -.. autosummary:: - :toctree: - :nosignatures: - - cronbachs_alpha - congeneric_reliability - intra_class_correlation - -The module currently contains two reliability coefficients -from the family of structural equation model (SEM)-based -reliability coefficients. -One of them is Cronbach's alphas -in the function :func:`audpsychometric.cronbachs_alpha`. -This classical coefficient assumes *tau equivalence* -which requires factor loadings to be homogeneous. -The second coefficient -in the function :func:`audpsychometric.congeneric_reliability` -relaxes this assumption -and only assumes a `one-dimensional congeneric reliability`_ model: -congeneric measurement models are characterized by the fact -that the factor loadings of the indicators -do not have to be homogeneous, -i.e. they can differ. - -In addition, -the module implements *Intraclass Correlation (ICC)* analysis. -ICC is based on the analysis of variance of a class of coefficients -that are based on ANOVA -with ratings as the dependent variable, -and terms for targets -(like e.g rated audio chunks), -raters and their interaction are estimated. -Different flavors of ICC are then computed -based on these sum of squares terms. - -Note that the CCC_ is conceptually and numerically related to the ICC. -We do not implement it here, -as there are other implementations available, -e.g. :func:`audmetric.concordance_cc`. - - -Gold Standard Calculation -------------------------- - .. 
autosummary:: :toctree: :nosignatures: agreement_categorical agreement_numerical + cronbachs_alpha + congeneric_reliability evaluator_weighted_estimator + intra_class_correlation + list_datasets mode rater_agreement_pearson - - -Demo Datasets -------------- - -.. autosummary:: - :toctree: - :nosignatures: - - list_datasets read_dataset - -Currently these datasets are defined: - -.. jupyter-execute:: - - from audpsychometric import datasets - df_datasets = datasets.list_datasets() - print(df_datasets) - - -.. _one-dimensional congeneric reliability: https://en.wikipedia.org/wiki/Congeneric_reliability -.. _CCC: https://en.wikipedia.org/wiki/Concordance_correlation_coefficient diff --git a/docs/conf.py b/docs/conf.py index b2f86a3..f09f506 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,6 @@ ] pygments_style = None extensions = [ - "jupyter_sphinx", # executing code blocks "sphinx.ext.autodoc", "sphinx.ext.napoleon", # support for Google-style docstrings "sphinx.ext.viewcode", diff --git a/docs/requirements.txt b/docs/requirements.txt index f1e898d..bb43359 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,11 +1,8 @@ audeer -ipykernel -jupyter-sphinx sphinx sphinx-apipages >=0.1.2 sphinx-audeering-theme >=1.2.1 sphinx-autodoc-typehints sphinx-copybutton -sphinxcontrib-programoutput sphinxcontrib-bibtex toml diff --git a/tests/conftest.py b/tests/conftest.py index b83ce14..09729a1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,12 @@ import numpy as np import pytest -from audpsychometric import datasets +import audpsychometric @pytest.fixture(scope="function") def df_holzinger_swineford(): - df_dataset = datasets.read_dataset("HolzingerSwineford1939") + df_dataset = audpsychometric.read_dataset("HolzingerSwineford1939") cols_use = [col for col in df_dataset.columns if col.startswith("x")] df = df_dataset[cols_use].astype(np.float32) return df diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 
744a51a..a295271 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -7,7 +7,7 @@ def test_list_datasets(): """First basic dataset is available in dataset list.""" - df_datasets = audpsychometric.datasets.list_datasets() + df_datasets = audpsychometric.list_datasets() assert "statology" in df_datasets.index @@ -24,5 +24,5 @@ def test_list_datasets(): ) def test_read_dataset(dataset): """Test functional requirement that a dataset can be read into dataframe.""" - df_dataset = audpsychometric.datasets.read_dataset(dataset) + df_dataset = audpsychometric.read_dataset(dataset) assert isinstance(df_dataset, pd.DataFrame) diff --git a/tests/test_reliability.py b/tests/test_reliability.py index 65eab23..d9f7c30 100644 --- a/tests/test_reliability.py +++ b/tests/test_reliability.py @@ -7,7 +7,7 @@ def test_icc(): """Test icc basic result validity.""" - df_dataset = audpsychometric.datasets.read_dataset("wine") + df_dataset = audpsychometric.read_dataset("wine") data_wide = df_dataset.pivot_table(index="Wine", columns="Judge", values="Scores") @@ -24,7 +24,7 @@ def test_icc(): def test_cronbachs_alpha(): """Test cronbach's alpha return values for three raters.""" - df_dataset = audpsychometric.datasets.read_dataset("hallgren-table3") + df_dataset = audpsychometric.read_dataset("hallgren-table3") df = df_dataset[["Dep_Rater1", "Dep_Rater2", "Dep_Rater3"]] for ratings in [df, df.values]: alpha, result = audpsychometric.cronbachs_alpha(ratings) @@ -56,7 +56,7 @@ def test_anova_helper(): def test_icc_nanremoval(): """Cover nan removal if statement.""" - df_dataset = audpsychometric.datasets.read_dataset("HolzingerSwineford1939") + df_dataset = audpsychometric.read_dataset("HolzingerSwineford1939") df_dataset = df_dataset[[x for x in df_dataset.columns if x.startswith("x")]] nan_mat = np.random.random(df_dataset.shape) < 0.1 audpsychometric.intra_class_correlation(df_dataset.mask(nan_mat))