From dac8fc7aa8a0ee4a0cc313ab117291ada20fab16 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 19 Mar 2024 09:25:08 +0100 Subject: [PATCH 1/4] updated docs based on review --- docs/conf.py | 1 - paper/paper.md | 1 - readme.md | 2 +- tests/test_all_augmenters.py | 4 +++- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index a2ab557..1dbd4cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -112,4 +112,3 @@ "sidebar_hide_name": True, "navigation_with_keys": True, } - diff --git a/paper/paper.md b/paper/paper.md index 668f110..c4d8f08 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -37,7 +37,6 @@ Other tools for data augmentation focus on specific downstream application such # Features & Functionality - `Augmenty` is a Python library that implements augmentations based on `spaCy`'s `Doc` object. `spaCy`'s `Doc` object is a container for a text and its annotations. This makes it easy to augment text and annotations simultaneously. The `Doc` object can easily be extended to include custom augmentation not available in `spaCy` by adding custom attributes to the `Doc` object. While `Augmenty` is built to augment `Doc`s the object is easily converted into strings, lists or other formats. The annotations within a `Doc` can be provided either by human annotations or using a trained model. Augmenty implements a series of augmenters for token-, span- and sentence-level augmentation. These augmenters range from primitive augmentations such as word replacement to language specific augmenters such as keystroke error augmentations based on a French keyboard layout. Augmenty also integrates with other libraries such as `NLTK` [@bird2009natural] to allow for augmentations based on WordNet [@miller-1994-wordnet] and allows for specification of static word vectors [pennington-etal-2014-glove] to allow for augmentations based on word similarity. 
Lastly, `augmenty` provides a set of utility functions for repeating augmentations, combining augmenters or adjust the percentage of documents that should be augmented. This allow for the flexible construction of augmentation pipelines specific to the task at hand. diff --git a/readme.md b/readme.md index 711956e..e0fe2b2 100644 --- a/readme.md +++ b/readme.md @@ -26,7 +26,7 @@ pip install augmenty Do note that this is a minimal installation. As some augmenters requires additional packages please write the following line to install all dependencies. ``` -pip install augmenty[all] +pip install "augmenty[all]" ``` For more detailed instructions on installing augmenty, including specific language support, see the [installation instructions](https://kennethenevoldsen.github.io/augmenty/installation). diff --git a/tests/test_all_augmenters.py b/tests/test_all_augmenters.py index 0c92050..1e0b303 100644 --- a/tests/test_all_augmenters.py +++ b/tests/test_all_augmenters.py @@ -80,7 +80,9 @@ def is_pronoun(token: Token) -> bool: } -@pytest.mark.parametrize("aug,args", [(k, augmenters_args[k]) for k in augmenters_args]) # noqa +@pytest.mark.parametrize( + "aug,args", [(k, augmenters_args[k]) for k in augmenters_args] +) # noqa @pytest.mark.parametrize("level", [0.1, 0.5, 1]) @pytest.mark.timeout(100) @pytest.mark.parametrize( From 477895c48cd6a1d271994276d2cf535cdd214bd3 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 19 Mar 2024 09:55:28 +0100 Subject: [PATCH 2/4] fix: updated tests dependencies due to issues with pytest-fixtures: https://github.com/TvoroG/pytest-lazy-fixture/issues/65 --- .github/workflows/tests.yml | 1 - CONTRIBUTING.md | 2 +- makefile | 1 + pyproject.toml | 2 +- readme.md | 1 + 5 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d55d3be..3ea677a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -41,7 +41,6 @@ jobs: shell: bash run: | make 
install - pip install -r tests/requirements.txt - name: Run tests shell: bash diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed88d46..d1fe4c9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,7 @@ $ make install ``` -## How to test the project +### How to test the project Run the full test suite: diff --git a/makefile b/makefile index 4ee4cfb..0259785 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,7 @@ install: @echo "--- 🚀 Installing project ---" pip install -e ".[dev, docs, tests,tutorials,all,da]" + pip install -r tests/requirements.txt static-type-check: @echo "--- 🔍 Running static type check ---" diff --git a/pyproject.toml b/pyproject.toml index 73d3f62..1c4c38d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dev = [ "pyproject-parser[cli, readme]>=0.9.1", ] tests = [ - "pytest>=7.1.3", + "pytest>=7.1.3,<8.0.0", # due to https://github.com/TvoroG/pytest-lazy-fixture/issues/65 "pytest-cov>=3.0.0", "pytest-lazy-fixture>=0.6.3", "pytest-timeout>=2.1.0", diff --git a/readme.md b/readme.md index e0fe2b2..df2647a 100644 --- a/readme.md +++ b/readme.md @@ -39,6 +39,7 @@ import spacy import augmenty nlp = spacy.load("en_core_web_md") +# if not installed run: python -m spacy download en_core_web_md docs = nlp.pipe(["Augmenty is a great tool for text augmentation"]) From 61d5e9c3661521f983eb72e135aecf49fdddabc0 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 19 Mar 2024 09:59:12 +0100 Subject: [PATCH 3/4] fix: ran linters --- src/augmenty/character/replace.py | 1 - src/augmenty/character/swap.py | 1 - src/augmenty/keyboard.py | 1 + tests/test_all_augmenters.py | 5 +---- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/augmenty/character/replace.py b/src/augmenty/character/replace.py index c50fc72..cda2065 100644 --- a/src/augmenty/character/replace.py +++ b/src/augmenty/character/replace.py @@ -1,6 +1,5 @@ """Augmenters for randomly or semi-randomly replacing characters.""" - import random from 
functools import partial from typing import Callable, Iterator diff --git a/src/augmenty/character/swap.py b/src/augmenty/character/swap.py index 660d6a3..2e10d79 100644 --- a/src/augmenty/character/swap.py +++ b/src/augmenty/character/swap.py @@ -1,6 +1,5 @@ """Augmenters for swapping characters.""" - import random from functools import partial from typing import Callable, Iterator diff --git a/src/augmenty/keyboard.py b/src/augmenty/keyboard.py index c298761..d0c4e42 100644 --- a/src/augmenty/keyboard.py +++ b/src/augmenty/keyboard.py @@ -1,4 +1,5 @@ """Function for defining and handling keyboard layouts.""" + from typing import Dict, List, Tuple from pydantic import BaseModel diff --git a/tests/test_all_augmenters.py b/tests/test_all_augmenters.py index 1e0b303..4da6fed 100644 --- a/tests/test_all_augmenters.py +++ b/tests/test_all_augmenters.py @@ -1,6 +1,5 @@ """Pytest script for testing all augmenters in a variety of cases.""" - from typing import Iterable import augmenty @@ -80,9 +79,7 @@ def is_pronoun(token: Token) -> bool: } -@pytest.mark.parametrize( - "aug,args", [(k, augmenters_args[k]) for k in augmenters_args] -) # noqa +@pytest.mark.parametrize("aug,args", [(k, augmenters_args[k]) for k in augmenters_args]) # noqa @pytest.mark.parametrize("level", [0.1, 0.5, 1]) @pytest.mark.timeout(100) @pytest.mark.parametrize( From 98e6f97b70e3b55a279ea8e49129b0fa4fe852fc Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 19 Mar 2024 09:59:24 +0100 Subject: [PATCH 4/4] updated linters due to deprecation warning --- makefile | 2 +- pyproject.toml | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/makefile b/makefile index 0259785..692f117 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ lint: @echo "--- 🧹 Running linters ---" pyproject-parser check pyproject.toml # check pyproject.toml ruff format . 
# running ruff formatting (.ipynb, .py) - ruff **/*.py --fix # running ruff linting (.py) + ruff check **/*.py --fix # running ruff linting (.py) test: @echo "--- 🧪 Running tests ---" diff --git a/pyproject.toml b/pyproject.toml index 1c4c38d..4f491b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,7 @@ pythonPlatform = "Darwin" [tool.ruff] extend-include = ["*.ipynb"] # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. -select = [ +lint.select = [ "A", "ANN", "ARG", @@ -120,7 +120,7 @@ select = [ "SIM", "W", ] -ignore = [ +lint.ignore = [ "ANN101", "ANN401", "E402", @@ -130,9 +130,9 @@ ignore = [ "RET504", "COM812", ] -ignore-init-module-imports = true +lint.ignore-init-module-imports = true # Allow autofix for all enabled rules (when `--fix`) is provided. -unfixable = ["ERA"] +lint.unfixable = ["ERA"] # Exclude a variety of commonly ignored directories. exclude = [ ".bzr", @@ -158,10 +158,10 @@ exclude = [ "docs/conf.py", ] # Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" target-version = "py38" -[tool.ruff.flake8-annotations] +[tool.ruff.lint.flake8-annotations] mypy-init-return = true suppress-none-returning = true