diff --git a/.github/workflows/publish-package.yml b/.github/workflows/publish-package.yml
new file mode 100644
index 000000000..537c1ee71
--- /dev/null
+++ b/.github/workflows/publish-package.yml
@@ -0,0 +1,44 @@
+name: Publish npc_gzip package
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  
+  publish:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry --version
+
+      - name: Run Tests
+        run: |
+          poetry install
+          poetry run pytest --junit-xml=junit/test-results-${{ matrix.python-version }}.xml
+          
+      - name: Upload pytest test results
+        if: ${{ !cancelled() }}  # Upload results even if tests fail.
+        uses: actions/upload-artifact@v3
+        with:
+          name: pytest-results-${{ matrix.python-version }}
+          path: junit/test-results-${{ matrix.python-version }}.xml
+
+      - name: Poetry Build & Publish
+        run: |
+          poetry build
+          poetry publish
\ No newline at end of file
diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml
new file mode 100644
index 000000000..a00dc356f
--- /dev/null
+++ b/.github/workflows/test-package.yml
@@ -0,0 +1,37 @@
+name: Run npc_gzip tests
+
+on: [push, pull_request]
+
+jobs:
+  
+  run-tests:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry --version
+
+      - name: Run Tests
+        run: |
+          poetry install
+          poetry run pytest --junit-xml=junit/test-results-${{ matrix.python-version }}.xml
+
+      - name: Upload pytest test results  
+        if: ${{ !cancelled() }}  # Upload results even if tests fail.
+        uses: actions/upload-artifact@v3
+        with:
+          name: pytest-results-${{ matrix.python-version }}
+          path: junit/test-results-${{ matrix.python-version }}.xml
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..213eba841
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,163 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+notebooks/
+junit/
+*.npy
\ No newline at end of file
diff --git a/README.md b/README.md
index 30a465ee6..0abea063d 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,40 @@
-### Code for Paper: “Low-Resource” Text Classification: A Parameter-Free Classification Method with Compressors
+# Code for Paper: “Low-Resource” Text Classification: A Parameter-Free Classification Method with Compressors
 
 This paper is accepted to Findings of [ACL2023](https://aclanthology.org/2023.findings-acl.426/).
 
-### Require
+## Getting Started
+
+This codebase is [available on pypi.org via](https://pypi.org/project/npc-gzip) 
+
+
+```bash
+
+pip install npc-gzip
+
+```
+
+## Usage
+
+See the [examples](./examples/imdb.py) directory for example usage.
+
+
+## Testing
+
+This package utilizes `poetry` to maintain its dependencies and `pytest` to execute tests. To get started running the tests:
+
+```bash
+
+poetry shell
+poetry install
+pytest
+
+```
+
+-------------------------
+
+### Original Codebase
+
+#### Require
 
 See `requirements.txt`.
 
@@ -13,7 +45,7 @@ conda activate npc
 pip install -r requirements.txt
 ```
 
-### Run
+#### Run
 
 ```
 python main_text.py
@@ -36,7 +68,7 @@ By default, this will only use 100 test and training samples per class as a quic
 
 ```
 
-### Calculate Accuracy (Optional)
+#### Calculate Accuracy (Optional)
 
 If we want to calculate accuracy from recorded distance file <DISTANCE DIR>, use
 
@@ -45,7 +77,7 @@ python main_text.py --record --score --distance_fn <DISTANCE DIR>
 ```
 to calculate accuracy. Otherwise, the accuracy will be calculated automatically using the command in the last section.
 
-### Use Custom Dataset
+#### Use Custom Dataset
 
 You can use your own custom dataset by passing `custom` to `--dataset`; pass the data directory that contains `train.txt` and `test.txt` to `--data_dir`; pass the class number to the `--class_num`.
 
diff --git a/examples/ag_news.py b/examples/ag_news.py
new file mode 100644
index 000000000..2f60ad997
--- /dev/null
+++ b/examples/ag_news.py
@@ -0,0 +1,112 @@
+import numpy as np
+from sklearn.metrics import classification_report
+from sklearn.utils import shuffle
+from torchtext.datasets import AG_NEWS
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.gzip_compressor import GZipCompressor
+from npc_gzip.knn_classifier import KnnClassifier
+
+
+def get_data() -> tuple:
+    """
+    Pulls the AG_NEWS dataset
+    and returns two tuples the first being the
+    training data and the second being the test
+    data. Each tuple contains the text and label
+    respectively as numpy arrays.
+
+    """
+
+    train_iter, test_iter = AG_NEWS(split=("train", "test"))
+
+    train_text = []
+    train_labels = []
+    for label, text in train_iter:
+        train_labels.append(label)
+        train_text.append(text)
+
+    test_text = []
+    test_labels = []
+    for label, text in test_iter:
+        test_labels.append(label)
+        test_text.append(text)
+
+    train_text = np.array(train_text)
+    train_labels = np.array(train_labels)
+
+    test_text = np.array(test_text)
+    test_labels = np.array(test_labels)
+
+    train = (train_text, train_labels)
+    test = (test_text, test_labels)
+
+    return (train, test)
+
+
+def fit_model(
+    train_text: np.ndarray, train_labels: np.ndarray, distance_metric: str = "ncd"
+) -> KnnClassifier:
+    """
+    Fits a Knn-GZip compressor on the train
+    data and returns it.
+
+    Arguments:
+        train_text (np.ndarray): Training dataset as a numpy array.
+        train_labels (np.ndarray): Training labels as a numpy array.
+
+    Returns:
+        KnnClassifier: Trained Knn-Compressor model ready to make predictions.
+    """
+
+    compressor: BaseCompressor = GZipCompressor()
+    model: KnnClassifier = KnnClassifier(
+        compressor=compressor,
+        training_inputs=train_text,
+        training_labels=train_labels,
+        distance_metric=distance_metric,
+    )
+
+    return model
+
+
+def main() -> None:
+    print(f"Fetching data...")
+    ((train_text, train_labels), (test_text, test_labels)) = get_data()
+
+    print(f"Fitting model...")
+    model = fit_model(train_text, train_labels)
+    random_indicies = np.random.choice(test_text.shape[0], 1000, replace=False)
+
+    sample_test_text = test_text[random_indicies]
+    sample_test_labels = test_labels[random_indicies]
+
+    print(f"Generating predictions...")
+    top_k = 1
+
+    # Here we use the `sampling_percentage` to save time
+    # at the expense of worse predictions. This
+    # `sampling_percentage` selects a random % of training
+    # data to compare `sample_test_text` against rather
+    # than comparing it against the entire training dataset.
+    (distances, labels, similar_samples) = model.predict(
+        sample_test_text, top_k, sampling_percentage=0.01
+    )
+
+    print(classification_report(sample_test_labels, labels.reshape(-1)))
+
+
+if __name__ == "__main__":
+    main()
+
+
+#               precision    recall  f1-score   support
+
+#            1       0.67      0.80      0.73       246
+#            2       0.78      0.74      0.76       246
+#            3       0.64      0.61      0.62       249
+#            4       0.65      0.59      0.62       259
+
+#     accuracy                           0.68      1000
+#    macro avg       0.68      0.68      0.68      1000
+# weighted avg       0.68      0.68      0.68      1000
diff --git a/examples/imdb.py b/examples/imdb.py
new file mode 100644
index 000000000..44be9f1b8
--- /dev/null
+++ b/examples/imdb.py
@@ -0,0 +1,117 @@
+import numpy as np
+from sklearn.metrics import classification_report
+from sklearn.utils import shuffle
+from torchtext.datasets import IMDB
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.gzip_compressor import GZipCompressor
+from npc_gzip.knn_classifier import KnnClassifier
+
+
+def get_data() -> tuple:
+    """
+    Pulls the IMDB sentiment analysis dataset
+    and returns two tuples the first being the
+    training data and the second being the test
+    data. Each tuple contains the text and label
+    respectively as numpy arrays.
+
+    """
+
+    train_iter, test_iter = IMDB(split=("train", "test"))
+
+    train_text = []
+    train_labels = []
+    for label, text in train_iter:
+        train_labels.append(label)
+        train_text.append(text)
+
+    test_text = []
+    test_labels = []
+    for label, text in test_iter:
+        test_labels.append(label)
+        test_text.append(text)
+
+    train_text = np.array(train_text)
+    train_labels = np.array(train_labels)
+
+    test_text = np.array(test_text)
+    test_labels = np.array(test_labels)
+
+    train = (train_text, train_labels)
+    test = (test_text, test_labels)
+
+    return (train, test)
+
+
+def fit_model(
+    train_text: np.ndarray, train_labels: np.ndarray, distance_metric: str = "ncd"
+) -> KnnClassifier:
+    """
+    Fits a Knn-GZip compressor on the train
+    data and returns it.
+
+    Arguments:
+        train_text (np.ndarray): Training dataset as a numpy array.
+        train_labels (np.ndarray): Training labels as a numpy array.
+
+    Returns:
+        KnnClassifier: Trained Knn-Compressor model ready to make predictions.
+    """
+
+    compressor: BaseCompressor = GZipCompressor()
+    model: KnnClassifier = KnnClassifier(
+        compressor=compressor,
+        training_inputs=train_text,
+        training_labels=train_labels,
+        distance_metric=distance_metric,
+    )
+
+    return model
+
+
+def main() -> None:
+    print(f"Fetching data...")
+    ((train_text, train_labels), (test_text, test_labels)) = get_data()
+
+    print(f"Fitting model...")
+    model = fit_model(train_text, train_labels)
+
+    # Randomly sampling from the test set.
+    # The IMDb test data comes in with all of the
+    # `1` labels first, then all of the `2` labels
+    # last, so we're shuffling so that our model
+    # has something to predict other than `1`.
+
+    random_indicies = np.random.choice(test_text.shape[0], 1000, replace=False)
+
+    sample_test_text = test_text[random_indicies]
+    sample_test_labels = test_labels[random_indicies]
+
+    print(f"Generating predictions...")
+    top_k = 1
+
+    # Here we use the `sampling_percentage` to save time
+    # at the expense of worse predictions. This
+    # `sampling_percentage` selects a random % of training
+    # data to compare `sample_test_text` against rather
+    # than comparing it against the entire training dataset.
+    (distances, labels, similar_samples) = model.predict(
+        sample_test_text, top_k, sampling_percentage=0.01
+    )
+
+    print(classification_report(sample_test_labels, labels.reshape(-1)))
+
+
+if __name__ == "__main__":
+    main()
+
+
+#               precision    recall  f1-score   support
+
+#            1       0.57      0.63      0.60       495
+#            2       0.59      0.53      0.56       505
+
+#     accuracy                           0.58      1000
+#    macro avg       0.58      0.58      0.58      1000
+# weighted avg       0.58      0.58      0.58      1000
diff --git a/npc_gzip/__init__.py b/npc_gzip/__init__.py
new file mode 100644
index 000000000..e29214f7f
--- /dev/null
+++ b/npc_gzip/__init__.py
@@ -0,0 +1,4 @@
+from . import compressors
+from .distance import *
+from .exceptions import *
+from .utils import *
diff --git a/npc_gzip/aggregations.py b/npc_gzip/aggregations.py
new file mode 100644
index 000000000..adedfdf56
--- /dev/null
+++ b/npc_gzip/aggregations.py
@@ -0,0 +1,43 @@
+import itertools
+
+import numpy as np
+
+
+def concatenate_with_space(stringa: str, stringb: str) -> str:
+    """
+    Combines `stringa` and `stringb` with a space.
+
+    Arguments:
+        stringa (str): First item.
+        stringb (str): Second item.
+
+    Returns:
+        str: `{stringa} {stringb}`
+    """
+    return stringa + " " + stringb
+
+
+def aggregate_strings(stringa: str, stringb: str, by_character: bool = False) -> str:
+    """
+    Aggregates strings.
+
+    (replaces agg_by_jag_char, agg_by_jag_word)
+
+    Arguments:
+        stringa (str): First item.
+        stringb (str): Second item.
+        by_character (bool): True if you want to join the combined string by character,
+                             Else combines by word
+
+    Returns:
+        str: combination of stringa and stringb
+    """
+
+    stringa_list: list = stringa.split()
+    stringb_list: list = stringb.split()
+
+    zipped_lists: list = list(zip(stringa_list, stringb_list))
+    out: list = list(itertools.chain(*zipped_lists))
+
+    aggregated: str = ("" if by_character else " ").join(out)
+    return aggregated
diff --git a/npc_gzip/compressors/__init__.py b/npc_gzip/compressors/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/npc_gzip/compressors/base.py b/npc_gzip/compressors/base.py
new file mode 100644
index 000000000..9b52ffe71
--- /dev/null
+++ b/npc_gzip/compressors/base.py
@@ -0,0 +1,112 @@
+import os
+from types import ModuleType
+
+from npc_gzip.exceptions import InvalidCompressorException
+
+
+class BaseCompressor:
+    """
+    Default compressor class that other compressors inherit
+    from.
+
+    >>> import gzip
+
+    >>> compressor = BaseCompressor(compressor=gzip)
+
+    >>> example = "Hello there!"
+    >>> compressed_length: int = compressor.get_compressed_length(example)
+    >>> bits_per_character: float = compressor.get_bits_per_character(example)
+    >>> assert isinstance(compressed_length, int)
+    >>> assert isinstance(bits_per_character, float)
+    """
+
+    def __init__(self, compressor: ModuleType) -> None:
+        if not isinstance(compressor, (ModuleType, BaseCompressor)):
+            raise InvalidCompressorException(
+                f"Not a function passed: {type(compressor)}"
+            )
+        self.compressor = compressor
+
+    def _open_file(self, filepath: str, as_bytes: bool = False) -> str:
+        """
+        Helper function that loads and returns the contents
+        of a file at `filepath`. Optional `as_bytes` parameter
+        will load the file contents with `open(filepath, 'rb')`
+        if True.
+
+        Arguments:
+            filepath (str): Path to the file to read contents from.
+            as_bytes (bool): [Optional] If true, opens the file as bytes.
+
+        Returns:
+            str: File contents as a string.
+        """
+
+        assert os.path.isfile(filepath), f"Filepath ({filepath}) does not exist."
+        file_contents = None
+        open_mode = "r"
+        if as_bytes:
+            open_mode = "rb"
+
+        with open(filepath, open_mode) as f:
+            file_contents = f.read()
+
+        if as_bytes:
+            file_contents = file_contents.decode("utf-8")
+
+        return file_contents
+
+    def _compress(self, x: str) -> bytes:
+        """
+        Applies the compression algorithm to `x` and
+        returns the results as bytes.
+
+        Arguments:
+            x (str): The string you want to compress.
+
+        Returns:
+            bytes: The compressed bytes representation of `x`.
+        """
+
+        x: bytes = x.encode("utf-8")
+        compressed: bytes = self.compressor.compress(x)
+        return compressed
+
+    def get_compressed_length(self, x: str) -> int:
+        """
+        Calculates the size of `x` once compressed.
+
+        Arguments:
+            x (str): String you want to compute the compressed length of.
+
+        Returns:
+            int: Length of `x` once compressed.
+        """
+
+        compressed_value: bytes = self._compress(x)
+        compressed_length: int = len(compressed_value)
+        return compressed_length
+
+    def get_bits_per_character(self, x: str) -> float:
+        """
+        Returns the compressed size of `x` relative to
+        the number of characters in string `x`.
+
+        Arguments:
+            x (str): String you want to compute the compressed length per
+                     character in.
+
+        Returns:
+            float: Length of `x` once compressed divided by the number of
+                 characters in `x`.
+        """
+
+        compressed_value: bytes = self._compress(x)
+        compressed_length: int = len(compressed_value)
+        compressed_length_in_bits: int = compressed_length * 8
+        number_of_characters: int = len(x)
+
+        compressed_length_per_number_of_characters: float = (
+            compressed_length_in_bits / number_of_characters
+        )
+        return compressed_length_per_number_of_characters
diff --git a/npc_gzip/compressors/bz2_compressor.py b/npc_gzip/compressors/bz2_compressor.py
new file mode 100644
index 000000000..3ec1a994c
--- /dev/null
+++ b/npc_gzip/compressors/bz2_compressor.py
@@ -0,0 +1,26 @@
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.exceptions import MissingDependencyException
+
+
+class Bz2Compressor(BaseCompressor):
+    """
+    bz2 compressor that inherits from
+    `npc_gzip.compressors.base.BaseCompressor`
+
+    >>> compressor: BaseCompressor = Bz2Compressor()
+    >>> example: str = "Hello there!"
+    >>> compressed_length: int = compressor.get_compressed_length(example)
+    >>> bits_per_character: float = compressor.get_bits_per_character(example)
+    >>> assert isinstance(compressed_length, int)
+    >>> assert isinstance(bits_per_character, float)
+    """
+
+    def __init__(self) -> None:
+        super().__init__(self)
+
+        try:
+            import bz2
+        except ModuleNotFoundError as e:
+            raise MissingDependencyException("bz2") from e
+
+        self.compressor = bz2
diff --git a/npc_gzip/compressors/gzip_compressor.py b/npc_gzip/compressors/gzip_compressor.py
new file mode 100644
index 000000000..0ab009e61
--- /dev/null
+++ b/npc_gzip/compressors/gzip_compressor.py
@@ -0,0 +1,26 @@
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.exceptions import MissingDependencyException
+
+
+class GZipCompressor(BaseCompressor):
+    """
+    gzip compressor that inherits from
+    `npc_gzip.compressors.base.BaseCompressor`
+
+    >>> compressor: BaseCompressor = GZipCompressor()
+    >>> example: str = "Hello there!"
+    >>> compressed_length: int = compressor.get_compressed_length(example)
+    >>> bits_per_character: float = compressor.get_bits_per_character(example)
+    >>> assert isinstance(compressed_length, int)
+    >>> assert isinstance(bits_per_character, float)
+    """
+
+    def __init__(self) -> None:
+        super().__init__(self)
+
+        try:
+            import gzip
+        except ModuleNotFoundError as e:
+            raise MissingDependencyException("gzip") from e
+
+        self.compressor = gzip
diff --git a/npc_gzip/compressors/lzma_compressor.py b/npc_gzip/compressors/lzma_compressor.py
new file mode 100644
index 000000000..ea84bd2e4
--- /dev/null
+++ b/npc_gzip/compressors/lzma_compressor.py
@@ -0,0 +1,26 @@
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.exceptions import MissingDependencyException
+
+
+class LzmaCompressor(BaseCompressor):
+    """
+    lzma compressor that inherits from
+    `npc_gzip.compressors.base.BaseCompressor`
+
+    >>> compressor: BaseCompressor = LzmaCompressor()
+    >>> example: str = "Hello there!"
+    >>> compressed_length: int = compressor.get_compressed_length(example)
+    >>> bits_per_character: float = compressor.get_bits_per_character(example)
+    >>> assert isinstance(compressed_length, int)
+    >>> assert isinstance(bits_per_character, float)
+    """
+
+    def __init__(self) -> None:
+        super().__init__(self)
+
+        try:
+            import lzma
+        except ModuleNotFoundError as e:
+            raise MissingDependencyException("lzma") from e
+
+        self.compressor = lzma
diff --git a/npc_gzip/distance.py b/npc_gzip/distance.py
new file mode 100644
index 000000000..f999b1d68
--- /dev/null
+++ b/npc_gzip/distance.py
@@ -0,0 +1,223 @@
+from typing import Sequence, Union
+
+import numpy as np
+
+from npc_gzip.exceptions import CompressedValuesEqualZero, InvalidShapeException
+
+
+class Distance:
+    """
+    Used to calculate the distance between compressed
+    objects. Typical usage is your training data as
+    `compressed_values_a`, the data you want to predict
+    on as `compressed_values_b` and the two values
+    concatenated then compressed as `compressed_values_ab`.
+
+    >>> import random
+
+    >>> a = random.random()
+    >>> b = random.random()
+    >>> ab = random.random()
+
+    >>> distance = Distance(a, b, ab)
+    >>> ncd: float = distance._ncd(a, b, ab)
+    >>> cdm: float = distance._cdm(a, b, ab)
+    >>> clm: float = distance._clm(a, b, ab)
+    >>> mse: float = distance._mse(a, b)
+
+    >>> assert isinstance(ncd, float)
+    >>> assert isinstance(cdm, float)
+    >>> assert isinstance(clm, float)
+    >>> assert isinstance(mse, float)
+
+    >>> a = np.random.rand(3, 10)
+    >>> b = np.random.rand(3, 10)
+    >>> ab = np.random.rand(3, 10)
+
+    >>> distance = Distance(a, b, ab)
+
+    >>> ncd: np.ndarray = distance.ncd
+    >>> cdm: np.ndarray = distance.cdm
+    >>> clm: np.ndarray = distance.clm
+    >>> mse: np.ndarray = distance.mse
+
+    >>> assert isinstance(ncd, np.ndarray)
+    >>> assert isinstance(cdm, np.ndarray)
+    >>> assert isinstance(clm, np.ndarray)
+    >>> assert isinstance(mse, np.ndarray)
+    """
+
+    def __init__(
+        self,
+        compressed_values_a: Sequence,
+        compressed_values_b: Sequence,
+        compressed_values_ab: Sequence,
+    ) -> None:
+        if not isinstance(compressed_values_a, np.ndarray):
+            compressed_values_a = np.array(compressed_values_a)
+
+        if not isinstance(compressed_values_b, np.ndarray):
+            compressed_values_b = np.array(compressed_values_b)
+
+        if not isinstance(compressed_values_ab, np.ndarray):
+            compressed_values_ab = np.array(compressed_values_ab)
+
+        if (
+            compressed_values_a.shape
+            == compressed_values_b.shape
+            == compressed_values_ab.shape
+        ):
+            self.compressed_values_a = compressed_values_a
+            self.compressed_values_b = compressed_values_b
+            self.compressed_values_ab = compressed_values_ab
+        else:
+            raise InvalidShapeException(
+                compressed_values_a,
+                compressed_values_b,
+                compressed_values_ab,
+                function_name="Distance.__init__",
+            )
+
+    def _ncd(
+        self,
+        compressed_value_a: float,
+        compressed_value_b: float,
+        compressed_value_ab: float,
+    ) -> float:
+        denominator = max(compressed_value_a, compressed_value_b)
+        if denominator == 0:
+            raise CompressedValuesEqualZero(
+                compressed_value_a, compressed_value_b, function_name="Distance._ncd"
+            )
+
+        numerator = compressed_value_ab - min(compressed_value_a, compressed_value_b)
+        distance = numerator / denominator
+        return distance
+
+    def _cdm(
+        self,
+        compressed_value_a: float,
+        compressed_value_b: float,
+        compressed_value_ab: float,
+    ) -> float:
+        denominator = compressed_value_a + compressed_value_b
+        if denominator == 0:
+            raise CompressedValuesEqualZero(
+                compressed_value_a, compressed_value_b, function_name="Distance._cdm"
+            )
+
+        numerator = compressed_value_ab
+
+        distance = numerator / denominator
+        return distance
+
+    def _clm(
+        self,
+        compressed_value_a: float,
+        compressed_value_b: float,
+        compressed_value_ab: float,
+    ) -> float:
+        denominator = compressed_value_ab
+        if denominator == 0:
+            raise CompressedValuesEqualZero(
+                compressed_value_ab, function_name="Distance._clm"
+            )
+
+        numerator = 1 - (compressed_value_a + compressed_value_b - compressed_value_ab)
+
+        distance = numerator / denominator
+        return distance
+
+    def _mse(
+        self,
+        compressed_value_a: Sequence,
+        compressed_value_b: Sequence,
+    ) -> np.ndarray:
+        """
+        Computes the mean squared error between two
+        values.
+
+        """
+
+        if not isinstance(compressed_value_a, np.ndarray):
+            compressed_value_a = np.array(compressed_value_a)
+
+        if not isinstance(compressed_value_b, np.ndarray):
+            compressed_value_b = np.array(compressed_value_b)
+
+        compressed_value_a = compressed_value_a.reshape(-1)
+        compressed_value_b = compressed_value_b.reshape(-1)
+
+        numerator = np.sum((compressed_value_a - compressed_value_b) ** 2)
+        denominator = compressed_value_a.shape[0]
+
+        if denominator == 0:
+            raise CompressedValuesEqualZero(
+                compressed_value_a, compressed_value_b, function_name="Distance._mse"
+            )
+
+        mse = numerator / denominator
+        return mse
+
+    @property
+    def ncd(self) -> np.ndarray:
+        """
+        A numpy vectorized form of self._ncd.
+
+        """
+
+        out = np.vectorize(self._ncd)(
+            self.compressed_values_a,
+            self.compressed_values_b,
+            self.compressed_values_ab,
+        )
+        out = out.reshape(self.compressed_values_a.shape)
+
+        return out
+
+    @property
+    def cdm(self) -> np.ndarray:
+        """
+        A numpy vectorized form of self._cdm.
+
+        """
+
+        out = np.vectorize(self._cdm)(
+            self.compressed_values_a,
+            self.compressed_values_b,
+            self.compressed_values_ab,
+        )
+        out = out.reshape(self.compressed_values_a.shape)
+
+        return out
+
+    @property
+    def clm(self) -> np.ndarray:
+        """
+        A numpy vectorized form of self._clm.
+
+        """
+
+        out = np.vectorize(self._clm)(
+            self.compressed_values_a,
+            self.compressed_values_b,
+            self.compressed_values_ab,
+        )
+        out = out.reshape(self.compressed_values_a.shape)
+
+        return out
+
+    @property
+    def mse(self) -> np.ndarray:
+        """
+        A numpy vectorized form of self._mse.
+
+        """
+
+        out = np.vectorize(self._mse)(
+            self.compressed_values_a,
+            self.compressed_values_b,
+        )
+        out = out.reshape(self.compressed_values_a.shape)
+
+        return out
diff --git a/npc_gzip/exceptions.py b/npc_gzip/exceptions.py
new file mode 100644
index 000000000..a59324ee6
--- /dev/null
+++ b/npc_gzip/exceptions.py
@@ -0,0 +1,188 @@
+from typing import Any, Optional
+
+import numpy as np
+
+
+class InvalidCompressorException(Exception):
+    """
+    Is raised when a user is trying to use a compression
+    library that is not supported.
+    """
+
+    def __init__(self, compression_library: str) -> None:
+        self.message = f"""
+        Compression Library ({compression_library}) 
+        is not currently supported.
+        """
+        super().__init__(self.message)
+
+
+class MissingDependencyException(Exception):
+    """
+    Is raised when an underlying dependency is not
+    found when loading a library.
+    """
+
+    def __init__(self, compression_library: str) -> None:
+        self.message = f"""
+        Compression Library ({compression_library}) 
+        is missing an underlying dependency. Try 
+        installing those missing dependencies and 
+        load this again. 
+
+        Common missing dependencies for:
+
+        * lzma:
+            * brew install xz
+            * sudo apt-get install lzma liblzma-dev libbz2-dev
+
+        * bz2:
+            * sudo apt-get install lzma liblzma-dev libbz2-dev
+
+        """
+        super().__init__(self.message)
+
+
+class StringTooShortException(Exception):
+    def __init__(
+        self, stringa: str, stringb: str, function_name: Optional[str] = None
+    ) -> None:
+        self.message = f"""
+        Unable to aggregate ({stringa}) and ({stringb}).
+        One or both of the two strings are too short to concatenate.
+        
+        """
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+
+        super().__init__(self.message)
+
+
+class CompressedValuesEqualZero(Exception):
+    def __init__(
+        self,
+        compressed_value_a: float,
+        compressed_value_b: Optional[float] = None,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        The combination of compressed values passed equal zero. 
+        This will result in a divide by zero error.
+
+        
+        """
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
+
+
+class AllOrNoneException(Exception):
+    def __init__(
+        self,
+        a: Any,
+        b: Any,
+        c: Any,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        The passed values must either all be None or not None.
+            arg1: {type(a)}
+            arg2: {type(b)}
+            arg3: {type(c)}
+        
+        """
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
+
+
+class InvalidShapeException(Exception):
+    def __init__(
+        self,
+        array_a: np.ndarray,
+        array_b: np.ndarray,
+        array_c: np.ndarray,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        The passed values must either all of the same shape.
+            arg1: {array_a.shape}
+            arg2: {array_b.shape}
+            arg3: {array_c.shape}
+        
+        """
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
+
+
+class UnsupportedDistanceMetricException(Exception):
+    def __init__(
+        self,
+        distance_metric: str,
+        supported_distance_metrics: Optional[list] = None,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        The `distance_metric` ({distance_metric}) provided is not 
+        currently supported. Please submit an Issue and/or
+        Pull Request here to add support:
+        https://github.com/bazingagin/npc_gzip
+        
+        """
+
+        if supported_distance_metrics is not None:
+            self.message += (
+                f"supported_distance_metrics: {supported_distance_metrics}\n"
+            )
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
+
+
+class InvalidObjectTypeException(TypeError):
+    def __init__(
+        self,
+        passed_type: str,
+        supported_types: Optional[list] = None,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        The type passed ({passed_type}) provided is not 
+        currently supported. 
+        
+        """
+
+        if supported_types is not None:
+            self.message += f"supported types: {supported_types}\n"
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
+
+
+class InputLabelEqualLengthException(Exception):
+    def __init__(
+        self,
+        training_samples: int,
+        label_samples: int,
+        function_name: Optional[str] = None,
+    ) -> None:
+        self.message = f"""
+        If training labels are passed, the number 
+        of training data samples must equal the 
+        number of training label samples
+        
+        training_samples: {training_samples}
+        label_samples: {label_samples}
+        
+        """
+
+        if function_name is not None:
+            self.message += f"function_name: {function_name}"
+        super().__init__(self.message)
diff --git a/npc_gzip/knn_classifier.py b/npc_gzip/knn_classifier.py
new file mode 100644
index 000000000..efbb07914
--- /dev/null
+++ b/npc_gzip/knn_classifier.py
@@ -0,0 +1,356 @@
+from typing import Optional, Sequence, Union
+
+import numpy as np
+from tqdm import tqdm
+
+from npc_gzip.aggregations import concatenate_with_space
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.distance import Distance
+from npc_gzip.exceptions import (
+    InputLabelEqualLengthException,
+    InvalidObjectTypeException,
+    UnsupportedDistanceMetricException,
+)
+
+
+class KnnClassifier:
+    """
+    Given the training input and optional
+    training labels data, this class stores
+    the data in memory
+
+    >>> import random
+    >>> from npc_gzip.compressors.gzip_compressor import GZipCompressor
+
+    >>> training_data = ["hey", "hi", "how are you?", "not too bad"]
+
+    >>> training_labels = [random.randint(0, 1) for _ in range(len(training_data))]
+    >>> assert len(training_data) == len(training_labels)
+
+    >>> model = KnnClassifier(compressor=GZipCompressor(), training_inputs=training_data, training_labels=training_labels, distance_metric="ncd")
+
+    >>> test = np.array(["hey", "you are a real pain in my ass", "go away please"])
+
+    >>> top_k = 1
+    >>> distances, labels, similar_samples = model.predict(test, top_k=top_k)
+    >>> assert distances.shape == (test.shape[0], len(training_data))
+    >>> assert labels.shape == similar_samples.shape
+    >>> assert distances.shape[0] == labels.shape[0] == similar_samples.shape[0]
+    """
+
+    def __init__(
+        self,
+        compressor: BaseCompressor,
+        training_inputs: Sequence,
+        training_labels: Optional[Sequence] = None,
+        distance_metric: str = "ncd",
+    ) -> None:
+        self.compressor = compressor
+
+        if isinstance(training_inputs, list) and isinstance(training_labels, list):
+            if training_labels is not None:
+                if len(training_inputs) != len(training_labels):
+                    raise InputLabelEqualLengthException(
+                        len(training_inputs),
+                        len(training_labels),
+                        function_name="KnnGzip.__init__",
+                    )
+
+            self.training_inputs: np.ndarray = np.array(training_inputs).reshape(-1)
+            self.training_labels: np.ndarray = np.array(training_labels).reshape(-1)
+
+        elif isinstance(training_inputs, np.ndarray) or isinstance(
+            training_labels, np.ndarray
+        ):
+            self.training_inputs: np.ndarray = (
+                np.array(training_inputs)
+                if isinstance(training_inputs, list)
+                else training_inputs
+            )
+            self.training_labels: np.ndarray = (
+                np.array(training_labels)
+                if isinstance(training_labels, list)
+                else training_labels
+            )
+
+            self.training_inputs = self.training_inputs.reshape(-1)
+            self.training_labels = self.training_labels.reshape(-1)
+
+        else:
+            raise InvalidObjectTypeException(
+                type(training_inputs),
+                supported_types=[list, np.ndarray],
+                function_name="KnnGzip.__init__",
+            )
+
+        assert (
+            self.training_inputs.shape == self.training_labels.shape
+        ), f"""
+        Training Inputs and Labels did not maintain their 
+        shape during the conversion from lists to numpy arrays.
+        This is most likely a bug in the numpy package:
+        
+        self.training_inputs.shape: {self.training_inputs.shape}
+        self.training_labels.shape: {self.training_labels.shape}
+        """
+
+        self.supported_distance_metrics: list = ["ncd", "clm", "cdm", "mse"]
+
+        if distance_metric not in self.supported_distance_metrics:
+            raise UnsupportedDistanceMetricException(
+                distance_metric,
+                self.supported_distance_metrics,
+                function_name="KnnGzip.__init__",
+            )
+
+        self.distance_metric = distance_metric
+
+        self.compressed_training_inputs: list = [
+            self.compressor.get_compressed_length(data) for data in self.training_inputs
+        ]
+        self.compressed_training_inputs: np.ndarray = np.array(
+            self.compressed_training_inputs
+        ).reshape(-1)
+
+    def _calculate_distance(
+        self,
+        compressed_input: np.ndarray,
+        compressed_combined: np.ndarray,
+        compressed_training: Optional[np.ndarray] = None,
+    ) -> np.ndarray:
+        """
+        Helper function that converts the string representation
+        of `self.distance_metric` to the actual
+        `npc_gzip.distance.Distance.[distance_metric]`. Then
+        that distance metric is calculated using
+        `self.compressed_training_inputs`, `compressed_input` and
+        `compressed_combined`.
+
+        Arguments:
+            compressed_input (np.ndarray): Numpy array representing the
+                                       compressed lengths of the input
+                                       data to be scored.
+            compressed_combined (np.ndarray): Numpy array representing the
+                                              compressed lengths of the input
+                                              data combined with
+                                              each training sample to be scored.
+        Returns:
+            np.ndarray: Numpy array containing the distance metric.
+        """
+
+        if compressed_training is None:
+            distance = Distance(
+                np.resize(self.compressed_training_inputs, compressed_input.shape),
+                compressed_input,
+                compressed_combined,
+            )
+        else:
+            distance = Distance(
+                np.resize(compressed_training, compressed_input.shape),
+                compressed_input,
+                compressed_combined,
+            )
+
+        if self.distance_metric == "ncd":
+            return distance.ncd
+        elif self.distance_metric == "clm":
+            return distance.clm
+        elif self.distance_metric == "cdm":
+            return distance.cdm
+        elif self.distance_metric == "mse":
+            return distance.mse
+        else:
+            raise UnsupportedDistanceMetricException(
+                self.distance_metric,
+                supported_distance_metrics=self.supported_distance_metrics,
+                function_name="_calculate_distance",
+            )
+
+    def _compress_sample(
+        self,
+        x: str,
+        training_inputs: Optional[np.ndarray] = None,
+    ) -> tuple:
+        """
+        Helper method that compresses `x` against each
+        item in `self.training_inputs` and returns the
+        distance between each sample using the
+        self.distance_metric from the
+        `npc_gzip.distance.Distance` object.
+
+        Arguments:
+            x (np.ndarray): The sample data to compare against
+                            the `training_inputs`.
+            training_inputs (np.ndarray): [Optional] If provided, this
+                                          method will use `training_inputs`
+                                          when calculating the distance matrix
+                                          rather than `self.training_inputs`.
+
+        Returns:
+            np.ndarray: Compressed length of `x` as an array of shape
+                        [self.compressed_training_inputs.shape[0]].
+            np.ndarray: Compressed length of the combination of `x` and
+                        each training sample as an array of shape
+                        [self.compressed_training_inputs.shape[0]].
+        """
+
+        assert isinstance(
+            x, str
+        ), f"Non-string was passed to self._compress_sample: {x}"
+
+        if training_inputs is None:
+            training_inputs = self.training_inputs
+
+        compressed_input_length: int = self.compressor.get_compressed_length(x)
+        compressed_input: list = [
+            compressed_input_length for _ in range(training_inputs.shape[0])
+        ]
+        compressed_input: np.ndarray = np.array(compressed_input).reshape(-1)
+        assert compressed_input.shape == training_inputs.shape
+
+        combined: list = []
+        for training_sample in training_inputs:
+            train_and_x: str = concatenate_with_space(training_sample, x)
+            combined_compressed: int = self.compressor.get_compressed_length(
+                train_and_x
+            )
+            combined.append(combined_compressed)
+
+        combined: np.ndarray = np.array(combined).reshape(-1)
+        assert training_inputs.shape == compressed_input.shape == combined.shape
+
+        return (compressed_input, combined)
+
+    def sample_data(
+        self, sampling_percentage: float = 1.0
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Given a `sampling_percentage`, this method randomly
+        samples data from `self.training_inputs` &
+        `self.training_labels` (if exists) without replacement
+        and returns two numpy arrays containing the randomly
+        sampled data.
+
+        Arguments:
+            sampling_percentage (float): (0, 1.0] % of data to
+                                         randomly sample from the
+                                         training inputs and labels.
+
+        Returns:
+            np.ndarray: Randomly sampled training inputs.
+            np.ndarray: Randomly sampled training labels.
+            np.ndarray: Indices that the training inputs &
+                        labels were sampled.
+        """
+
+        total_inputs: int = self.training_inputs.shape[0]
+        sample_size: int = int(sampling_percentage * total_inputs)
+        randomly_sampled_indices: np.ndarray = np.random.choice(
+            total_inputs, sample_size, replace=False
+        )
+
+        randomly_sampled_inputs: np.ndarray = self.training_inputs[
+            randomly_sampled_indices
+        ]
+        randomly_sampled_labels: np.ndarray = np.array([])
+        if self.training_labels is not None:
+            randomly_sampled_labels = self.training_labels[randomly_sampled_indices]
+
+        return (
+            randomly_sampled_inputs,
+            randomly_sampled_labels,
+            randomly_sampled_indices,
+        )
+
+    def predict(
+        self,
+        x: Sequence,
+        top_k: int = 1,
+        sampling_percentage: float = 1.0,
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Faster version of `predict`. This method
+        will compare `x` against a sample of the
+        training data provided and will return the
+        best matching label if `training_labels` was
+        passed during instantiation.
+
+        If `training_labels` was not passed during
+        instantiation, or if `top_k` is not None, the most
+        similar samples from `training_inputs` will be
+        returned.
+
+        resulting array is [batch_size, self.training_inputs.shape[0] ]
+
+        Arguments:
+            x (Sequence): The sample data to compare against
+                                                  the `training_inputs`.
+            top_k (int): [Optional] If not None, the most similar
+                         `k` number of samples will be returned.
+                         `top_k` must be greater than zero.
+                         [default: top_k=1]
+            sampling_percentage (float): (0.0, 1.0] % of `self.training_inputs`
+                                         to sample predictions against.
+
+        Returns:
+            np.ndarray: The distance-metrics matrix computed on the test
+                        set.
+            np.ndarray: The `top_k` most similar `self.training_labels`.
+            np.ndarray: The `top_k` best matching samples
+                           from `self.training_inputs`.
+        """
+
+        if not isinstance(x, np.ndarray):
+            x: np.ndarray = np.array(x)
+
+        assert top_k > 0, f"top_k ({top_k}) must be greater than zero."
+
+        x: np.ndarray = x.reshape(-1)
+        assert (
+            top_k <= x.shape[0]
+        ), f"""
+        top_k ({top_k}) must be less or equal to than the number of 
+        samples provided to be predicted on ({x.shape[0]})
+        
+        """
+
+        # sample training inputs and labels
+        training_inputs, training_labels, randomly_sampled_indices = self.sample_data(
+            sampling_percentage
+        )
+
+        samples: list = []
+        combined: list = []
+        for sample in tqdm(x, desc="Compressing input..."):
+            compressed_sample, combined_length = self._compress_sample(
+                sample, training_inputs=training_inputs
+            )
+            samples.append(compressed_sample)
+            combined.append(combined_length)
+
+        compressed_samples: np.ndarray = np.array(samples)
+        compressed_combined: np.ndarray = np.array(combined)
+
+        assert isinstance(training_inputs, np.ndarray)
+        assert isinstance(compressed_samples, np.ndarray)
+        assert isinstance(compressed_combined, np.ndarray)
+        assert isinstance(self.compressed_training_inputs, np.ndarray)
+
+        compressed_training: np.ndarray = self.compressed_training_inputs[
+            randomly_sampled_indices
+        ]
+
+        distances: np.ndarray = self._calculate_distance(
+            compressed_samples, compressed_combined, compressed_training
+        )
+
+        # top matching training samples and labels by
+        # minimum distance.
+
+        # get indicies of minimum top_k distances.
+        minimum_distance_indices = np.argpartition(distances, top_k)[:, :top_k]
+
+        similar_samples: np.ndarray = training_inputs[minimum_distance_indices]
+        labels: np.ndarray = training_labels[minimum_distance_indices]
+
+        return distances, labels, similar_samples
diff --git a/npc_gzip/utils.py b/npc_gzip/utils.py
new file mode 100644
index 000000000..f30c5b9ec
--- /dev/null
+++ b/npc_gzip/utils.py
@@ -0,0 +1,70 @@
+"""
+Helper functions used primarily in testing the
+rest of the codebase.
+
+>>> number_of_sentences = random.randint(1, 100)
+>>> dataset = generate_dataset(number_of_sentences)
+>>> assert len(dataset) == number_of_sentences
+"""
+
+import random
+import string
+
+from npc_gzip.exceptions import InvalidObjectTypeException
+
+
+def generate_sentence(number_of_words: int = 10) -> str:
+    """
+    Generates a sentence of random
+    numbers and letters, with 
+    `number_of_words` words in the 
+    sentence such that len(out.split()) \
+    == `number_of_words`.
+
+    Arguments:
+        number_of_words (int): The number of words you
+                               want in the sentence.
+
+    Returns:
+        str: Sentence of random numbers and letters.
+    """
+
+    assert number_of_words > 0, f"`number_of_words` must be greater than zero."
+
+    words = []
+    for word in range(number_of_words):
+        # initializing size of string
+        N = random.randint(1, 50)
+
+        words.append(
+            "".join(random.choices(string.ascii_uppercase + string.digits, k=N))
+        )
+
+    out = " ".join(words)
+    return out
+
+
+def generate_dataset(number_of_sentences: int) -> list:
+    """
+    Loops over `range(number_of_sentences)` that
+    utilizes `generate_sentence()` to generate
+    a dataset of randomly sized sentences.
+
+    Arguments:
+        number_of_sentences (int): The number of
+                                   sentences you
+                                   want in your
+                                   dataset.
+
+    Returns:
+        list: List of sentences (str).
+    """
+
+    assert number_of_sentences > 0, f"`number_of_sentences` must be greater than zero."
+
+    dataset = []
+    for sentence in range(number_of_sentences):
+        number_of_words = random.randint(1, 100)
+        dataset.append(generate_sentence(number_of_words))
+
+    return dataset
diff --git a/compressors.py b/original_codebase/compressors.py
similarity index 81%
rename from compressors.py
rename to original_codebase/compressors.py
index 7df9846d6..bffd1ae85 100644
--- a/compressors.py
+++ b/original_codebase/compressors.py
@@ -1,16 +1,14 @@
 # Compressor Framework
-import io
-import sys
+
 from importlib import import_module
 
 import numpy as np
-import torch.nn.functional as F
-from tqdm import tqdm
 
 
 class DefaultCompressor:
     """For non-neural-based compressor"""
-    def __init__(self, compressor, typ='text'):
+
+    def __init__(self, compressor, typ="text"):
         try:
             self.compressor = import_module(compressor)
         except ModuleNotFoundError:
@@ -32,7 +30,7 @@ def get_compressed_len(self, x: str) -> int:
         else:
             return len(self.compressor.compress(np.array(x).tobytes()))
 
-    def get_bits_per_char(self, original_fn: str) -> float:
+    def get_bits_per_character(self, original_fn: str) -> float:
         """
         Returns the compressed size of the original function
         in bits.
@@ -50,6 +48,6 @@ def get_bits_per_char(self, original_fn: str) -> float:
 
 
 """Test Compressors"""
-if __name__ == '__main__':
-    comp = DefaultCompressor('gzip')
-    print(comp.get_compressed_len('Hello world'))
+if __name__ == "__main__":
+    comp = DefaultCompressor("gzip")
+    print(comp.get_compressed_len("Hello world"))
diff --git a/data.py b/original_codebase/data.py
similarity index 93%
rename from data.py
rename to original_codebase/data.py
index b835a038f..2ff3b0a4f 100644
--- a/data.py
+++ b/original_codebase/data.py
@@ -312,36 +312,7 @@ def process(dataset: Iterable) -> list:
     return train_ds, test_ds
 
 
-def load_filipino() -> tuple:
-    """
-    deprecated - datasets on huggingface have overlapped train&test
-
-    Loads the Dengue Filipino dataset
-
-    Returns:
-        tuple: Tuple of lists containing the training and testing datasets respectively.
-    """
-    def process(dataset: Iterable) -> list:
-        label_dict = OrderedDict()
-        d = {"absent": 0, "dengue": 1, "health": 2, "mosquito": 3, "sick": 4}
-        for k, v in d.items():
-            label_dict[k] = v
-
-        pairs = []
-        for pair in dataset:
-            text = pair["text"]
-            for k in label_dict:
-                if pair[k] == 1:
-                    label = label_dict[k]
-            pairs.append((label, text))
-        return pairs
-
-    ds = load_dataset("dengue_filipino")
-    train_ds, test_ds = process(ds["train"]), process(ds["test"])
-    return train_ds, test_ds
-
-
-def load_filipino(data_directory):
+def load_filipino(data_directory) -> tuple:
     """
     Loads the Dengue Filipino dataset from local directory
 
@@ -352,6 +323,7 @@ def load_filipino(data_directory):
     Returns:
         tuple: Tuple of lists containing the training and testing datasets respectively.
     """
+
     def process(fn):
         pairs = []
         with open(fn, "r") as f:
@@ -359,17 +331,21 @@ def process(fn):
             for row in reader:
                 text = row[0]
                 for i in range(1, 6):
-                    if row[i] == '1':
-                        label = i-1
+                    if row[i] == "1":
+                        label = i - 1
                         pairs.append((label, text))
                         break
         return pairs
-    
-    train_ds, test_ds = process(os.path.join(data_directory, 'train.csv')), process(os.path.join(data_directory, 'test.csv'))
+
+    train_ds, test_ds = process(os.path.join(data_directory, "train.csv")), process(
+        os.path.join(data_directory, "test.csv")
+    )
     return train_ds, test_ds
 
 
-def read_img_with_label(dataset: list, indices: Sequence[int], flatten: bool = True) -> tuple:
+def read_img_with_label(
+    dataset: list, indices: Sequence[int], flatten: bool = True
+) -> tuple:
     """
     Loads items from `dataset` based on the indices listed in `indices`
     and optionally flattens them.
diff --git a/experiments.py b/original_codebase/experiments.py
similarity index 99%
rename from experiments.py
rename to original_codebase/experiments.py
index 5fef94896..2e42f7af0 100644
--- a/experiments.py
+++ b/original_codebase/experiments.py
@@ -13,11 +13,10 @@
 
 import numpy as np
 import torch
+from compressors import DefaultCompressor
 from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score
 from tqdm import tqdm
 
-from compressors import DefaultCompressor
-
 
 class KnnExpText:
     def __init__(
@@ -31,7 +30,9 @@ def __init__(
         self.distance_func = distance_function
         self.distance_matrix: list = []
 
-    def calc_dis(self, data: list, train_data: Optional[list] = None, fast: bool = False) -> None:
+    def calc_dis(
+        self, data: list, train_data: Optional[list] = None, fast: bool = False
+    ) -> None:
         """
         Calculates the distance between either `data` and itself or `data` and `train_data`
         and appends the distance to `self.distance_matrix`.
diff --git a/main_text.py b/original_codebase/main_text.py
similarity index 96%
rename from main_text.py
rename to original_codebase/main_text.py
index c923157fc..5de0a570f 100644
--- a/main_text.py
+++ b/original_codebase/main_text.py
@@ -1,7 +1,11 @@
 import argparse
 import time
 from functools import partial
+from typing import Callable
 
+from compressors import *
+from data import *
+from experiments import *
 from pathos.multiprocessing import ProcessingPool as Pool
 from torchtext.datasets import (
     AG_NEWS,
@@ -12,12 +16,7 @@
     YahooAnswers,
     YelpReviewPolarity,
 )
-
-from compressors import *
-from data import *
-from experiments import *
 from utils import *
-from typing import Callable
 
 # np.random.seed(6)
 
@@ -90,7 +89,11 @@ def record_distance(
 def non_neurl_knn_exp_given_dis(dis_matrix, k, test_label, train_label):
     knn_exp = KnnExpText(None, None, None)
     _, correct = knn_exp.calc_acc(
-        k, test_label, train_label=train_label, provided_distance_matrix=dis_matrix, rand=args.random
+        k,
+        test_label,
+        train_label=train_label,
+        provided_distance_matrix=dis_matrix,
+        rand=args.random,
     )
     return correct
 
@@ -147,7 +150,7 @@ def non_neurl_knn_exp_given_dis(dis_matrix, k, test_label, train_label):
         "swahili": 6,
         "filipino": 5,
         "kirnews": 14,
-        "custom": args.class_num
+        "custom": args.class_num,
     }
     # load dataset
     data_dir = os.path.join(args.data_dir, args.dataset)
@@ -245,11 +248,7 @@ def non_neurl_knn_exp_given_dis(dis_matrix, k, test_label, train_label):
             else:
                 start_idx = args.test_idx_start
             for i in range(0, len(test_data), 100):
-                print(
-                    "from {} to {}".format(
-                        start_idx + i, start_idx + i + 100
-                    )
-                )
+                print("from {} to {}".format(start_idx + i, start_idx + i + 100))
                 output_rel_fn = "test_dis_idx_from_{}_to_{}".format(
                     start_idx + i, start_idx + i + 100
                 )
@@ -288,4 +287,6 @@ def non_neurl_knn_exp_given_dis(dis_matrix, k, test_label, train_label):
                 print("Altogether Accuracy is: {}".format(all_correct / total_num))
             else:
                 dis_matrix = np.load(args.distance_fn)
-                non_neurl_knn_exp_given_dis(dis_matrix, args.k, test_labels, train_labels)
+                non_neurl_knn_exp_given_dis(
+                    dis_matrix, args.k, test_labels, train_labels
+                )
diff --git a/requirements.txt b/original_codebase/requirements.txt
similarity index 100%
rename from requirements.txt
rename to original_codebase/requirements.txt
diff --git a/utils.py b/original_codebase/utils.py
similarity index 98%
rename from utils.py
rename to original_codebase/utils.py
index 8c818426a..1c4769d72 100644
--- a/utils.py
+++ b/original_codebase/utils.py
@@ -1,7 +1,7 @@
 from collections.abc import Sequence
 
 import numpy as np
-import scipy
+import scipy.stats
 import torch
 
 
@@ -192,9 +192,7 @@ def agg_by_avg(i1: torch.Tensor, i2: torch.Tensor) -> torch.Tensor:
 
 
 def agg_by_min_or_max(
-    i1: torch.Tensor,
-    i2: torch.Tensor,
-    aggregate_by_minimum: bool = False,
+    i1: torch.Tensor, i2: torch.Tensor, aggregate_by_minimum: bool = False
 ) -> torch.Tensor:
     """
     Calculates the average of i1 and i2, rounding to the shortest.
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 000000000..b552ae017
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,814 @@
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+
+[[package]]
+name = "black"
+version = "23.7.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"},
+    {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"},
+    {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"},
+    {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"},
+    {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"},
+    {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"},
+    {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"},
+    {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"},
+    {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"},
+    {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"},
+    {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"},
+    {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"},
+    {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"},
+    {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"},
+    {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"},
+    {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"},
+    {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"},
+    {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"},
+    {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"},
+    {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"},
+    {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"},
+    {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
+[[package]]
+name = "certifi"
+version = "2023.7.22"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
+    {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.2.0"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"},
+    {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"},
+    {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"},
+    {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"},
+    {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"},
+    {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"},
+    {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
+]
+
+[[package]]
+name = "click"
+version = "8.1.6"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"},
+    {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.1.2"
+description = "Backport of PEP 654 (exception groups)"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"},
+    {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"},
+]
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
+name = "filelock"
+version = "3.12.2"
+description = "A platform independent file lock."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"},
+    {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"]
+
+[[package]]
+name = "idna"
+version = "3.4"
+description = "Internationalized Domain Names in Applications (IDNA)"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
+    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.0.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
+    {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
+]
+
+[[package]]
+name = "isort"
+version = "5.12.0"
+description = "A Python utility / library to sort Python imports."
+optional = false
+python-versions = ">=3.8.0"
+files = [
+    {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"},
+    {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"},
+]
+
+[package.extras]
+colors = ["colorama (>=0.4.3)"]
+pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"]
+plugins = ["setuptools"]
+requirements-deprecated-finder = ["pip-api", "pipreqs"]
+
+[[package]]
+name = "jinja2"
+version = "3.1.2"
+description = "A very fast and expressive template engine."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
+    {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+]
+
+[package.dependencies]
+MarkupSafe = ">=2.0"
+
+[package.extras]
+i18n = ["Babel (>=2.7)"]
+
+[[package]]
+name = "joblib"
+version = "1.3.1"
+description = "Lightweight pipelining with Python functions"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "joblib-1.3.1-py3-none-any.whl", hash = "sha256:89cf0529520e01b3de7ac7b74a8102c90d16d54c64b5dd98cafcd14307fdf915"},
+    {file = "joblib-1.3.1.tar.gz", hash = "sha256:1f937906df65329ba98013dc9692fe22a4c5e4a648112de500508b18a21b41e3"},
+]
+
+[[package]]
+name = "markupsafe"
+version = "2.1.3"
+description = "Safely add untrusted strings to HTML/XML markup."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"},
+    {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
+    {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"},
+    {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"},
+    {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"},
+    {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"},
+    {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"},
+]
+
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+description = "Python library for arbitrary-precision floating-point arithmetic"
+optional = false
+python-versions = "*"
+files = [
+    {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
+    {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
+]
+
+[package.extras]
+develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
+docs = ["sphinx"]
+gmpy = ["gmpy2 (>=2.1.0a4)"]
+tests = ["pytest (>=4.6)"]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
+[[package]]
+name = "networkx"
+version = "3.1"
+description = "Python package for creating and manipulating graphs and networks"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
+    {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
+]
+
+[package.extras]
+default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
+developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
+doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
+extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
+test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
+
+[[package]]
+name = "numpy"
+version = "1.25.2"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
+    {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
+    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
+    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
+    {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
+    {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
+    {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
+    {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
+    {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
+    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
+    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
+    {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
+    {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
+    {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
+    {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
+    {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
+    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
+    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
+    {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
+    {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
+    {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
+    {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
+    {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
+]
+
+[[package]]
+name = "packaging"
+version = "23.1"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
+    {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
+]
+
+[[package]]
+name = "pathspec"
+version = "0.11.2"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"},
+    {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"},
+]
+
+[[package]]
+name = "platformdirs"
+version = "3.10.0"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"},
+    {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
+
+[[package]]
+name = "pluggy"
+version = "1.2.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"},
+    {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
+[[package]]
+name = "portalocker"
+version = "2.7.0"
+description = "Wraps the portalocker recipe for easy usage"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"},
+    {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"},
+]
+
+[package.dependencies]
+pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+docs = ["sphinx (>=1.7.1)"]
+redis = ["redis"]
+tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"]
+
+[[package]]
+name = "pytest"
+version = "7.4.0"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"},
+    {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
+
+[[package]]
+name = "pywin32"
+version = "306"
+description = "Python for Window Extensions"
+optional = false
+python-versions = "*"
+files = [
+    {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"},
+    {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"},
+    {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"},
+    {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"},
+    {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"},
+    {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"},
+    {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"},
+    {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"},
+    {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"},
+    {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"},
+    {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"},
+    {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"},
+    {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"},
+    {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
+]
+
+[[package]]
+name = "requests"
+version = "2.31.0"
+description = "Python HTTP for Humans."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
+    {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
+]
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+
+[[package]]
+name = "scikit-learn"
+version = "1.3.0"
+description = "A set of python modules for machine learning and data mining"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "scikit-learn-1.3.0.tar.gz", hash = "sha256:8be549886f5eda46436b6e555b0e4873b4f10aa21c07df45c4bc1735afbccd7a"},
+    {file = "scikit_learn-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:981287869e576d42c682cf7ca96af0c6ac544ed9316328fd0d9292795c742cf5"},
+    {file = "scikit_learn-1.3.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:436aaaae2c916ad16631142488e4c82f4296af2404f480e031d866863425d2a2"},
+    {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e28d8fa47a0b30ae1bd7a079519dd852764e31708a7804da6cb6f8b36e3630"},
+    {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80c08834a473d08a204d966982a62e11c976228d306a2648c575e3ead12111"},
+    {file = "scikit_learn-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:552fd1b6ee22900cf1780d7386a554bb96949e9a359999177cf30211e6b20df6"},
+    {file = "scikit_learn-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79970a6d759eb00a62266a31e2637d07d2d28446fca8079cf9afa7c07b0427f8"},
+    {file = "scikit_learn-1.3.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:850a00b559e636b23901aabbe79b73dc604b4e4248ba9e2d6e72f95063765603"},
+    {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee04835fb016e8062ee9fe9074aef9b82e430504e420bff51e3e5fffe72750ca"},
+    {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d953531f5d9f00c90c34fa3b7d7cfb43ecff4c605dac9e4255a20b114a27369"},
+    {file = "scikit_learn-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:151ac2bf65ccf363664a689b8beafc9e6aae36263db114b4ca06fbbbf827444a"},
+    {file = "scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a885a9edc9c0a341cab27ec4f8a6c58b35f3d449c9d2503a6fd23e06bbd4f6a"},
+    {file = "scikit_learn-1.3.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9877af9c6d1b15486e18a94101b742e9d0d2f343d35a634e337411ddb57783f3"},
+    {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c470f53cea065ff3d588050955c492793bb50c19a92923490d18fcb637f6383a"},
+    {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd6e2d7389542eae01077a1ee0318c4fec20c66c957f45c7aac0c6eb0fe3c612"},
+    {file = "scikit_learn-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:3a11936adbc379a6061ea32fa03338d4ca7248d86dd507c81e13af428a5bc1db"},
+    {file = "scikit_learn-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:998d38fcec96584deee1e79cd127469b3ad6fefd1ea6c2dfc54e8db367eb396b"},
+    {file = "scikit_learn-1.3.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:ded35e810438a527e17623ac6deae3b360134345b7c598175ab7741720d7ffa7"},
+    {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e8102d5036e28d08ab47166b48c8d5e5810704daecf3a476a4282d562be9a28"},
+    {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7617164951c422747e7c32be4afa15d75ad8044f42e7d70d3e2e0429a50e6718"},
+    {file = "scikit_learn-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d54fb9e6038284548072df22fd34777e434153f7ffac72c8596f2d6987110dd"},
+]
+
+[package.dependencies]
+joblib = ">=1.1.1"
+numpy = ">=1.17.3"
+scipy = ">=1.5.0"
+threadpoolctl = ">=2.0.0"
+
+[package.extras]
+benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"]
+docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
+examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"]
+tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"]
+
+[[package]]
+name = "scipy"
+version = "1.9.3"
+description = "Fundamental algorithms for scientific computing in Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"},
+    {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"},
+    {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"},
+    {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"},
+    {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"},
+    {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"},
+    {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"},
+    {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"},
+    {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"},
+    {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"},
+    {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"},
+    {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"},
+    {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"},
+    {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"},
+    {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"},
+    {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"},
+    {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"},
+    {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"},
+    {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"},
+    {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"},
+    {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"},
+]
+
+[package.dependencies]
+numpy = ">=1.18.5,<1.26.0"
+
+[package.extras]
+dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"]
+doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"]
+test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
+
+[[package]]
+name = "sympy"
+version = "1.12"
+description = "Computer algebra system (CAS) in Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
+    {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
+]
+
+[package.dependencies]
+mpmath = ">=0.19"
+
+[[package]]
+name = "threadpoolctl"
+version = "3.2.0"
+description = "threadpoolctl"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"},
+    {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"},
+]
+
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
+
+[[package]]
+name = "torch"
+version = "2.0.1"
+description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+    {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"},
+    {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"},
+    {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"},
+    {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"},
+    {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"},
+    {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"},
+    {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"},
+    {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"},
+    {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"},
+    {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"},
+    {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"},
+    {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"},
+    {file = "torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"},
+    {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"},
+    {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"},
+    {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"},
+    {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"},
+    {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"},
+    {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"},
+    {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"},
+]
+
+[package.dependencies]
+filelock = "*"
+jinja2 = "*"
+networkx = "*"
+sympy = "*"
+typing-extensions = "*"
+
+[package.extras]
+opt-einsum = ["opt-einsum (>=3.3)"]
+
+[[package]]
+name = "torchdata"
+version = "0.6.1"
+description = "Composable data loading modules for PyTorch"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "torchdata-0.6.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b97f60619764d5f2f62ebcf18bff8a7cfc5c84d30d04d30724b753ec95864a70"},
+    {file = "torchdata-0.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ff46aa549c0da9aa2bcba95767034a81c950dc7ccc0e24f38cf6f7878ca1826d"},
+    {file = "torchdata-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cae1b4a516124c00ef47db12bab0cc6898bd2d7e749ffb14b6ebf1fe610a6b46"},
+    {file = "torchdata-0.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:f2a8b0a388753502d49bd5bd186cd8d41dc2d91121246617804578371591b5e6"},
+    {file = "torchdata-0.6.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:6474ed86ed7e060698888fa8ece9155dc94bdb9dfcc1874fa6d7707823551adc"},
+    {file = "torchdata-0.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05db07d2793d181867578be99c211b3c71f83f3fda0bf33a70e5c93794513692"},
+    {file = "torchdata-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a9d4c7d86b0aab2f70cdf0eae50f6eea997051889b0c04ae2f843df048ba9ea"},
+    {file = "torchdata-0.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:3501325411063b0da6d85c93222234973780ca3df7d110ee1795723f9b2e3405"},
+    {file = "torchdata-0.6.1-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:129d5b2d35717862b194b31a27b61b5faa9efd4ffc36c1a07d85b320dea8b4c6"},
+    {file = "torchdata-0.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2752fd8452d42aeaf480985f5d3d82506ab1ce51ca42cdb61c981145cd2890a8"},
+    {file = "torchdata-0.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08b745c14adea7e4a36de4241485f9ea058e3ad3959c3e6dd1526c2f278006e7"},
+    {file = "torchdata-0.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:080c1c54128b4eb8e83cf4de74fa38e892cd6bf3114a557a853969543285f2d9"},
+    {file = "torchdata-0.6.1-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:7e11108140adaba55491b6124f07b61cce161f53f8604e70961da025ccd014bd"},
+    {file = "torchdata-0.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42961c40bf9c3fb66aaf4189f9154cb4f2eb3862359e64267708e3d273452d68"},
+    {file = "torchdata-0.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87d012adf90cd15d7164a886512b6746788477172394fe56971799d276f8809e"},
+    {file = "torchdata-0.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:24d7fb2fcb0af43660fc44d8583cb8f7d25762bb759e8fde003db29c5036d940"},
+    {file = "torchdata-0.6.1-py3-none-any.whl", hash = "sha256:6a6d51cfbb63efe65788ad71e84c2be23a0c6520869e075774e7fc2ee535b9ed"},
+]
+
+[package.dependencies]
+requests = "*"
+torch = "2.0.1"
+urllib3 = ">=1.25"
+
+[[package]]
+name = "torchtext"
+version = "0.15.2"
+description = "Text utilities and datasets for PyTorch"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "torchtext-0.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c60c2ec240d86c437478821891c0b167aa0b327f30047196f25da55b82f6785b"},
+    {file = "torchtext-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fe1794f9b2a9f4923f87488783b06d6c683f332a1db60d17cf65ba3f7c724c56"},
+    {file = "torchtext-0.15.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d265a648722164043bd76e59a8d49a44a96a2f4f74e5e1f84801c047db9f20ce"},
+    {file = "torchtext-0.15.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:8fe27a3e2c0ae42f07ff5cae0ea19c49b23d05aa3666bdbc1a262d7b26b8da0d"},
+    {file = "torchtext-0.15.2-cp310-cp310-win_amd64.whl", hash = "sha256:a2dfd26a35e35e85d934c5fb58bd867bf61e73ca086f1736f36dff3a22c6083a"},
+    {file = "torchtext-0.15.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cbb03114513fa44cd6155f46c5341d4674b42cd86ac1e5123b3565a5295047c2"},
+    {file = "torchtext-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa6f9115e71471cd6060abf4899663719d268d5662950a65357894234fad74a7"},
+    {file = "torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:9650a0184528cdce9201cc656575219b4539536a991d1bf86c8be8b424e58f53"},
+    {file = "torchtext-0.15.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:f44fa2bdeaba751eca9ff77c28b374234b5281c7b1d40cea38f67a871e64bfb9"},
+    {file = "torchtext-0.15.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818c39032150d2c787a07692d92163af7a52b90dab16f985bb45273728a4207"},
+    {file = "torchtext-0.15.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91edf72b1819432d3524bcd1ab8d9b649afa4d0ed1dd4df594252fa5d073078c"},
+    {file = "torchtext-0.15.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1f58b4a94e17aada5e2712f2d48437fa8617a09d18160064b0df0c094ad2a3f2"},
+    {file = "torchtext-0.15.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:826ea4726e421d2c9309b35f485e46e10cd0e9a7f2b26275be98ccf73c2691be"},
+    {file = "torchtext-0.15.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:7e57d7b7b05d56d8d80be5fc190f68c428fe26f98893f77783f9a1e3876e36e1"},
+    {file = "torchtext-0.15.2-cp38-cp38-win_amd64.whl", hash = "sha256:6935af64b2af54bd3f4be71ab0a3776f2074834adc2f44e132a9f0da5f69ba47"},
+    {file = "torchtext-0.15.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d8065c4032bfe3558c56e9090ad0cf9c7a01f8800235a938feb41244226b9d4"},
+    {file = "torchtext-0.15.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8d4e81bfad9ee7e4bc66ad41f96878cf5e3c4e9e829de824081bd3a77bc7aa7"},
+    {file = "torchtext-0.15.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a87c582bddf809a62fd6ecc3bc92a20cb78dba055fcf43ddfce668c4be8352b4"},
+    {file = "torchtext-0.15.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8d8487945b0ea3df2fedf8a9cb3e7c2165bdc2107900e82c8725d96312addd3"},
+    {file = "torchtext-0.15.2-cp39-cp39-win_amd64.whl", hash = "sha256:0c7b4671df132751323d09fee91dcc59da380d5172449ba903f04d4646a906aa"},
+]
+
+[package.dependencies]
+numpy = "*"
+requests = "*"
+torch = "2.0.1"
+torchdata = "0.6.1"
+tqdm = "*"
+
+[[package]]
+name = "tqdm"
+version = "4.65.0"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"},
+    {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["py-make (>=0.1.0)", "twine", "wheel"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+
+[[package]]
+name = "typing-extensions"
+version = "4.7.1"
+description = "Backported and Experimental Type Hints for Python 3.7+"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"},
+    {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"},
+]
+
+[[package]]
+name = "urllib3"
+version = "2.0.4"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
+    {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.9"
+content-hash = "e2cb78e14ddc73381fb5922e6ff7e09c273b17c320efd8687440b6607103e787"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..b52b44a86
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,44 @@
+[tool.poetry]
+name = "npc-gzip"
+version = "0.1.0"
+description = "Code for Paper: “Low-Resource” Text Classification: A Parameter-Free Classification Method with Compressors"
+authors = [
+    "Gin Jiang <bazingagin@gmail.com>",
+    "Zach Bloss <zacharybloss@gmail.com>",
+]
+maintainers = [
+    "Zach Bloss <zacharybloss@gmail.com>",
+    "Gin Jiang <bazingagin@gmail.com>",
+]
+license = "MIT"
+readme = "README.md"
+homepage = "https://github.com/bazingagin/npc_gzip"
+repository = "https://github.com/bazingagin/npc_gzip"
+keywords = ["npc_gzip", "knn-gzip"]
+packages = [{include = "npc_gzip"}]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+numpy = "^1.25.2"
+tqdm = "^4.65.0"
+
+[tool.poetry.group.test.dependencies]
+pytest = "^7.4.0"
+
+
+[tool.poetry.group.lint.dependencies]
+black = "^23.7.0"
+isort = "^5.12.0"
+
+
+[tool.poetry.group.examples.dependencies]
+torchtext = "^0.15.2"
+portalocker = "^2.7.0"
+scikit-learn = "^1.3.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+addopts = "--doctest-modules --ignore original_codebase/ --ignore examples/"
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_aggregations.py b/tests/test_aggregations.py
new file mode 100644
index 000000000..714268edc
--- /dev/null
+++ b/tests/test_aggregations.py
@@ -0,0 +1,25 @@
+import pytest
+
+from npc_gzip.aggregations import aggregate_strings, concatenate_with_space
+from npc_gzip.exceptions import StringTooShortException
+
+
+class TestAggregations:
+    stringa: str = "hey there how are you?"
+    stringb: str = "I am just hanging out!"
+
+    def test_concatenate_with_space(self) -> None:
+        out = concatenate_with_space(self.stringa, self.stringb)
+
+        assert len(out) == len(self.stringa) + len(self.stringb) + 1
+        assert out == f"{self.stringa} {self.stringb}"
+
+    def test_aggregate_strings(self) -> None:
+        out = aggregate_strings(self.stringa, self.stringb, by_character=False)
+        assert len(out) == len(self.stringa) + len(self.stringb) + 1
+
+    def test_aggregate_strings_by_character(self) -> None:
+        out = aggregate_strings(self.stringa, self.stringb, by_character=True)
+        total_length = len("".join(self.stringa.split()))
+        total_length += len("".join(self.stringb.split()))
+        assert len(out) == total_length
diff --git a/tests/test_base_compressor.py b/tests/test_base_compressor.py
new file mode 100644
index 000000000..c99a374d3
--- /dev/null
+++ b/tests/test_base_compressor.py
@@ -0,0 +1,53 @@
+import gzip
+from types import ModuleType
+
+import pytest
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.exceptions import InvalidCompressorException
+
+
+class TestBaseCompressor:
+    compressor = BaseCompressor(compressor=gzip)
+    example_input = "hello there!"
+
+    def test_types(self) -> None:
+        invalid_compressors = ["test", 0.1, 123, ("hello", "there"), {"hey": "hi"}]
+
+        for compressor in invalid_compressors:
+            with pytest.raises(InvalidCompressorException):
+                BaseCompressor(compressor)
+
+        valid_compressor = gzip
+        BaseCompressor(valid_compressor)
+
+    def test__open_file(self) -> None:
+        valid_filepath = "tests/test_base_compressor.py"
+        invalid_filepath = "tests/test_base_compressor.py.xyz"
+
+        with pytest.raises(AssertionError):
+            self.compressor._open_file(invalid_filepath)
+
+        with pytest.raises(AssertionError):
+            self.compressor._open_file(invalid_filepath, as_bytes=True)
+
+        file_contents = self.compressor._open_file(valid_filepath, as_bytes=False)
+        assert isinstance(file_contents, str)
+
+        file_contents = self.compressor._open_file(valid_filepath, as_bytes=True)
+        assert isinstance(file_contents, str)
+
+    def test__compress(self) -> None:
+        compressed_bytes = self.compressor._compress(self.example_input)
+        assert isinstance(compressed_bytes, bytes)
+
+    def test_get_compressed_length(self) -> None:
+        example_input_length = self.compressor.get_compressed_length(self.example_input)
+        assert isinstance(example_input_length, int)
+        assert example_input_length > 0
+
+    def test_get_bits_per_character(self) -> None:
+        example_bits_per_character = self.compressor.get_bits_per_character(
+            self.example_input
+        )
+        assert isinstance(example_bits_per_character, float)
diff --git a/tests/test_bz2_compressor.py b/tests/test_bz2_compressor.py
new file mode 100644
index 000000000..ea1ca097d
--- /dev/null
+++ b/tests/test_bz2_compressor.py
@@ -0,0 +1,30 @@
+import bz2
+from types import ModuleType
+
+import pytest
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.bz2_compressor import Bz2Compressor
+from npc_gzip.exceptions import InvalidCompressorException
+
+
+class TestBz2Compressor:
+    base_compressor = BaseCompressor(bz2)
+    compressor = Bz2Compressor()
+    example_input = "hello there!"
+
+    def test__compress(self) -> None:
+        compressed_bytes = self.compressor._compress(self.example_input)
+        base_compressed_bytes = self.base_compressor._compress(self.example_input)
+        assert compressed_bytes == base_compressed_bytes
+
+    def test_get_compressed_length(self) -> None:
+        example_input_length = self.compressor.get_compressed_length(self.example_input)
+        assert isinstance(example_input_length, int)
+        assert example_input_length > 0
+
+    def test_get_bits_per_character(self) -> None:
+        example_bits_per_character = self.compressor.get_bits_per_character(
+            self.example_input
+        )
+        assert isinstance(example_bits_per_character, float)
diff --git a/tests/test_distance.py b/tests/test_distance.py
new file mode 100644
index 000000000..b9470e0b2
--- /dev/null
+++ b/tests/test_distance.py
@@ -0,0 +1,118 @@
+import random
+
+import numpy as np
+import pytest
+
+from npc_gzip.distance import Distance
+from npc_gzip.exceptions import CompressedValuesEqualZero, InvalidShapeException
+
+
+class TestDistance:
+    array_a = np.random.rand(3, 10)
+    array_b = np.random.rand(3, 10)
+    array_ab = np.random.rand(3, 10)
+
+    float_a = random.random()
+    float_b = random.random()
+    float_ab = random.random()
+
+    def test_types(self) -> None:
+        distance = Distance(self.array_a, self.array_b, self.array_ab)
+
+        assert isinstance(distance.compressed_values_a, np.ndarray)
+        assert isinstance(distance.compressed_values_b, np.ndarray)
+        assert isinstance(distance.compressed_values_ab, np.ndarray)
+
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        assert isinstance(distance.compressed_values_a, np.ndarray)
+        assert isinstance(distance.compressed_values_b, np.ndarray)
+        assert isinstance(distance.compressed_values_ab, np.ndarray)
+
+    def test_invalid_shapes(self) -> None:
+        array_a_shape = self.array_a.shape
+        invalid_shape_array = np.random.rand(array_a_shape[0] + 1, array_a_shape[1] + 1)
+        assert invalid_shape_array.shape != self.array_a.shape
+
+        with pytest.raises(InvalidShapeException):
+            Distance(self.array_a, self.array_b, invalid_shape_array)
+
+    def test__ncd(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance._ncd(self.float_a, self.float_b, self.float_ab)
+        assert isinstance(out, float)
+
+        a = b = ab = 0
+        with pytest.raises(CompressedValuesEqualZero):
+            distance._ncd(a, b, ab)
+
+        with pytest.raises(ValueError):
+            distance._ncd(self.array_a, self.array_b, self.array_ab)
+
+    def test__cdm(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance._cdm(self.float_a, self.float_b, self.float_ab)
+        assert isinstance(out, float)
+
+        a = b = ab = 0
+        with pytest.raises(CompressedValuesEqualZero):
+            distance._cdm(a, b, ab)
+
+        with pytest.raises(ValueError):
+            distance._ncd(self.array_a, self.array_b, self.array_ab)
+
+    def test__clm(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance._clm(self.float_a, self.float_b, self.float_ab)
+        assert isinstance(out, float)
+
+        a = b = ab = 0
+        with pytest.raises(CompressedValuesEqualZero):
+            distance._clm(a, b, ab)
+
+        with pytest.raises(ValueError):
+            distance._ncd(self.array_a, self.array_b, self.array_ab)
+
+    def test__mse(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance._mse(self.float_a, self.float_b)
+        assert isinstance(out, float)
+
+        a = b = 0
+        out = distance._mse(a, b)
+        assert isinstance(out, float)
+
+    def test_ncd(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance.ncd
+        assert isinstance(out, np.ndarray)
+
+        distance = Distance(self.array_a, self.array_b, self.array_ab)
+        out = distance.ncd
+        assert isinstance(out, np.ndarray)
+
+    def test_cdm(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance.cdm
+        assert isinstance(out, np.ndarray)
+
+        distance = Distance(self.array_a, self.array_b, self.array_ab)
+        out = distance.cdm
+        assert isinstance(out, np.ndarray)
+
+    def test_clm(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance.clm
+        assert isinstance(out, np.ndarray)
+
+        distance = Distance(self.array_a, self.array_b, self.array_ab)
+        out = distance.clm
+        assert isinstance(out, np.ndarray)
+
+    def test_mse(self) -> None:
+        distance = Distance(self.float_a, self.float_b, self.float_ab)
+        out = distance.mse
+        assert isinstance(out, np.ndarray)
+
+        distance = Distance(self.array_a, self.array_b, self.array_ab)
+        out = distance.mse
+        assert isinstance(out, np.ndarray)
diff --git a/tests/test_gzip_compressor.py b/tests/test_gzip_compressor.py
new file mode 100644
index 000000000..d7ca247dc
--- /dev/null
+++ b/tests/test_gzip_compressor.py
@@ -0,0 +1,30 @@
+import gzip
+from types import ModuleType
+
+import pytest
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.gzip_compressor import GZipCompressor
+from npc_gzip.exceptions import InvalidCompressorException
+
+
+class TestBz2Compressor:
+    base_compressor = BaseCompressor(gzip)
+    compressor = GZipCompressor()
+    example_input = "hello there!"
+
+    def test__compress(self) -> None:
+        compressed_bytes = self.compressor._compress(self.example_input)
+        base_compressed_bytes = self.base_compressor._compress(self.example_input)
+        assert compressed_bytes == base_compressed_bytes
+
+    def test_get_compressed_length(self) -> None:
+        example_input_length = self.compressor.get_compressed_length(self.example_input)
+        assert isinstance(example_input_length, int)
+        assert example_input_length > 0
+
+    def test_get_bits_per_character(self) -> None:
+        example_bits_per_character = self.compressor.get_bits_per_character(
+            self.example_input
+        )
+        assert isinstance(example_bits_per_character, float)
diff --git a/tests/test_knn_classifier.py b/tests/test_knn_classifier.py
new file mode 100644
index 000000000..55f40cd70
--- /dev/null
+++ b/tests/test_knn_classifier.py
@@ -0,0 +1,179 @@
+import random
+
+import numpy as np
+import pytest
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.bz2_compressor import Bz2Compressor
+from npc_gzip.compressors.gzip_compressor import GZipCompressor
+from npc_gzip.compressors.lzma_compressor import LzmaCompressor
+from npc_gzip.distance import Distance
+from npc_gzip.exceptions import (
+    InputLabelEqualLengthException,
+    InvalidObjectTypeException,
+    UnsupportedDistanceMetricException,
+)
+from npc_gzip.knn_classifier import KnnClassifier
+from npc_gzip.utils import generate_dataset
+
+
+class TestKnnClassifier:
+    gzip_compressor: BaseCompressor = GZipCompressor()
+    bz2_compressor: BaseCompressor = Bz2Compressor()
+    lzma_compressor: BaseCompressor = LzmaCompressor()
+
+    dataset_size: int = 100
+    training_dataset: list = generate_dataset(dataset_size)
+    training_labels: list = [random.randint(0, 10) for _ in range(dataset_size)]
+
+    model = KnnClassifier(
+        compressor=gzip_compressor,
+        training_inputs=training_dataset,
+        training_labels=training_labels,
+        distance_metric="ncd",
+    )
+
+    sample_input: str = "hello there"
+
+    def test_init(self) -> None:
+        assert self.model.distance_metric == "ncd"
+        assert isinstance(self.model.training_inputs, np.ndarray)
+        assert isinstance(self.model.compressed_training_inputs, np.ndarray)
+
+        assert (
+            self.model.training_inputs.shape[0]
+            == self.model.training_labels.shape[0]
+            == self.model.compressed_training_inputs.shape[0]
+        )
+
+        assert self.model.supported_distance_metrics == ["ncd", "clm", "cdm", "mse"]
+
+        model = KnnClassifier(
+            compressor=self.gzip_compressor,
+            training_inputs=np.array(self.training_dataset),
+            training_labels=np.array(self.training_labels),
+            distance_metric="ncd",
+        )
+
+        assert isinstance(model.training_inputs, np.ndarray)
+        assert isinstance(model.compressed_training_inputs, np.ndarray)
+        assert (
+            model.training_inputs.shape[0]
+            == model.training_labels.shape[0]
+            == model.compressed_training_inputs.shape[0]
+        )
+
+    def test_invalid_metric(self) -> None:
+        with pytest.raises(UnsupportedDistanceMetricException):
+            KnnClassifier(
+                compressor=self.gzip_compressor,
+                training_inputs=np.array(self.training_dataset),
+                training_labels=np.array(self.training_labels),
+                distance_metric="hoopla",
+            )
+
+    def test_invalid_data_and_label_size(self) -> None:
+        training_data_size = 10
+        training_label_size = training_data_size - 1
+
+        dataset = generate_dataset(training_data_size)
+        labels = [random.randint(0, 10) for _ in range(training_label_size)]
+
+        with pytest.raises(InputLabelEqualLengthException):
+            KnnClassifier(
+                compressor=self.gzip_compressor,
+                training_inputs=dataset,
+                training_labels=labels,
+                distance_metric="ncd",
+            )
+
+    def test__compress_sample(self) -> None:
+        compressed_input, compressed_combined = self.model._compress_sample(
+            self.sample_input
+        )
+
+        assert isinstance(compressed_input, np.ndarray)
+        assert isinstance(compressed_combined, np.ndarray)
+        assert compressed_input.shape == compressed_combined.shape
+
+    def test__calculate_distance(self) -> None:
+        compressed_input, compressed_combined = self.model._compress_sample(
+            self.sample_input
+        )
+        distance = self.model._calculate_distance(compressed_input, compressed_combined)
+
+        assert isinstance(distance, np.ndarray)
+        assert distance.shape == compressed_input.shape == compressed_combined.shape
+
+    def test_predict(self) -> None:
+        top_k = 1
+        (distance, labels, similar_samples) = self.model.predict(
+            self.sample_input, top_k
+        )
+
+        assert distance.shape == (
+            len([self.sample_input]),
+            self.model.training_inputs.shape[0],
+        )
+        assert labels.shape == (top_k, len([self.sample_input]))
+        assert similar_samples.shape == (top_k, len([self.sample_input]))
+
+        test_set_size = random.randint(1, 50)
+        test_set = [self.sample_input for _ in range(test_set_size)]
+        top_k = 2
+        (distance, labels, similar_samples) = self.model.predict(test_set, top_k)
+
+        assert distance.shape == (test_set_size, self.model.training_inputs.shape[0])
+        assert labels.shape == (test_set_size, top_k)
+        assert similar_samples.shape == (test_set_size, top_k)
+
+    def test_negative_top_k(self) -> None:
+        test_set_size = random.randint(1, 50)
+        test_set = [self.sample_input for _ in range(test_set_size)]
+        top_k = -1
+
+        with pytest.raises(AssertionError):
+            self.model.predict(test_set, top_k)
+
+    def test_top_k_bigger_than_test_set(self) -> None:
+        test_set_size = random.randint(1, 10)
+        test_set = [self.sample_input for _ in range(test_set_size)]
+        top_k = test_set_size + 1
+        with pytest.raises(AssertionError):
+            self.model.predict(test_set, top_k)
+
+    def test_sampling_percentage(self) -> None:
+        test_set_size = random.randint(1, 10)
+        test_set = [self.sample_input for _ in range(test_set_size)]
+        top_k = 1
+        (full_distance, full_labels, full_similar_samples) = self.model.predict(
+            test_set, top_k, sampling_percentage=1.0
+        )
+        (distance, labels, similar_samples) = self.model.predict(
+            test_set, top_k, sampling_percentage=0.1
+        )
+
+        assert full_labels.shape == labels.shape
+        assert full_similar_samples.shape == similar_samples.shape
+
+    def test_sample_data(self) -> None:
+        sampling_percentage: float = 0.8
+        (
+            randomly_sampled_inputs,
+            randomly_sampled_labels,
+            randomly_sampled_indices,
+        ) = self.model.sample_data(sampling_percentage)
+
+        assert (
+            randomly_sampled_inputs.shape
+            == randomly_sampled_labels.shape
+            == randomly_sampled_indices.shape
+        )
+        assert randomly_sampled_inputs.shape[0] == int(
+            self.model.training_inputs.shape[0] * sampling_percentage
+        )
+
+        assert (
+            self.model.training_inputs[randomly_sampled_indices]
+            == randomly_sampled_inputs
+        ).all()
diff --git a/tests/test_lzma_compressor.py b/tests/test_lzma_compressor.py
new file mode 100644
index 000000000..56c406645
--- /dev/null
+++ b/tests/test_lzma_compressor.py
@@ -0,0 +1,30 @@
+import lzma
+from types import ModuleType
+
+import pytest
+
+from npc_gzip.compressors.base import BaseCompressor
+from npc_gzip.compressors.lzma_compressor import LzmaCompressor
+from npc_gzip.exceptions import InvalidCompressorException
+
+
+class TestBz2Compressor:
+    base_compressor = BaseCompressor(lzma)
+    compressor = LzmaCompressor()
+    example_input = "hello there!"
+
+    def test__compress(self) -> None:
+        compressed_bytes = self.compressor._compress(self.example_input)
+        base_compressed_bytes = self.base_compressor._compress(self.example_input)
+        assert compressed_bytes == base_compressed_bytes
+
+    def test_get_compressed_length(self) -> None:
+        example_input_length = self.compressor.get_compressed_length(self.example_input)
+        assert isinstance(example_input_length, int)
+        assert example_input_length > 0
+
+    def test_get_bits_per_character(self) -> None:
+        example_bits_per_character = self.compressor.get_bits_per_character(
+            self.example_input
+        )
+        assert isinstance(example_bits_per_character, float)
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 000000000..341762c2b
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,32 @@
+import random
+
+import pytest
+
+from npc_gzip.exceptions import InvalidObjectTypeException
+from npc_gzip.utils import generate_dataset, generate_sentence
+
+
+class TestUtils:
+    def test_generate_sentence(self) -> None:
+        for _ in range(100):
+            number_of_words = random.randint(1, 100)
+            sentence = generate_sentence(number_of_words)
+            assert len(sentence.split()) == number_of_words
+
+        with pytest.raises(TypeError):
+            generate_sentence("hey there")
+
+        with pytest.raises(AssertionError):
+            generate_sentence(-1)
+
+    def test_generate_dataset(self) -> None:
+        for _ in range(100):
+            number_of_sentences = random.randint(1, 100)
+            dataset = generate_dataset(number_of_sentences)
+            assert len(dataset) == number_of_sentences
+
+        with pytest.raises(TypeError):
+            generate_dataset("hey there")
+
+        with pytest.raises(AssertionError):
+            generate_dataset(-1)