-
-
Notifications
You must be signed in to change notification settings - Fork 214
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: implement basic e2e indexing tests
test workflow chore: better workflow chore: job name
- Loading branch information
Showing
8 changed files
with
257 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# CI workflow: install the project with Poetry and run the test suite.
name: Test

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  all_main_tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.9 (lowest supported version)
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version as a string instead of a float.
          python-version: "3.9"

      - name: Cache Poetry virtualenv
        uses: actions/cache@v3
        with:
          path: ~/.cache/pypoetry/virtualenvs
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: poetry install --with dev

      - name: Run tests
        # Run inside the Poetry-managed environment, where the dev
        # dependencies (including pytest) were installed by the previous step.
        run: poetry run pytest tests/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,6 +99,8 @@ venv.bak/ | |
# mkdocs documentation | ||
/site | ||
|
||
.ragatouille | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +0,0 @@ | ||
# TODO | ||
# Tests in v0.0.2 | ||
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import pytest | ||
import srsly | ||
|
||
from ragatouille import RAGPretrainedModel | ||
from ragatouille.utils import get_wikipedia_page | ||
|
||
|
||
def test_indexing():
    """Build the "Miyazaki" index and check the collection is written to disk.

    Reads the fixture document, indexes it with document splitting enabled,
    then asserts that the persisted collection.json holds more than one entry.
    """
    RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
    # Decode explicitly instead of relying on the platform's locale-dependent
    # default encoding; the fixture contains non-ASCII text
    # (presumably stored as UTF-8 -- confirm).
    with open("tests/data/miyazaki_wikipedia.txt", encoding="utf-8") as f:
        full_document = f.read()
    RAG.index(
        collection=[full_document],
        index_name="Miyazaki",
        max_document_length=180,
        split_documents=True,
    )
    # ensure collection is stored to disk
    collection = srsly.read_json(
        ".ragatouille/colbert/indexes/Miyazaki/collection.json"
    )
    assert len(collection) > 1
|
||
|
||
def test_search():
    """Search the "Miyazaki" index with a single query and with a batch of two.

    For each query, the top three results must each contain the expected
    passage snippet at the expected rank.
    """
    RAG = RAGPretrainedModel.from_index(".ragatouille/colbert/indexes/Miyazaki/")
    top_k = 3  # number of documents to retrieve per query; kept small for readability

    studio_query = "What animation studio did Miyazaki found?"
    son_query = "Miyazaki son name"

    # Expected snippets, listed in rank order, for each query.
    studio_snippets = [
        "In April 1984, Miyazaki opened his own office in Suginami Ward",
        "Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao, [mijaꜜzaki hajao]; born January 5, 1941)",  # noqa
        'Glen Keane said Miyazaki is a "huge influence" on Walt Disney Animation Studios and has been',  # noqa
    ]
    son_snippets = [
        "== Early life ==\nHayao Miyazaki was born on January 5, 1941",
        "Directed by Isao Takahata, with whom Miyazaki would continue to collaborate for the remainder of his career",  # noqa
        "Specific works that have influenced Miyazaki include Animal Farm (1945)",  # noqa
    ]

    # Single-query search returns a flat list of results.
    results = RAG.search(query=studio_query, k=top_k)
    assert len(results) == top_k
    for rank, snippet in enumerate(studio_snippets):
        assert snippet in results[rank]["content"]

    # Batched search returns one result list per query, in query order.
    all_results = RAG.search(query=[studio_query, son_query], k=top_k)
    for query_idx, snippets in enumerate((studio_snippets, son_snippets)):
        for rank, snippet in enumerate(snippets):
            assert snippet in all_results[query_idx][rank]["content"]
    print(all_results)
|
||
|
||
@pytest.mark.skip(reason="experimental feature.")
def test_basic_CRUD_addition():
    """Add a document to the "Miyazaki" index and check the stored collection grows."""
    index_dir = ".ragatouille/colbert/indexes/Miyazaki/"
    collection_file = ".ragatouille/colbert/indexes/Miyazaki/collection.json"

    # Record the collection size before the index is modified.
    size_before = len(srsly.read_json(collection_file))

    RAG = RAGPretrainedModel.from_index(index_dir)
    RAG.add_to_index([get_wikipedia_page("Studio_Ghibli")])

    # Re-read from disk so the check covers what was actually persisted.
    size_after = len(srsly.read_json(collection_file))
    assert size_after > size_before
    assert size_after == 140
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import pytest | ||
|
||
|
||
@pytest.mark.skip(reason="NotImplemented")
def test_from_checkpoint():
    """Placeholder test; skipped because it is not implemented yet."""
|
||
|
||
@pytest.mark.skip(reason="NotImplemented")
def test_from_index():
    """Placeholder test; skipped because it is not implemented yet."""
|
||
|
||
@pytest.mark.skip(reason="NotImplemented")
def test_searcher():
    """Placeholder test; skipped because it is not implemented yet."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import pytest | ||
|
||
|
||
@pytest.mark.skip(reason="NotImplemented")
def test_finetune():
    """Placeholder test; skipped because it is not implemented yet."""
|
||
|
||
@pytest.mark.skip(reason="NotImplemented")
def test_raw_bert():
    """Placeholder test; skipped because it is not implemented yet."""