-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refacto: redesign pipeline scorers, add input and output spans params…
… to trainable_ner #203
- Loading branch information
Showing
7 changed files
with
110 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from typing import Any, Callable, Dict, Iterable, Union | ||
|
||
from spacy.tokens import Doc | ||
from spacy.training import Example | ||
|
||
# Callable taking two iterables of Doc objects and returning a dict of metrics.
_DocsScorer = Callable[[Iterable[Doc], Iterable[Doc]], Dict[str, Any]]

# Callable taking an iterable of spaCy Example objects and returning a dict of
# metrics.
_ExamplesScorer = Callable[[Iterable[Example]], Dict[str, Any]]

# A scorer is either of the two callable shapes above.
Scorer = Union[_DocsScorer, _ExamplesScorer]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import pytest | ||
from spacy.tokens import Span | ||
|
||
import edsnlp | ||
from edsnlp.scorers.ner import create_ner_exact_scorer, create_ner_token_scorer | ||
|
||
|
||
@pytest.fixture(scope="session")
def gold_and_pred():
    """Build matching lists of gold and predicted docs carrying `covid` entities.

    Returns a 2-tuple `(gold_docs, pred_docs)`; each element is a list of two
    docs built from the same two texts, differing only in their entity spans.
    """
    nlp = edsnlp.blank("eds")

    def annotate(text, offsets):
        # Tokenize `text` and attach one "covid" entity per (start, end) pair
        # of token offsets.
        doc = nlp.make_doc(text)
        doc.ents = [Span(doc, start, end, label="covid") for start, end in offsets]
        return doc

    first_text = "Le patient a le covid 19."
    second_text = "Corona: positif. Le cvid est une maladie."

    gold_docs = [
        annotate(first_text, [(4, 6)]),
        annotate(second_text, [(0, 1), (5, 6)]),
    ]
    pred_docs = [
        annotate(first_text, [(4, 6)]),
        annotate(second_text, [(0, 2)]),
    ]

    return gold_docs, pred_docs
|
||
|
||
def test_exact_ner_scorer(gold_and_pred):
    """Check the exact-match NER scorer output on the `gold_and_pred` fixture."""
    scorer = create_ner_exact_scorer("ents")
    ner_exact_score = scorer(*gold_and_pred)
    # Precision/recall/F1 are computed floats (1/3 and 0.4 are not exactly
    # representable), so compare with a tolerance instead of strict equality.
    # `pytest.approx` on a dict still requires the key sets to match.
    assert ner_exact_score == pytest.approx(
        {
            "ents_p": 0.5,
            "ents_r": 1 / 3,
            "ents_f": 0.4,
            "support": 3,
        }
    )
|
||
|
||
def test_token_ner_scorer(gold_and_pred):
    """Check the token-level NER scorer output on the `gold_and_pred` fixture."""
    scorer = create_ner_token_scorer("ents")
    # Named for what it holds: token-level scores, not exact-match scores.
    ner_token_score = scorer(*gold_and_pred)
    # Scores are computed floats; compare with a tolerance rather than strict
    # equality. `pytest.approx` on a dict still requires the key sets to match.
    assert ner_token_score == pytest.approx(
        {
            "ents_f": 0.75,
            "ents_p": 0.75,
            "ents_r": 0.75,
            "support": 4,
        }
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters