diff --git a/edsnlp/scorers/__init__.py b/edsnlp/scorers/__init__.py index e69de29bb..564f02643 100644 --- a/edsnlp/scorers/__init__.py +++ b/edsnlp/scorers/__init__.py @@ -0,0 +1,9 @@ +from typing import Any, Callable, Dict, Iterable, Union + +from spacy.tokens import Doc +from spacy.training import Example + +Scorer = Union[ + Callable[[Iterable[Doc], Iterable[Doc]], Dict[str, Any]], + Callable[[Iterable[Example]], Dict[str, Any]], +] diff --git a/edsnlp/scorers/ner.py b/edsnlp/scorers/ner.py index 6cc5e223f..aa93157e0 100644 --- a/edsnlp/scorers/ner.py +++ b/edsnlp/scorers/ner.py @@ -1,5 +1,6 @@ from typing import Any, Dict, Iterable +import spacy.training from spacy.training import Example from edsnlp import registry @@ -103,11 +104,24 @@ def ner_token_scorer( def create_ner_exact_scorer( span_getter: SpanGetterArg, ): - return lambda examples: ner_exact_scorer(examples, span_getter) + return lambda *args, **kwargs: ner_exact_scorer( + make_examples(*args, **kwargs), span_getter + ) @registry.scorers.register("eds.ner_token_scorer") def create_ner_token_scorer( span_getter: SpanGetterArg, ): - return lambda examples: ner_token_scorer(examples, span_getter) + return lambda *args: ner_token_scorer(make_examples(*args), span_getter) + + +def make_examples(*args): + if len(args) == 2: + return [ + spacy.training.Example(reference=g, predicted=p) + for g, p in zip(*args) + ] + if len(args) == 1: + return args[0] + raise ValueError("Expected either a list of examples or two lists of spans") diff --git a/edsnlp/scorers/speed.py b/edsnlp/scorers/speed.py deleted file mode 100644 index ea50046f7..000000000 --- a/edsnlp/scorers/speed.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Any, Dict, Iterable - -from spacy.training import Example - -from edsnlp import registry - - -def speed_scorer( - examples: Iterable[Example], duration: float, cfg=None -) -> Dict[str, Any]: - words_count = [len(eg.predicted) for eg in examples] - num_words = sum(words_count) - 
num_docs = len(words_count) - - return { - "wps": num_words / duration, - "dps": num_docs / duration, - } - - -@registry.scorers.register("speed") -def create_speed_scorer(): - return speed_scorer diff --git a/pyproject.toml b/pyproject.toml index 97e6b1a3d..314500150 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -202,6 +202,10 @@ where = ["."] "tables" = "edsnlp.pipelines.misc.tables.factory:create_component" "terminology" = "edsnlp.pipelines.core.terminology.factory:create_component" +[project.entry-points."spacy_scorers"] +"eds.ner_exact_scorer" = "edsnlp.scorers.ner:create_ner_exact_scorer" +"eds.ner_token_scorer" = "edsnlp.scorers.ner:create_ner_token_scorer" + [project.entry-points."edsnlp_accelerator"] "simple" = "edsnlp.accelerators.simple:SimpleAccelerator" "multiprocessing" = "edsnlp.accelerators.multiprocessing:MultiprocessingAccelerator" diff --git a/tests/test_scorers.py b/tests/test_scorers.py new file mode 100644 index 000000000..29b626116 --- /dev/null +++ b/tests/test_scorers.py @@ -0,0 +1,47 @@ +import pytest +from spacy.tokens import Span + +import edsnlp +from edsnlp.scorers.ner import create_ner_exact_scorer, create_ner_token_scorer + + +@pytest.fixture(scope="session") +def gold_and_pred(): + nlp = edsnlp.blank("eds") + + gold_doc1 = nlp.make_doc("Le patient a le covid 19.") + gold_doc1.ents = [Span(gold_doc1, 4, 6, label="covid")] + gold_doc2 = nlp.make_doc("Corona: positif. Le cvid est une maladie.") + gold_doc2.ents = [ + Span(gold_doc2, 0, 1, label="covid"), + Span(gold_doc2, 5, 6, label="covid"), + ] + + pred_doc1 = nlp.make_doc("Le patient a le covid 19.") + pred_doc1.ents = [Span(pred_doc1, 4, 6, label="covid")] + pred_doc2 = nlp.make_doc("Corona: positif. 
Le cvid est une maladie.") + pred_doc2.ents = [Span(pred_doc2, 0, 2, label="covid")] + + return [gold_doc1, gold_doc2], [pred_doc1, pred_doc2] + + +def test_exact_ner_scorer(gold_and_pred): + scorer = create_ner_exact_scorer("ents") + ner_exact_score = scorer(*gold_and_pred) + assert ner_exact_score == { + "ents_p": 0.5, + "ents_r": 1 / 3, + "ents_f": 0.4, + "support": 3, + } + + +def test_token_ner_scorer(gold_and_pred): + scorer = create_ner_token_scorer("ents") + ner_exact_score = scorer(*gold_and_pred) + assert ner_exact_score == { + "ents_f": 0.75, + "ents_p": 0.75, + "ents_r": 0.75, + "support": 4, + } diff --git a/tests/training/config.cfg b/tests/training/config.cfg index 138cfeda5..696ad953c 100644 --- a/tests/training/config.cfg +++ b/tests/training/config.cfg @@ -28,11 +28,8 @@ embedding = ${components.embedding} target_span_getter = ${vars.ml_span_groups} infer_span_setter = true -[nlp.scorers.speed] -@scorers = "eds.speed" - -[nlp.scorers.ner] -@scorers = "eds.ner_exact_scorer" +[scorer.ner] +@scorers= "eds.ner_exact_scorer" span_getter = ${components.ner.target_span_getter} [vars] @@ -48,3 +45,4 @@ max_steps = 20 validation_interval = 1 batch_size = 4 lr = 3e-3 +scorer = ${scorer} diff --git a/tests/training/test_train.py b/tests/training/test_train.py index 4b9b384b4..e849c3696 100644 --- a/tests/training/test_train.py +++ b/tests/training/test_train.py @@ -1,17 +1,16 @@ import math import random import shutil +import time from collections import defaultdict from itertools import chain, count, repeat from pathlib import Path from typing import Callable, Iterable, List, Optional -import torch from confit import Config from confit.registry import validate_arguments from confit.utils.random import set_seed from spacy.tokens import Doc, Span -from torch.utils.data import DataLoader from tqdm import tqdm import edsnlp @@ -20,6 +19,7 @@ from edsnlp.core.registry import registry from edsnlp.optimization import LinearSchedule, ScheduledOptimizer from 
edsnlp.pipelines.trainable.ner.ner import TrainableNER +from edsnlp.scorers import Scorer from edsnlp.utils.collections import batchify from edsnlp.utils.filter import filter_spans @@ -112,6 +112,30 @@ def load(nlp): return load +@validate_arguments +class TestScorer: + def __init__(self, **scorers: Scorer): + self.scorers = scorers + + def __call__(self, nlp, docs): + clean_docs = [d.copy() for d in docs] + for d in clean_docs: + d.ents = [] + d.spans.clear() + t0 = time.time() + preds = list(nlp.pipe(clean_docs)) + duration = time.time() - t0 + scores = { + scorer_name: scorer(docs, preds) + for scorer_name, scorer in self.scorers.items() + } + scores["speed"] = dict( + wps=sum(len(d) for d in docs) / duration, + dps=len(docs) / duration, + ) + return scores + + @validate_arguments def train( output_path: Path, @@ -124,7 +148,10 @@ def train( lr: float = 8e-5, validation_interval: int = 10, device: str = "cpu", + scorer: TestScorer = TestScorer(), ): + import torch + device = torch.device(device) set_seed(seed) @@ -138,7 +165,7 @@ def train( # Preprocessing the training dataset into a dataloader preprocessed = list(nlp.preprocess_many(train_docs, supervision=True)) - dataloader = DataLoader( + dataloader = torch.utils.data.DataLoader( preprocessed, batch_sampler=LengthSortedBatchSampler(preprocessed, batch_size), collate_fn=nlp.collate, @@ -189,7 +216,7 @@ def train( print(acc_loss / max(acc_steps, 1)) acc_loss = 0 acc_steps = 0 - last_scores = nlp.score(val_docs) + last_scores = scorer(nlp, val_docs) print(last_scores, "lr", optimizer.param_groups[0]["lr"]) if step == max_steps: break