diff --git a/changelog.md b/changelog.md
index 3f3df11cc..5390ea017 100644
--- a/changelog.md
+++ b/changelog.md
@@ -6,6 +6,7 @@
 
 - Fix `join_thread` missing attribute in `SimpleQueue` when cleaning a multiprocessing executor
 - Support huggingface transformers that do not set `cls_token_id` and `sep_token_id` (we now also look for these tokens in the `special_tokens_map` and `vocab` mappings)
+- Fix the `scorers` dict changing size when evaluating during training
 
 ## v0.14.0 (2024-11-14)
 
diff --git a/edsnlp/training/trainer.py b/edsnlp/training/trainer.py
index df37118ed..8e819702d 100644
--- a/edsnlp/training/trainer.py
+++ b/edsnlp/training/trainer.py
@@ -95,13 +95,15 @@ def set_flat_stats(x, stats):
 
 @validate_arguments
 class GenericScorer:
-    def __init__(self, speed=True, **scorers):
+    def __init__(self, speed=True, batch_size: Union[int, str] = 1, **scorers):
         self.scorers = scorers
         self.speed = speed
+        self.batch_size = batch_size
 
     def __call__(self, nlp: Pipeline, docs: Iterable[Any]):
         scores = {}
         docs = list(docs)
+        scorers = dict(self.scorers)
 
         # Speed
         if self.speed:
@@ -118,9 +120,9 @@ def __call__(self, nlp: Pipeline, docs: Iterable[Any]):
             name for name, pipe in nlp.pipeline if isinstance(pipe, BaseNERComponent)
         ]
         ner_scorers = {
-            name: scorer
-            for name, scorer in self.scorers.items()
-            if isinstance(scorer, NerMetric)
+            name: scorers.pop(name)
+            for name in list(scorers)
+            if isinstance(scorers[name], NerMetric)
         }
         if ner_pipes and ner_scorers:
             clean_ner_docs = [d.copy() for d in tqdm(docs, desc="Copying docs")]
@@ -128,7 +130,11 @@ def __call__(self, nlp: Pipeline, docs: Iterable[Any]):
                 d.ents = []
                 d.spans.clear()
             with nlp.select_pipes(enable=ner_pipes):
-                ner_preds = list(nlp.pipe(tqdm(clean_ner_docs, desc="Predicting")))
+                ner_preds = list(
+                    nlp.pipe(tqdm(clean_ner_docs, desc="Predicting")).set_processing(
+                        batch_size=self.batch_size
+                    )
+                )
             for name, scorer in ner_scorers.items():
                 scores[name] = scorer(docs, ner_preds)
 
@@ -139,9 +145,9 @@ def __call__(self, nlp: Pipeline, docs: Iterable[Any]):
             if isinstance(pipe, BaseSpanAttributeClassifierComponent)
         ]
         span_attr_scorers = {
-            name: scorer
-            for name, scorer in self.scorers.items()
-            if isinstance(scorer, SpanAttributeMetric)
+            name: scorers.pop(name)
+            for name in list(scorers)
+            if isinstance(scorers[name], SpanAttributeMetric)
         }
         if qlf_pipes and span_attr_scorers:
             clean_qlf_docs = [d.copy() for d in tqdm(docs, desc="Copying docs")]
@@ -152,7 +158,11 @@ def __call__(self, nlp: Pipeline, docs: Iterable[Any]):
                         for qlf in nlp.get_pipe(name).attributes:
                             BINDING_SETTERS[(qlf, None)](span)
             with nlp.select_pipes(disable=ner_pipes):
-                qlf_preds = list(nlp.pipe(tqdm(clean_qlf_docs, desc="Predicting")))
+                qlf_preds = list(
+                    nlp.pipe(tqdm(clean_qlf_docs, desc="Predicting")).set_processing(
+                        batch_size=self.batch_size
+                    )
+                )
             for name, scorer in span_attr_scorers.items():
                 scores[name] = scorer(docs, qlf_preds)