From 9027a49f7ccc2107be1e45962a298fb98773bec1 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Tue, 26 Dec 2023 17:24:57 +0100 Subject: [PATCH] fix eos bounds when creating paragraph subsets --- src/augmenty/doc/subset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/augmenty/doc/subset.py b/src/augmenty/doc/subset.py index f2faf4c..d770047 100644 --- a/src/augmenty/doc/subset.py +++ b/src/augmenty/doc/subset.py @@ -51,7 +51,7 @@ def paragraph_subset_augmenter_v1( # Respect entity spans while start != 0 and example.y[start].ent_iob_ not in {"O", "B", ""}: start -= 1 - while end < doc_len - 1 and example.y[end + 1].ent_iob_ not in {"O", "B", ""}: + while end < doc_len - 1 and example.y[end - 1].ent_iob_ not in {"O", "B", ""}: end += 1 for k in token_anno: