diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 1bc13020e65b66..abec8a9b911c98 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -175,7 +175,10 @@ def split(self, text: str) -> List[str]: # matches # "[CLS]", "L", we need to match CLS even if L is special for lookstart, looktrie_pointer in states.items(): - if lookstart > start: + if lookstart in to_remove: + # This partial match should be removed + continue + elif lookstart > start: # This partial match is later, we can stop looking break elif lookstart < start: