diff --git a/src/encoder.py b/src/encoder.py index 5f52e723c..a40f75819 100644 --- a/src/encoder.py +++ b/src/encoder.py @@ -49,7 +49,7 @@ def __init__(self, encoder, bpe_merges, errors='replace'): self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) self.cache = {} - # Should haved added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions + # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") def bpe(self, token):