Skip to content

Commit

Permalink
Rename BasePreTokenizer as PreTokenizer
Browse files (browse the repository at this point in the history)
  • Loading branch information
gbenson committed May 22, 2024
1 parent c51f6b4 commit 111ce9a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/dom_tokenizers/pre_tokenizers/dom_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from tokenizers import NormalizedString, PreTokenizedString
from unidecode import unidecode

-from .pre_tokenizer import BasePreTokenizer as PreTokenizer
+from .pre_tokenizer import PreTokenizer


class DOMSnapshotPreTokenizer(PreTokenizer):
Expand Down
6 changes: 3 additions & 3 deletions src/dom_tokenizers/pre_tokenizers/pre_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import weakref

-from tokenizers.pre_tokenizers import PreTokenizer
+from tokenizers.pre_tokenizers import PreTokenizer as _PreTokenizer


-class BasePreTokenizer:
+class PreTokenizer:
@classmethod
def hook_into(cls, tokenizer):
"""Reconfigure `tokenizer` for DOM-aware pre-tokenization.
Expand Down Expand Up @@ -34,7 +34,7 @@ def bind_to(self, tokenizer):
self._tokenizer = weakref.proxy(tokenizer)

# Install ourself as the tokenizer's pre-tokenizer.
-backend.pre_tokenizer = PreTokenizer.custom(self)
+backend.pre_tokenizer = _PreTokenizer.custom(self)

# Attempt to detect and postpone any lowercasing applied to
# our input until after the base64 detection and handling is
Expand Down

0 comments on commit 111ce9a

Please sign in to comment.