From 7cdbba4811b8bf345efa76b466a448501179a4b1 Mon Sep 17 00:00:00 2001
From: "Brandon T. Willard"
Date: Wed, 9 Oct 2024 16:15:18 -0500
Subject: [PATCH] Add missing __hash__ implementation to TransformerTokenizer

---
 benchmarks/common.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/benchmarks/common.py b/benchmarks/common.py
index 17c8043f..b56677e7 100644
--- a/benchmarks/common.py
+++ b/benchmarks/common.py
@@ -1,6 +1,7 @@
 from typing import List, Tuple, Union
 
 import torch
+from datasets.fingerprint import Hasher
 from transformers import AutoTokenizer, PreTrainedTokenizer
 
 
@@ -90,6 +91,9 @@ def convert_token_to_string(self, token: str) -> str:
 
         return string
 
+    def __hash__(self):
+        return hash(Hasher.hash(self.tokenizer))
+
     def __eq__(self, other):
         if isinstance(other, type(self)):
             if hasattr(self, "model_name") and hasattr(self, "kwargs"):