From eed21c03ba0a8dc77933e06ede262382c483e7cb Mon Sep 17 00:00:00 2001 From: Lydia Nishimwe Date: Thu, 21 Sep 2023 23:47:02 +0200 Subject: [PATCH] Adding symbols with overwrite=True in encode_line and add_file_to_dictionary After fixing the behaviour of add_symbol, two of the unit tests were failing because they called the function with the default value of overwrite (False). --- fairseq/data/dictionary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fairseq/data/dictionary.py b/fairseq/data/dictionary.py index 3695458363..bd29be5b94 100644 --- a/fairseq/data/dictionary.py +++ b/fairseq/data/dictionary.py @@ -337,7 +337,7 @@ def encode_line( for i, word in enumerate(words): if add_if_not_exist: - idx = self.add_symbol(word) + idx = self.add_symbol(word, overwrite=True) else: idx = self.index(word) if consumer is not None: @@ -367,7 +367,7 @@ def _add_file_to_dictionary_single_worker( def add_file_to_dictionary(filename, dict, tokenize, num_workers): def merge_result(counter): for w, c in sorted(counter.items()): - dict.add_symbol(w, c) + dict.add_symbol(w, c, overwrite=True) local_file = PathManager.get_local_path(filename) offsets = find_offsets(local_file, num_workers)