diff --git a/fairseq/data/dictionary.py b/fairseq/data/dictionary.py index 3695458363..bd29be5b94 100644 --- a/fairseq/data/dictionary.py +++ b/fairseq/data/dictionary.py @@ -337,7 +337,7 @@ def encode_line( for i, word in enumerate(words): if add_if_not_exist: - idx = self.add_symbol(word) + idx = self.add_symbol(word, overwrite=True) else: idx = self.index(word) if consumer is not None: @@ -367,7 +367,7 @@ def _add_file_to_dictionary_single_worker( def add_file_to_dictionary(filename, dict, tokenize, num_workers): def merge_result(counter): for w, c in sorted(counter.items()): - dict.add_symbol(w, c) + dict.add_symbol(w, c, overwrite=True) local_file = PathManager.get_local_path(filename) offsets = find_offsets(local_file, num_workers)