Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: deepset-ai/FARM
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: scheiblr/FARM
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 2 commits
  • 2 files changed
  • 2 contributors

Commits on Sep 7, 2020

  1. Copy the full SHA
    80de068 View commit details

Commits on May 21, 2021

  1. Copy the full SHA
    529d078 View commit details
Showing with 21 additions and 2 deletions.
  1. +11 −1 farm/data_handler/processor.py
  2. +10 −1 farm/experiment.py
12 changes: 11 additions & 1 deletion farm/data_handler/processor.py
Original file line number Diff line number Diff line change
@@ -212,7 +212,17 @@ def load_from_dir(cls, load_dir):
if "lower_case" in config.keys():
logger.warning("Loading tokenizer from deprecated FARM config. "
"If you used `custom_vocab` or `never_split_chars`, this won't work anymore.")
tokenizer = Tokenizer.load(load_dir, tokenizer_class=config["tokenizer"], do_lower_case=config["lower_case"])

# automatically estimate lowercase model based on the model name
if config["lower_case"] == 'auto':
if 'uncased' in config["model"]:
lower_case=True
else:
lower_case=False
else:
lower_case = config["lower_case"]

tokenizer = Tokenizer.load(load_dir, tokenizer_class=config["tokenizer"], do_lower_case=lower_case)
else:
tokenizer = Tokenizer.load(load_dir, tokenizer_class=config["tokenizer"])

11 changes: 10 additions & 1 deletion farm/experiment.py
Original file line number Diff line number Diff line change
@@ -57,9 +57,18 @@ def run_experiment(args):

set_all_seeds(args.general.seed)

# automatically estimate lowercase model based on the model name
if args.parameter.lower_case == 'auto':
if 'uncased' in args.parameter.model:
lower_case=True
else:
lower_case=False
else:
lower_case = args.parameter.lower_case

# Prepare Data
tokenizer = Tokenizer.load(
args.parameter.model, do_lower_case=args.parameter.lower_case
args.parameter.model, do_lower_case=lower_case
)

processor = Processor.load(