diff --git a/benchmarks/huggingface/bench/__main__.py b/benchmarks/huggingface/bench/__main__.py
index 3d364a3b4..4e46f4ffd 100644
--- a/benchmarks/huggingface/bench/__main__.py
+++ b/benchmarks/huggingface/bench/__main__.py
@@ -40,7 +40,9 @@ def __init__(self, args):
             repeat=100000,
             generators=generators[info.category](info),
         )
-        self.loader = DataLoader(self.data, batch_size=args.batch_size)
+        self.loader = DataLoader(
+            self.data, batch_size=args.batch_size, num_workers=args.num_workers
+        )
 
         self.amp_scaler = torch.cuda.amp.GradScaler(enabled=is_fp16_allowed(args))
         if is_fp16_allowed(args):
@@ -130,6 +132,12 @@ def parser():
         default="fp32",
         help="Precision configuration",
     )
+    parser.add_argument(
+        "--num-workers",
+        type=int,
+        default=8,
+        help="number of workers for data loading",
+    )
     # parser.add_argument(
     #     "--no-stdout",
     #     action="store_true",
diff --git a/config/base.yaml b/config/base.yaml
index 5abbccee0..e0920af3f 100644
--- a/config/base.yaml
+++ b/config/base.yaml
@@ -25,6 +25,7 @@ _hf:
   install_group: torch
   argv:
     --precision: 'tf32-fp16'
+    --num-workers: 8
 
   plan:
     method: per_gpu