class ResNet50Benchmark(Benchmark, metaclass=UserFacingBenchmarkMeta):
    """Benchmark that runs torchvision's ``resnet50`` on a single generated batch.

    ``make_batch`` produces the input tensor and ``fn`` builds the model; both
    use the device, dtype and ``requires_grad`` setting given at construction.
    """

    def __init__(
        self,
        batch_size: int,
        input_shape: tuple[int, int, int],
        device: str = "cuda",
        dtype: dtypes.dtype = thunder.float32,
        requires_grad: bool = False,
    ) -> None:
        """Record the benchmark configuration.

        Args:
            batch_size: Size of the leading (batch) dimension of the input.
            input_shape: Per-sample shape placed after the batch dimension.
                The typical input image size of ResNet50 is ``(3, 224, 224)``.
            device: Device on which the input tensor and the model are placed.
            dtype: Data type of the input and the model. It is converted to a
                torch dtype with ``ltorch.to_torch_dtype``.
            requires_grad: Whether the input tensor and the model parameters
                require gradients.
        """
        super().__init__()

        # Unpacking instead of tuple concatenation: the result is identical for
        # a tuple, and a list-valued ``input_shape`` no longer raises TypeError.
        self.shape: tuple[int, int, int, int] = (batch_size, *input_shape)
        self.device: str = device
        self.dtype: dtypes.dtype = dtype
        self.tdtype: torch.dtype = ltorch.to_torch_dtype(dtype)
        self.requires_grad: bool = requires_grad

        # NOTE(review): presumably read by the benchmark harness to learn which
        # devices this benchmark touches -- confirm against ``Benchmark``.
        self.devices: list[str] = [device]

    def make_batch(self) -> tuple[list, dict]:
        """Create the positional and keyword arguments for one model call.

        Returns:
            ``(args, kwargs)`` where ``args`` is a one-element tuple holding a
            tensor of shape ``self.shape`` and ``kwargs`` is an empty dict.
        """
        make = partial(make_tensor, device=self.device, dtype=self.tdtype, requires_grad=self.requires_grad)
        return (make(self.shape),), {}

    def fn(self) -> Callable:
        """Build the ResNet50 model to benchmark.

        Returns:
            A ``resnet50`` instance built with torchvision's default arguments,
            moved to ``self.device`` / ``self.tdtype`` and with
            ``requires_grad`` set on its parameters.
        """
        # Imported here rather than at module level, so torchvision is only
        # needed when this benchmark's model is actually built.
        from torchvision.models import resnet50

        model = resnet50().to(device=self.device, dtype=self.tdtype)
        return model.requires_grad_(self.requires_grad)
#
# vision benchmarks
#


# Sample command to run this benchmark:
# pytest thunder/benchmarks/targets.py -k "test_resnet50" --benchmark-group-by='param:compute_type'
@pytest.mark.parametrize("executor,", executors, ids=executors_ids)
@parametrize_compute_type
def test_resnet50(benchmark, executor: Callable, compute_type: ComputeType):
    """Benchmark ResNet50 on a bfloat16 batch of 64 images of shape (3, 224, 224).

    The model and its input live on ``cuda:0``. Whether they require gradients
    is derived from ``compute_type`` via ``is_requires_grad``; the measurement
    itself is delegated to ``benchmark_for_compute_type``.
    """
    needs_grad = is_requires_grad(compute_type)
    resnet_bench = ResNet50Benchmark(
        batch_size=64,
        input_shape=(3, 224, 224),
        device="cuda:0",
        dtype=torch.bfloat16,
        requires_grad=needs_grad,
    )

    # Build the input batch first, then the model, then wrap it with the executor.
    batch_args, batch_kwargs = resnet_bench.make_batch()
    wrapped_fn = executor(resnet_bench.fn())

    benchmark_for_compute_type(compute_type, benchmark, wrapped_fn, batch_args, batch_kwargs)