lintrunner
Priya2698 committed Oct 29, 2024
1 parent 07a7f33 commit fcb9363
Showing 10 changed files with 22 additions and 17 deletions.
2 changes: 1 addition & 1 deletion benchmarks/python/normalization.py
@@ -489,7 +489,7 @@ def norm_bwd_baseline_benchmark(
grads = grads.to(memory_format=torch.channels_last)

norm_fwd_fn = batchnorm_fwd_fn if norm == "batch_norm" else instancenorm_fwd_fn

# Compile the fwd fn for torchcompile
norm_fwd_fn = torch.compile(norm_fwd_fn) if compile else norm_fwd_fn
output = norm_fwd_fn([inputs, weight, bias, running_mean, running_var])
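
For reference, the line touched here is the conditional torch.compile wrapper that every baseline benchmark in this commit shares. A minimal standalone sketch of that pattern, with an illustrative stand-in forward function and compile flag (not the benchmark's real fixtures):

import torch

def norm_fwd_fn(inputs):
    # Stand-in forward; the real benchmark dispatches to batchnorm_fwd_fn
    # or instancenorm_fwd_fn based on the norm parameter.
    return torch.nn.functional.relu(inputs[0])

compile = False  # benchmark parameter: eager baseline vs. torch.compile executor

# Wrap the forward fn only when the torchcompile executor is requested.
norm_fwd_fn = torch.compile(norm_fwd_fn) if compile else norm_fwd_fn
output = norm_fwd_fn([torch.randn(8, 8)])
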
6 changes: 3 additions & 3 deletions benchmarks/python/test_dropout_layernorm_bwd.py
@@ -207,19 +207,19 @@ def test_dropout_layernorm_bwd_baseline_benchmark(
grads = torch.randn(size, device="cuda", dtype=dtype)
weights = torch.randn(size[1], device="cuda", dtype=dtype, requires_grad=True)
bias = torch.randn(size[1], device="cuda", dtype=dtype, requires_grad=True)

def dropout_layernorm_fwd():
return torch.nn.functional.layer_norm(
input2 + torch.nn.functional.dropout(input1, p=dropout_p),
normalized_shape=input1.shape[1:],
weight=weights,
bias=bias,
)

# Compile the fwd fn for torchcompile
fwd_fn = torch.compile(dropout_layernorm_fwd) if compile else dropout_layernorm_fwd
output = fwd_fn()

# Manually compute IOBytes: See PR #1725
run_benchmark(
benchmark,
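
The timed region of these *_bwd baselines is the backward pass rather than the forward defined above. A hedged sketch of the seeding pattern, assuming the shared unary_bwd_torch helper essentially calls backward with the pre-generated gradient tensor (its real definition lives in the benchmark utilities; CPU tensors are used here only to keep the sketch self-contained):

import torch

size = (256, 1024)
inputs = torch.randn(size, requires_grad=True)
grads = torch.randn(size)

output = torch.nn.functional.layer_norm(inputs, normalized_shape=inputs.shape[1:])

# Presumed core of unary_bwd_torch: seed backward with fixed grads so the
# measurement covers the backward kernels, not gradient construction.
output.backward(grads, retain_graph=True)
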
4 changes: 2 additions & 2 deletions benchmarks/python/test_dropout_rmsnorm_bwd.py
@@ -190,8 +190,8 @@ def dropout_rmsnorm_fwd():
x = input2 + torch.nn.functional.dropout(input1, p=dropout_p)
output = weights * x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + 1e-5)
return output

fwd_fn = torch.compile(dropout_rmsnorm_fwd) if compile else dropout_rmsnorm_fwd
output = fwd_fn()

run_benchmark(
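
Two spellings of the RMSNorm scaling appear in this commit: this file multiplies by torch.rsqrt, while test_rmsnorm_bwd.py below divides by torch.sqrt. A small standalone check that the two are numerically equivalent (shapes and eps are illustrative):

import torch

x = torch.randn(4, 1024)
weights = torch.randn(1024)
eps = 1e-5

out_rsqrt = weights * x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
out_sqrt = weights * (x / torch.sqrt(x.pow(2).mean(-1, keepdim=True) + eps))
assert torch.allclose(out_rsqrt, out_sqrt, atol=1e-6)
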
5 changes: 3 additions & 2 deletions benchmarks/python/test_gelu_bwd.py
@@ -102,12 +102,13 @@ def test_gelu_bwd_baseline_benchmark(
inputs = torch.randn(size, device="cuda", dtype=dtype, requires_grad=True)
bias = torch.ones(size[-1], device="cuda", dtype=dtype)
grads = torch.randn(size, device="cuda", dtype=dtype)

def gelu_fwd():
return torch.nn.functional.gelu(inputs + bias, approximate="tanh")

fwd_fn = torch.compile(gelu_fwd) if compile else gelu_fwd
eager_output = fwd_fn()

run_benchmark(
benchmark,
unary_bwd_torch,
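
The gelu call above requests the tanh approximation. A quick standalone check that approximate="tanh" matches the usual tanh-based GELU formula (input size illustrative):

import math
import torch

x = torch.randn(1024)
# Tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
tanh_gelu = 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x.pow(3))))
assert torch.allclose(tanh_gelu, torch.nn.functional.gelu(x, approximate="tanh"), atol=1e-5)
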
2 changes: 1 addition & 1 deletion benchmarks/python/test_huggingface_attn_bwd.py
@@ -126,7 +126,7 @@ def test_huggingface_attn_bwd_baseline_benchmark(
attention_mask = torch.zeros(
batch_size, nh, seq_len, seq_len, device="cuda", dtype=dtype
)

def huggingface_attn_fwd():
attn = (inputs + attention_mask).view(batch_size * nh, seq_len, seq_len)
attn = torch.nn.functional.softmax(attn, dim=-1)
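
The hunk is cut off at this point; purely as a standalone illustration of the masked-softmax step it touches, with illustrative toy sizes on CPU (the hidden remainder of the function is not reproduced here):

import torch

batch_size, nh, seq_len = 2, 4, 16
inputs = torch.randn(batch_size, nh, seq_len, seq_len)
attention_mask = torch.zeros(batch_size, nh, seq_len, seq_len)

# Fold batch and head dims together, then normalize scores row-wise.
attn = (inputs + attention_mask).view(batch_size * nh, seq_len, seq_len)
attn = torch.nn.functional.softmax(attn, dim=-1)
assert torch.allclose(attn.sum(-1), torch.ones(batch_size * nh, seq_len))
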
3 changes: 2 additions & 1 deletion benchmarks/python/test_layernorm_bwd.py
@@ -170,9 +170,10 @@ def layernorm_fwd():
weight=weights,
bias=bias,
)

fwd_fn = torch.compile(layernorm_fwd) if compile else layernorm_fwd
output = fwd_fn()

# Manually compute IOBytes: See PR #1725
run_benchmark(
benchmark,
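
For the affine layer_norm used in this benchmark, backward produces gradients for the input as well as the weight and bias. A minimal standalone illustration (shapes illustrative, CPU only to keep it runnable anywhere):

import torch

inputs = torch.randn(32, 1024, requires_grad=True)
weights = torch.randn(1024, requires_grad=True)
bias = torch.randn(1024, requires_grad=True)
grads = torch.randn(32, 1024)

output = torch.nn.functional.layer_norm(inputs, (1024,), weight=weights, bias=bias)
output.backward(grads)
# Backward fills three gradient tensors: input, weight, and bias.
assert all(t.grad is not None for t in (inputs, weights, bias))
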
2 changes: 1 addition & 1 deletion benchmarks/python/test_rmsnorm_bwd.py
@@ -132,7 +132,7 @@ def rmsnorm_fwd():
rms_eps = torch.sqrt(squared_mean + 1e-5)
output = weights * (inputs / rms_eps)
return output

# Compile the fwd fn for torchcompile
fwd_fn = torch.compile(rmsnorm_fwd) if compile else rmsnorm_fwd
output = fwd_fn()
5 changes: 3 additions & 2 deletions benchmarks/python/test_scale_bias_relu_bwd.py
@@ -94,13 +94,14 @@ def test_sbr_bwd_baseline_benchmark(
grads = torch.randn(*size, device="cuda", dtype=dtype)
scale = torch.ones(size[-1], device="cuda", dtype=dtype)
bias = torch.ones(size[-1], device="cuda", dtype=dtype)

def sbr_fwd():
return torch.nn.functional.relu(inputs * scale + bias)

# Compile the fwd fn for torchcompile
fwd_fn = torch.compile(sbr_fwd) if compile else sbr_fwd
eager_output = sbr_fwd()

run_benchmark(
benchmark,
unary_bwd_torch,
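
scale and bias in this benchmark are 1-D over the last dimension and broadcast across the leading dimensions of the input. A tiny standalone check of that broadcasting (sizes illustrative):

import torch

size = (8, 16)
inputs = torch.randn(*size, requires_grad=True)
scale = torch.ones(size[-1])
bias = torch.ones(size[-1])

# scale and bias broadcast over the leading dimension.
output = torch.nn.functional.relu(inputs * scale + bias)
assert output.shape == inputs.shape
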
5 changes: 3 additions & 2 deletions benchmarks/python/test_silu_mul_bwd.py
@@ -93,13 +93,14 @@ def test_silu_mul_bwd_baseline_benchmark(
x = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
y = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
grads = torch.randn(*size, device="cuda", dtype=dtype)

def silu_mul_fwd():
return torch.nn.functional.silu(x) * y

# Compile the fwd fn for torchcompile
fwd_fn = torch.compile(silu_mul_fwd) if compile else silu_mul_fwd
eager_output = fwd_fn()

run_benchmark(
benchmark,
unary_bwd_torch,
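
silu(x) is x * sigmoid(x), and multiplying by the second tensor is the gating pattern used in SwiGLU-style MLP blocks. A short standalone check (sizes illustrative):

import torch

x = torch.randn(1024)
y = torch.randn(1024)

output = torch.nn.functional.silu(x) * y
assert torch.allclose(output, x * torch.sigmoid(x) * y, atol=1e-6)
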
5 changes: 3 additions & 2 deletions benchmarks/python/test_softmax_bwd.py
@@ -106,12 +106,13 @@ def test_softmax_bwd_baseline_benchmark(
clear_dynamo_cache()
input = torch.randn(size, device="cuda", dtype=dtype, requires_grad=True)
grads = torch.randn(size, device="cuda", dtype=dtype)

def softmax_fwd():
return torch.nn.functional.softmax(input, dim=reduction_axis)

fwd_fn = torch.compile(softmax_fwd) if compile else softmax_fwd
output = fwd_fn()

run_benchmark(
benchmark,
unary_bwd_torch,
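
clear_dynamo_cache at the top of this test is a helper from the benchmark utilities. A hedged sketch of the kind of reset it presumably performs so each parametrized run starts from a cold torch.compile cache (the real helper may do more than this):

import torch

def clear_dynamo_cache():
    # Presumed behavior: drop previously compiled artifacts between runs.
    torch._dynamo.reset()
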
