From 8d94e1f49dfa1aaac92381e9d56dbdc65c225a90 Mon Sep 17 00:00:00 2001
From: Faraz Shahsavan <faraz.shahsavan@gmail.com>
Date: Thu, 12 Dec 2024 02:10:41 +0000
Subject: [PATCH] Clean up imports and formatting in CUTLASS benchmarks and
 semi-structured test

---
 .../sparse_fp8_benchmarks.py                  | 24 +++++++++----------
 .../cutlass_benchmarks/w8a8_benchmarks.py     |  3 +--
 tests/kernels/test_semi_structured.py         |  2 --
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py b/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py
index 0b2fd3e477247..4a76b289e3696 100644
--- a/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py
+++ b/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py
@@ -3,12 +3,10 @@
 import dataclasses
 import itertools
 import multiprocessing as mp
-import os
 import pickle as pkl
 import time
 import traceback
 from multiprocessing import Process, Queue
-from pathlib import Path
 from queue import Empty
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
@@ -539,17 +537,17 @@ def bench_fp8(dtype: torch.dtype, with_cuda_graph: Optional[int],
     }
 
     # Prepare configs for all kernels
-    standard_kernels = [
-        {'kernel_type': 'pytorch_mm'},
-        {'kernel_type': 'pytorch_scaled_mm'},
-        {'kernel_type': 'pytorch_scaled_mm_fast'},
-        {
-            'kernel_type': 'cutlass_scaled_mm'
-        },
-        {
-            'kernel_type': 'cutlass_scaled_sparse_mm'
-        }
-    ]
+    standard_kernels = [{
+        'kernel_type': 'pytorch_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm_fast'
+    }, {
+        'kernel_type': 'cutlass_scaled_mm'
+    }, {
+        'kernel_type': 'cutlass_scaled_sparse_mm'
+    }]
 
     # Create configs for standard kernels
     all_configs = [{**base_config, **kernel} for kernel in standard_kernels]
diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
index aabc1bdaf9753..d0353bc8cb42a 100644
--- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
+++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@@ -8,13 +8,12 @@
 import torch
 import torch.utils.benchmark as TBenchmark
 from torch.utils.benchmark import Measurement as TMeasurement
+from utils import make_rand_tensors
 from weight_shapes import WEIGHT_SHAPES
 
 from vllm import _custom_ops as ops
 from vllm.utils import FlexibleArgumentParser
 
-from utils import make_rand_tensors
-
 DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
 DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
 DEFAULT_TP_SIZES = [1]
diff --git a/tests/kernels/test_semi_structured.py b/tests/kernels/test_semi_structured.py
index 3d8560238de3d..e5c9f14224f5a 100644
--- a/tests/kernels/test_semi_structured.py
+++ b/tests/kernels/test_semi_structured.py
@@ -9,7 +9,6 @@
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
 
-
 CUDA_DEVICES = [
     f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
 ]
@@ -129,4 +128,3 @@ def test_cutlass_sparse_subset():
                                   out_dtype=torch.bfloat16)
 
     torch.testing.assert_close(out, baseline, rtol=1e-1, atol=1e0)
-