From 8d94e1f49dfa1aaac92381e9d56dbdc65c225a90 Mon Sep 17 00:00:00 2001 From: Faraz Shahsavan Date: Thu, 12 Dec 2024 02:10:41 +0000 Subject: [PATCH] Update code --- .../sparse_fp8_benchmarks.py | 24 +++++++++---------- .../cutlass_benchmarks/w8a8_benchmarks.py | 3 +-- tests/kernels/test_semi_structured.py | 2 -- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py b/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py index 0b2fd3e477247..4a76b289e3696 100644 --- a/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py +++ b/benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py @@ -3,12 +3,10 @@ import dataclasses import itertools import multiprocessing as mp -import os import pickle as pkl import time import traceback from multiprocessing import Process, Queue -from pathlib import Path from queue import Empty from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple @@ -539,17 +537,17 @@ def bench_fp8(dtype: torch.dtype, with_cuda_graph: Optional[int], } # Prepare configs for all kernels - standard_kernels = [ - {'kernel_type': 'pytorch_mm'}, - {'kernel_type': 'pytorch_scaled_mm'}, - {'kernel_type': 'pytorch_scaled_mm_fast'}, - { - 'kernel_type': 'cutlass_scaled_mm' - }, - { - 'kernel_type': 'cutlass_scaled_sparse_mm' - } - ] + standard_kernels = [{ + 'kernel_type': 'pytorch_mm' + }, { + 'kernel_type': 'pytorch_scaled_mm' + }, { + 'kernel_type': 'pytorch_scaled_mm_fast' + }, { + 'kernel_type': 'cutlass_scaled_mm' + }, { + 'kernel_type': 'cutlass_scaled_sparse_mm' + }] # Create configs for standard kernels all_configs = [{**base_config, **kernel} for kernel in standard_kernels] diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py index aabc1bdaf9753..d0353bc8cb42a 100644 --- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py +++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py @@ -8,13 +8,12 @@ import torch import torch.utils.benchmark as TBenchmark from torch.utils.benchmark import Measurement as TMeasurement +from utils import make_rand_tensors from weight_shapes import WEIGHT_SHAPES from vllm import _custom_ops as ops from vllm.utils import FlexibleArgumentParser -from utils import make_rand_tensors - DEFAULT_MODELS = list(WEIGHT_SHAPES.keys()) DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512] DEFAULT_TP_SIZES = [1] diff --git a/tests/kernels/test_semi_structured.py b/tests/kernels/test_semi_structured.py index 3d8560238de3d..e5c9f14224f5a 100644 --- a/tests/kernels/test_semi_structured.py +++ b/tests/kernels/test_semi_structured.py @@ -9,7 +9,6 @@ from vllm import _custom_ops as ops from vllm.platforms import current_platform - CUDA_DEVICES = [ f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) ] @@ -129,4 +128,3 @@ def test_cutlass_sparse_subset(): out_dtype=torch.bfloat16) torch.testing.assert_close(out, baseline, rtol=1e-1, atol=1e0) -