diff --git a/tests/compile/utils.py b/tests/compile/utils.py index 95cad19126df6..222c63a342a4b 100644 --- a/tests/compile/utils.py +++ b/tests/compile/utils.py @@ -9,29 +9,26 @@ TEST_MODELS = [ ("facebook/opt-125m", {}), - # TODO: add fake implementation for compressed-tensors - # ("nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change", { - # "dtype": torch.float16, - # "quantization": "compressed-tensors" - # }), + ("nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change", { + "dtype": torch.float16, + "quantization": "compressed-tensors" + }), ("neuralmagic/Meta-Llama-3-8B-Instruct-FP8", { "dtype": torch.float16, "quantization": "fp8" }), - # TODO: add fake implementation for compressed-tensors - # ("nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples", { - # "quantization": "compressed-tensors" - # }), + ("nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dyn-Per-Token-2048-Samples", { + "quantization": "compressed-tensors" + }), ("meta-llama/Meta-Llama-3-8B", {}), ] -# TODO: enable in pytorch 2.5 -if False and is_quant_method_supported("aqlm"): # noqa: SIM223 +if is_quant_method_supported("aqlm"): TEST_MODELS.append(("ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf", { "quantization": "aqlm" })) -# TODO: enable in pytorch 2.5 +# TODO: figure out why this fails. if False and is_quant_method_supported("gguf"): # noqa: SIM223 TEST_MODELS.append(("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", { "quantization": "gguf" @@ -71,13 +68,13 @@ def check_full_graph_support(model, os.environ["VLLM_TORCH_COMPILE_LEVEL"] = str(optimization_level) os.environ["VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE"] = "1" - # Inductor doesn't support fp8 and the base meta llama uses too - # much memory. - quantization = model_kwargs.get("quantization") - if ((quantization == "fp8" or model == "meta-llama/Meta-Llama-3-8B") + # The base meta llama uses too much memory. + if (model == "meta-llama/Meta-Llama-3-8B" and optimization_level >= CompilationLevel.PIECEWISE): return + print(f"MODEL={model}") + prompts = [ "Hello, my name is", "The president of the United States is",