Add Llama to CI

vllm-project · Jan 7, 2025 · dde72d6 · dde72d6
1 parent 33e1e13
commit dde72d6
Showing 1 changed file with 6 additions and 6 deletions.
diff --git a/tests/tpu/test_quantization_accuracy.py b/tests/tpu/test_quantization_accuracy.py
@@ -20,15 +20,15 @@ def get_model_args(self) -> str:
 
 # NOTE: Accuracy scores measured on GPUs.
 ACCURACY_CONFIGS = [
-    # GSM8KAccuracyTestConfig(
-    #     model_name="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
-    #     excepted_value=0.76),  # no bias
+    GSM8KAccuracyTestConfig(
+        model_name="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
+        excepted_value=0.76),  # no bias
     # NOTE(rob): We cannot re-initialize vLLM in the same process for TPU,
     # so only one of these tests can run in a single call to pytest. As
     # a follow-up, move this into the LM-EVAL section of the CI.
-    GSM8KAccuracyTestConfig(
-        model_name="neuralmagic/Qwen2-7B-Instruct-quantized.w8a8",
-        excepted_value=0.66),  # bias in QKV layers
+    # GSM8KAccuracyTestConfig(
+    #     model_name="neuralmagic/Qwen2-7B-Instruct-quantized.w8a8",
+    #     excepted_value=0.66),  # bias in QKV layers
 ]