Skip to content

Commit

Permalink
add llama to ci
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-neuralmagic committed Jan 7, 2025
1 parent 33e1e13 commit dde72d6
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions tests/tpu/test_quantization_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def get_model_args(self) -> str:

# Accuracy test cases: each entry pairs a quantized model name with the
# accuracy value passed to GSM8KAccuracyTestConfig as `excepted_value`.
# NOTE: Accuracy scores measured on GPUs.
# NOTE(review): `excepted_value` reads like a misspelling of `expected_value`.
# It is presumably the field name declared on GSM8KAccuracyTestConfig, so any
# rename has to start at that definition -- confirm before touching call sites.
# NOTE(review): this listing looks like a diff rendering whose +/- markers
# were stripped, so each entry shows up twice (once commented out, once
# active). Confirm against the real file which form is current.
ACCURACY_CONFIGS = [
# GSM8KAccuracyTestConfig(
# model_name="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
# excepted_value=0.76), # no bias
GSM8KAccuracyTestConfig(
model_name="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
excepted_value=0.76), # no bias
# NOTE(rob): We cannot re-initialize vLLM in the same process for TPU,
# so only one of these tests can run in a single call to pytest. As
# a follow-up, move this into the LM-EVAL section of the CI.
GSM8KAccuracyTestConfig(
model_name="neuralmagic/Qwen2-7B-Instruct-quantized.w8a8",
excepted_value=0.66), # bias in QKV layers
# GSM8KAccuracyTestConfig(
# model_name="neuralmagic/Qwen2-7B-Instruct-quantized.w8a8",
# excepted_value=0.66), # bias in QKV layers
]


Expand Down

0 comments on commit dde72d6

Please sign in to comment.