diff --git a/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl b/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
index 336c45167..92d7033b7 100644
--- a/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
+++ b/benchmarks/inference-server/vllm/manifest-templates/vllm.tftpl
@@ -51,6 +51,7 @@ spec:
         - name: vllm
           ports:
             - containerPort: 80
+              name: metrics
           image: "vllm/vllm-openai:v0.5.5"
           command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
           args: ["--model", "${model_id}", "--tensor-parallel-size", "${gpu_count}", "--port", "80", "--swap-space", "${swap_space}", "--disable-log-requests"]
diff --git a/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl b/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl
index f582fc76e..d86cf6ab7 100644
--- a/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl
+++ b/benchmarks/inference-server/vllm/monitoring-templates/vllm-podmonitoring.yaml.tftpl
@@ -8,5 +8,5 @@ spec:
     matchLabels:
       app: vllm
   endpoints:
-  - port: 80
+  - port: metrics
     interval: 15s