Skip to content

Commit

Permalink
Merge branch 'vllm-project:main' into add_chart_helm_example
Browse files Browse the repository at this point in the history
  • Loading branch information
mfournioux authored Dec 5, 2024
2 parents 3914276 + 39c89e7 commit 9cc1fc2
Show file tree
Hide file tree
Showing 54 changed files with 1,472 additions and 553 deletions.
5 changes: 5 additions & 0 deletions .buildkite/nightly-benchmarks/benchmark-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,15 @@ steps:
- VLLM_USAGE_SOURCE
- HF_TOKEN

- block: "Run H100 Benchmark"
key: block-h100
depends_on: ~

- label: "H100"
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
agents:
queue: H100
depends_on: block-h100
plugins:
- docker#v5.12.0:
image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/backend_request_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class RequestFuncInput:
model: str
best_of: int = 1
logprobs: Optional[int] = None
extra_body: Optional[dict] = None
multi_modal_content: Optional[dict] = None
ignore_eos: bool = False

Expand All @@ -36,6 +37,7 @@ class RequestFuncOutput:
ttft: float = 0.0 # Time to first token
itl: List[float] = field(
default_factory=list) # List of inter-token latencies
tpot: float = 0.0 # avg next-token latencies
prompt_len: int = 0
error: str = ""

Expand Down Expand Up @@ -242,6 +244,8 @@ async def async_request_openai_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}
Expand Down Expand Up @@ -336,6 +340,8 @@ async def async_request_openai_chat_completions(
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
if request_func_input.extra_body:
payload.update(request_func_input.extra_body)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
Expand Down
Loading

0 comments on commit 9cc1fc2

Please sign in to comment.