Update TensorRT-LLM (#1688)
* Update TensorRT-LLM

---------

Co-authored-by: IbrahimAmin <[email protected]>
Co-authored-by: Fabian Joswig <[email protected]>
Co-authored-by: Pzzzzz <[email protected]>
Co-authored-by: CoderHam <[email protected]>
Co-authored-by: Konstantin Lopuhin <[email protected]>
6 people authored May 28, 2024
1 parent 5d8ca2f commit f430a4b
Showing 529 changed files with 1,163,937 additions and 9,114 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -8,7 +8,6 @@ __pycache__/
 build*/
 *.egg-info/
 .coverage
-*.csv
 *.onnx
 tmp/
 venv/
210 changes: 0 additions & 210 deletions CHANGELOG.md

This file was deleted.

4 changes: 2 additions & 2 deletions README.md
@@ -6,9 +6,9 @@ TensorRT-LLM
 
 [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/)
 [![python](https://img.shields.io/badge/python-3.10.12-green)](https://www.python.org/downloads/release/python-31012/)
-[![cuda](https://img.shields.io/badge/cuda-12.4.0-green)](https://developer.nvidia.com/cuda-downloads)
+[![cuda](https://img.shields.io/badge/cuda-12.4.1-green)](https://developer.nvidia.com/cuda-downloads)
 [![trt](https://img.shields.io/badge/TRT-10.0.1-green)](https://developer.nvidia.com/tensorrt)
-[![version](https://img.shields.io/badge/release-0.10.0.dev-green)](./setup.py)
+[![version](https://img.shields.io/badge/release-0.11.0.dev-green)](./tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)
 
 [Architecture](./docs/source/architecture/overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/performance/perf-overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Examples](./examples/)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentation](./docs/source/)
2 changes: 1 addition & 1 deletion benchmarks/cpp/gptManagerBenchmark.cpp
@@ -906,7 +906,7 @@ class GptServer
             [this](uint64_t requestId, std::list<NamedTensor> const& response_tensors, bool final_response,
                 std::string const& errMsg)
             { return sendResponse(requestId, response_tensors, final_response, errMsg); },
-            nullptr, iterationDataCallback, optionalParams, terminateReqId, std::nullopt, excludeInputInOutput);
+            nullptr, iterationDataCallback, optionalParams, terminateReqId, excludeInputInOutput);
     }
 
     ~GptServer()
12 changes: 11 additions & 1 deletion benchmarks/cpp/prepare_dataset.py
@@ -18,6 +18,8 @@
 import click
 from pydantic import BaseModel, field_validator
 from transformers import AutoTokenizer
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
 from utils.prepare_real_data import dataset
 from utils.prepare_synthetic_data import token_norm_dist

@@ -27,10 +29,12 @@ class RootArgs(BaseModel):
     output: str
     random_seed: int
     task_id: int
+    std_out: bool
     rand_task_id: Optional[Tuple[int, int]]
 
     @field_validator('tokenizer')
-    def get_tokenizer(cls, v: str):
+    def get_tokenizer(cls,
+                      v: str) -> PreTrainedTokenizer | PreTrainedTokenizerFast:
         try:
             tokenizer = AutoTokenizer.from_pretrained(v, padding_side='left')
         except EnvironmentError as e:
@@ -53,6 +57,11 @@ def get_tokenizer(cls, v: str):
               type=str,
               help="Output json filename.",
               default="preprocessed_dataset.json")
+@click.option(
+    "--stdout",
+    is_flag=True,
+    help="Print output to stdout with a JSON dataset entry on each line.",
+    default=False)
 @click.option("--random-seed",
               required=False,
               type=int,
@@ -80,6 +89,7 @@ def cli(ctx, **kwargs):
 
     ctx.obj = RootArgs(tokenizer=kwargs['tokenizer'],
                        output=kwargs['output'],
+                       std_out=kwargs['stdout'],
                        random_seed=kwargs['random_seed'],
                        task_id=kwargs['task_id'],
                        rand_task_id=kwargs['rand_task_id'])
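The new --stdout switch is threaded through the same Click-to-pydantic hand-off the script already uses: Click parses the flag as kwargs['stdout'] and cli() copies it into the validated RootArgs model as std_out. A minimal standalone sketch of that pattern (the Args model and demo command below are illustrative, not part of the benchmark code):

```python
import click
from pydantic import BaseModel


class Args(BaseModel):
    output: str
    std_out: bool


@click.command()
@click.option("--output", type=str, default="preprocessed_dataset.json")
@click.option("--stdout",
              is_flag=True,
              default=False,
              help="Print one JSON dataset entry per line instead of writing a file.")
def demo(**kwargs):
    # Click exposes the flag as kwargs['stdout']; the model stores it as std_out.
    args = Args(output=kwargs["output"], std_out=kwargs["stdout"])
    click.echo(f"write to stdout: {args.std_out}, output file: {args.output}")


if __name__ == "__main__":
    demo()
```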
26 changes: 17 additions & 9 deletions benchmarks/cpp/utils/prepare_real_data.py
@@ -6,7 +6,7 @@
 import click
 from datasets import load_dataset
 from pydantic import BaseModel, model_validator
-from utils.utils import dataset_dump, get_norm_dist_tokens
+from utils.utils import dataset_dump, get_norm_dist_tokens, print_dataset
 
 
 def validate_output_len_dist(ctx, param, value):
@@ -220,11 +220,19 @@ def dataset(root_args, **kwargs):
     logging.debug(f"Input lengths: {[len(i) for i in input_ids]}")
     logging.debug(f"Output lengths: {output_lens}")
 
-    dataset_dump(
-        input_lens, input_ids, output_lens, task_ids, {
-            "workload_type": "dataset",
-            "tokenizer": root_args.tokenizer.__class__.__name__,
-            "num_requests": len(input_ids),
-            "max_input_len": max(input_lens),
-            "max_output_len": max(output_lens)
-        }, root_args.output)
+    if not root_args.std_out:
+        dataset_dump(
+            input_lens, input_ids, output_lens, task_ids, {
+                "workload_type": "dataset",
+                "tokenizer": root_args.tokenizer.__class__.__name__,
+                "num_requests": len(input_ids),
+                "max_input_len": max(input_lens),
+                "max_output_len": max(output_lens)
+            }, root_args.output)
+    else:
+        print_dataset(
+            task_ids,
+            input_ids,
+            output_lens,
+            tokenizer=None,
+        )
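With --stdout set, dataset() skips the JSON file and emits one JSON object per request on standard output (see print_dataset in utils.py below), so the data can be piped straight into another process. A sketch of a downstream reader for that line-delimited format (the reader itself is illustrative; only the task_id, logits, and output_tokens field names come from print_dataset):

```python
import json
import sys

# Consume line-delimited JSON entries produced by the --stdout path.
for line in sys.stdin:
    if not line.strip():
        continue
    entry = json.loads(line)
    # Each entry carries a task id, the input token ids, and the requested output length.
    print(entry["task_id"], len(entry["logits"]), entry["output_tokens"])
```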
33 changes: 20 additions & 13 deletions benchmarks/cpp/utils/prepare_synthetic_data.py
@@ -1,7 +1,8 @@
 import random
 
 import click
-from utils.utils import dataset_dump, gen_random_tokens, get_norm_dist_tokens
+from utils.utils import (dataset_dump, gen_random_tokens, get_norm_dist_tokens,
+                         print_dataset)
 
 
 @click.command()
@@ -55,15 +56,21 @@ def token_norm_dist(root_args, **kwargs):
         min_id, max_id = root_args.rand_task_id
         task_ids = [random.randint(min_id, max_id) for _ in range(num_reqs)]
 
-    dataset_dump(
-        input_lens, input_ids, output_lens, task_ids, {
-            "workload_type": "token-norm-dist",
-            "input_mean": kwargs['input_mean'],
-            "input_stdev": kwargs['input_stdev'],
-            "output_mean": kwargs['output_mean'],
-            "output_stdev": kwargs['output_stdev'],
-            "num_requests": kwargs['num_requests'],
-            "tokenize_vocabsize": root_args.tokenizer.vocab_size,
-            "max_input_len": max_input_len,
-            "max_output_len": max_output_len
-        }, root_args.output)
+    if not root_args.std_out:
+        dataset_dump(
+            input_lens, input_ids, output_lens, task_ids, {
+                "workload_type": "token-norm-dist",
+                "input_mean": kwargs['input_mean'],
+                "input_stdev": kwargs['input_stdev'],
+                "output_mean": kwargs['output_mean'],
+                "output_stdev": kwargs['output_stdev'],
+                "num_requests": kwargs['num_requests'],
+                "tokenize_vocabsize": root_args.tokenizer.vocab_size,
+                "max_input_len": max_input_len,
+                "max_output_len": max_output_len
+            }, root_args.output)
+    else:
+        print_dataset(
+            input_ids,
+            output_lens,
+        )
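token_norm_dist builds synthetic requests whose input and output lengths come from normal distributions parameterised by the input_mean/input_stdev and output_mean/output_stdev options. A simplified sketch of that sampling step (the helper name and numbers are illustrative, not the benchmark's exact implementation):

```python
import random


def norm_dist_lengths(mean: float, stdev: float, num: int, seed: int = 0) -> list[int]:
    # Draw num lengths from N(mean, stdev), rounding and clamping to at least 1 token.
    rng = random.Random(seed)
    return [max(1, int(round(rng.gauss(mean, stdev)))) for _ in range(num)]


input_lens = norm_dist_lengths(mean=128, stdev=16, num=8)
output_lens = norm_dist_lengths(mean=64, stdev=8, num=8)
print(input_lens, output_lens)
```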
12 changes: 11 additions & 1 deletion benchmarks/cpp/utils/utils.py
@@ -43,7 +43,17 @@ def dataset_dump(input_lens, input_ids, output_lens, task_ids, metadata,
                              task_id=task_ids[i]))
     workload = Workload(metadata=metadata, samples=samples)
     with open(output_file, 'w') as f:
-        json.dump(workload.dict(), f)
+        json.dump(workload.model_dump(), f)
+
+
+def print_dataset(input_ids, output_lens):
+    for i, input_tokens in enumerate(input_ids):
+        d = {
+            "task_id": i,
+            "logits": input_tokens,
+            "output_tokens": output_lens[i]
+        }
+        print(json.dumps(d, separators=(',', ':'), ensure_ascii=False))
 
 
 def get_list_of_delays(delay_dist, mean_time_bet_reqs, num_reqs, random_seed):
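The workload.dict() to workload.model_dump() switch tracks the Pydantic v2 API, where BaseModel.dict() is deprecated in favour of model_dump(). A minimal sketch of the new call (the Workload and Sample models here are stand-ins, not the benchmark's actual schema):

```python
import json

from pydantic import BaseModel


class Sample(BaseModel):
    task_id: int
    output_tokens: int


class Workload(BaseModel):
    metadata: dict
    samples: list[Sample]


workload = Workload(metadata={"workload_type": "dataset"},
                    samples=[Sample(task_id=0, output_tokens=64)])

# Pydantic v2: model_dump() replaces the deprecated dict() and handles nested models.
print(json.dumps(workload.model_dump()))
```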

