Skip to content

Commit

Permalink
updated
Browse files: browse the repository at this point in the history
  • Loading branch information
robertgshaw2-neuralmagic committed Nov 19, 2024
1 parent 540d0ce commit b45c158
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
4 changes: 3 additions & 1 deletion benchmarks/benchmark_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,9 @@ def main(args: argparse.Namespace):
for request in requests)
print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
f"{total_num_tokens / elapsed_time:.2f} total tokens/s, "
f"{total_output_tokens / elapsed_time:.2f} output tokens/s")
f"{total_output_tokens / elapsed_time:.2f} output tokens/s, "
f"{total_num_tokens=} | {total_output_tokens=}"
)

# Output JSON results if specified
if args.output_json:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,7 @@ def get_scheme(
"""
scheme = CompressedTensors24(
model_compressor=self.model_compressor,
layer_name=layer_name
)
layer_name=layer_name)
# scheme = CompressedTensorsW8A8Fp8(
# strategy=QuantizationStrategy.CHANNEL,
# is_static_input_scheme=False)
Expand Down

0 comments on commit b45c158

Please sign in to comment.