Skip to content

Commit

Permalink
[Misc] Enable multi-step output streaming by default (vllm-project#9047)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mgoin authored Oct 4, 2024
1 parent aeb37c2 commit 303d447
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions vllm/engine/arg_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,7 +145,7 @@ class EngineArgs:
max_cpu_loras: Optional[int] = None
device: str = 'auto'
num_scheduler_steps: int = 1
- multi_step_stream_outputs: bool = False
+ multi_step_stream_outputs: bool = True
ray_workers_use_nsight: bool = False
num_gpu_blocks_override: Optional[int] = None
num_lookahead_slots: int = 0
Expand Down Expand Up @@ -603,13 +603,17 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

parser.add_argument(
'--multi-step-stream-outputs',
- action='store_true',
- help='If True, then multi-step will stream outputs for every step')
+ action=StoreBoolean,
+ default=EngineArgs.multi_step_stream_outputs,
+ nargs="?",
+ const="True",
+ help='If False, then multi-step will stream outputs at the end '
+ 'of all steps')
parser.add_argument(
'--scheduler-delay-factor',
type=float,
default=EngineArgs.scheduler_delay_factor,
- help='Apply a delay (of delay factor multiplied by previous'
+ help='Apply a delay (of delay factor multiplied by previous '
'prompt latency) before scheduling next prompt.')
parser.add_argument(
'--enable-chunked-prefill',
Expand All @@ -632,7 +636,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
type=nullable_str,
choices=[*QUANTIZATION_METHODS, None],
default=EngineArgs.speculative_model_quantization,
- help='Method used to quantize the weights of speculative model.'
+ help='Method used to quantize the weights of speculative model. '
'If None, we first check the `quantization_config` '
'attribute in the model config file. If that is '
'None, we assume the model weights are not '
Expand Down

0 comments on commit 303d447

Please sign in to comment.