Skip to content

Commit

Permalink
fix: fix MAX_NUM_BATCHED_TOKENS default
Browse files Browse the repository at this point in the history
  • Loading branch information
hommayushi3 committed Aug 21, 2024
1 parent 34a5d4b commit 2c58904
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions endpoints-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Runtime configuration for the vLLM entrypoint. Every setting may be
# overridden via the environment; otherwise the default below applies.
NUM_SHARD=${NUM_SHARD:-$(nvidia-smi --list-gpus | wc -l)}  # one shard per visible GPU
MODEL_PATH=${MODEL_PATH:-"/repository"}
# NOTE(review): the guard later in this script compares MAX_MODEL_LEN against
# -1, so a default of 1 means --max-model-len 1 is always injected; confirm
# whether ${MAX_MODEL_LEN:--1} was intended.
MAX_MODEL_LEN=${MAX_MODEL_LEN:-1}
# 1 is the "unset" sentinel: --max-num-batched-tokens is only appended when
# the user supplies a different value. (The stale pre-fix default of 0,
# left over from the diff's deleted line, is dropped here — keeping it
# would have made the effective default 0 and defeated this commit's fix.)
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-1}
ENABLE_CHUNKED_PREFILL=${ENABLE_CHUNKED_PREFILL:-false}
ENABLE_PREFIX_CACHING=${ENABLE_PREFIX_CACHING:-false}
DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW:-false}
Expand All @@ -25,6 +25,9 @@ CMD="$CMD --gpu-memory-utilization $GPU_MEMORY_UTILIZATION"
# Forward --max-model-len only when the caller picked a real value;
# -1 is the "let vLLM decide" sentinel. Numeric -ne is deliberate so
# equivalent spellings of -1 (e.g. "-01") are treated as the sentinel too.
if test "$MAX_MODEL_LEN" -ne -1; then
  CMD="$CMD --max-model-len $MAX_MODEL_LEN"
fi
# Forward --max-num-batched-tokens only when the caller overrode the
# sentinel default of 1. The original tested the literal string
# "MAX_NUM_BATCHED_TOKENS" (missing $), so test always errored and the
# flag could never be appended.
if [ "$MAX_NUM_BATCHED_TOKENS" -ne 1 ]; then
    CMD="$CMD --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS"
fi
# Opt-in switch: append --enable-prefix-caching only for the exact
# string "true" (any other value, including empty, is treated as off).
if test "$ENABLE_PREFIX_CACHING" = true; then
  CMD="$CMD --enable-prefix-caching"
fi
Expand All @@ -43,9 +46,6 @@ fi
# Opt-in switch: only the exact string "true" enables eager execution.
# A case arm (rather than [ ... ] && ...) keeps the script's exit status
# clean when the flag is off.
case "$ENFORCE_EAGER" in
  true) CMD="$CMD --enforce-eager" ;;
esac
# (removed) Stale pre-commit duplicate of the --max-num-batched-tokens guard
# that already appears earlier in this script. It compared the literal string
# "MAX_NUM_BATCHED_TOKENS" (missing $) against the old sentinel 0, so it
# could never fire — and, if fixed in place, would have appended the flag a
# second time.

# Execute the accumulated command line. Quoting $CMD prevents the shell from
# word-splitting and glob-expanding it *before* eval re-parses it — with the
# unquoted form, a flag value containing '*' or '?' could match files in the
# working directory.
eval "$CMD"

0 comments on commit 2c58904

Please sign in to comment.