Skip to content

Commit

Permalink
fix llava bench (#819)
Browse files Browse the repository at this point in the history
  • Loading branch information
pkhk-1 authored Nov 21, 2024
1 parent cad8092 commit 7fa1f2e
Show file tree
Hide file tree
Showing 13 changed files with 164 additions and 131 deletions.
18 changes: 9 additions & 9 deletions build_paddle_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,23 @@ if command -v nvcc > /dev/null 2>&1; then
case $cuda_version in
"11.2")
echo "安装CUDA 11.2版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu112/
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu112/
;;
"11.6")
echo "安装CUDA 11.6版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu116/
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu116/
;;
"11.7")
echo "安装CUDA 11.7版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu117/
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu117/
;;
"11.8")
echo "安装CUDA 11.8版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
;;
"12.3")
echo "安装CUDA 12.3版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
$PYTHON_CMD -m pip install paddlepaddle-gpu==3.0.0b2 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
;;
*)
echo "警告: 不支持的CUDA版本 ($cuda_version)"
Expand All @@ -83,14 +83,14 @@ if command -v nvcc > /dev/null 2>&1; then
esac
else
echo "未检测到CUDA。安装CPU版本的paddlepaddle..."
$PYTHON_CMD -m pip install paddlepaddle==3.0.0b1
$PYTHON_CMD -m pip install paddlepaddle==3.0.0b2
fi

# 验证安装
echo "验证PaddlePaddle 3.0.0b1安装..."
echo "验证PaddlePaddle 3.0.0b2安装..."
if $PYTHON_CMD -c "import paddle; paddle.utils.run_check()"; then
echo "PaddlePaddle 3.0.0b1安装成功"
echo "PaddlePaddle 3.0.0b2安装成功"
else
echo "PaddlePaddle 3.0.0b1安装验证失败,请检查安装日志"
echo "PaddlePaddle 3.0.0b2安装验证失败,请检查安装日志"
exit 1
fi
8 changes: 4 additions & 4 deletions paddlemix/examples/llava/pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ def main():
if training_args.benchmark:
total_effective_samples = total_samples * training_args.num_train_epochs
effective_samples_per_second = total_effective_samples / train_result.metrics["train_runtime"]
mem_gpu = (
train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
)
# mem_gpu = (
# train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
# )
logger.info(f"Effective_samples_per_second: {effective_samples_per_second} ")
logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
# logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
logger.info("Benchmark done.")
else:
trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
Expand Down
8 changes: 4 additions & 4 deletions paddlemix/tools/supervised_finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ def main():
if training_args.benchmark:
total_effective_samples = total_samples * training_args.num_train_epochs
effective_samples_per_second = total_effective_samples / train_result.metrics["train_runtime"]
mem_gpu = (
train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
)
# mem_gpu = (
# train_result.metrics["train_mem_gpu_peaked_delta"] + train_result.metrics["train_mem_gpu_alloc_delta"]
# )
logger.info(f"Effective_samples_per_second: {effective_samples_per_second} ")
logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
# logger.info(f"train_mem_gpu_peaked: {int(mem_gpu/ (2**20))} MB")
logger.info("Benchmark done.")
else:
trainer.save_model(merge_tensor_parallel=training_args.tensor_parallel_degree > 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
# limitations under the License.

model=llava
model_item=llava-v1.6-vicuna-7b
model_item=llava-v1.6-vicuna-13b-lora_sft
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0
train_stage=sft

# get data
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} ${train_stage} 2>&1;
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
# limitations under the License.

model=llava
model_item=vicuna-13b-v1.5
model_item=llava-v1.6-vicuna-13b-pretrain
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0
train_stage=pretrain

# get data
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} ${train_stage} 2>&1;
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
# limitations under the License.

model=llava
model_item=llava-v1.6-vicuna-13b
model_item=llava-v1.6-vicuna-13b-sft
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0
train_stage=sft

# get data
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} ${train_stage} 2>&1;
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
# limitations under the License.

model=llava
model_item=llava-v1.6-vicuna-7b
model_item=llava-v1.6-vicuna-7b-lora_sft
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0
train_stage=lora_sft

# get data
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
bash tests/test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} ${train_stage} 2>&1;
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Benchmark launcher for the llava model, item "llava-v1.6-vicuna-7b-pretrain".
# It defines the run parameters below, runs the shared prepare.sh, and then
# hands the parameters to the shared run_benchmark.sh as positional arguments.
# Both script paths are relative ("./test_tipc/..."), so this file must be
# launched from the directory that contains test_tipc/.

# Selects the benchmark_common directory used in the paths below.
model=llava
# First positional argument passed to run_benchmark.sh.
model_item=llava-v1.6-vicuna-7b-pretrain
# NOTE(review): the remaining values are forwarded verbatim, in this order;
# their exact meaning is defined by run_benchmark.sh. Presumably: batch size,
# numeric precision, parallel mode, node/card layout (N1C8 looks like
# 1 node x 8 cards), epoch count and dataloader workers - confirm there.
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0

# get data
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
# stderr is merged into stdout (2>&1) so the caller sees a single stream.
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Benchmark launcher for the llava model, item "llava-v1.6-vicuna-7b-sft".
# It defines the run parameters below, runs the shared prepare.sh, and then
# hands the parameters to the shared run_benchmark.sh as positional arguments.
# Both script paths are relative ("./test_tipc/..."), so this file must be
# launched from the directory that contains test_tipc/.

# Selects the benchmark_common directory used in the paths below.
model=llava
# First positional argument passed to run_benchmark.sh.
model_item=llava-v1.6-vicuna-7b-sft
# NOTE(review): the remaining values are forwarded verbatim, in this order;
# their exact meaning is defined by run_benchmark.sh. Presumably: batch size,
# numeric precision, parallel mode, node/card layout (N1C8 looks like
# 1 node x 8 cards), epoch count and dataloader workers - confirm there.
bs_item=16
fp_item=bf16
run_mode=DP
device_num=N1C8
max_epochs=3
num_workers=0

# get data
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/prepare.sh
# run
# stderr is merged into stdout (2>&1) so the caller sees a single stream.
bash ./test_tipc/dygraph/dp/${model}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_epochs} ${num_workers} 2>&1;

This file was deleted.

5 changes: 3 additions & 2 deletions tests/test_tipc/dygraph/dp/llava/benchmark_common/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ wget https://paddlenlp.bj.bcebos.com/models/community/paddlemix/benchmark/llava_
tar -xf llava_bench_data.tar
mv llava_bench_data /root/.paddlemix/datasets/
rm -rf llava_bench_data.tar
ln -s /root/.paddlemix/datasets/llava_bench_data ../
ln -s /root/.paddlemix/datasets/llava_bench_data ./

export http_proxy=agent.baidu.com:8188
export https_proxy=agent.baidu.com:8188
Expand All @@ -32,7 +32,8 @@ python -m pip install -e ../
python -m pip install --upgrade paddlenlp pybind11 regex sentencepiece tqdm visualdl attrdict easydict pyyaml -i https://mirror.baidu.com/pypi/simple
pip install -r ../paddlemix/appflow/requirements.txt
pip install -U ppdiffusers
python -m pip install https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.8-cudnn8.6-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post118-cp310-cp310-linux_x86_64.whl
bash ../build_paddle_env.sh
# python -m pip install https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.8-cudnn8.6-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post118-cp310-cp310-linux_x86_64.whl
python -m pip install paddlenlp==3.0.0b2
python -m pip install huggingface_hub==0.23.0
python -m pip list
Expand Down
Loading

0 comments on commit 7fa1f2e

Please sign in to comment.