Skip to content

Commit

Permalink
collect the configs for rocblas miopen and tensile
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Nov 16, 2023
1 parent b0b514f commit 76642d2
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def run_generate(args, local_rank):
_run_gen(args, torch_model, tokenizer, input_ids, attention_mask, "Torch")


def func_benchmark(fn, warm=5, steps=10):
def func_benchmark(fn, warm=1, steps=1):
for _ in range(warm):
torch.cuda.nvtx.range_push("gen warmup")
fn()
Expand Down Expand Up @@ -322,9 +322,9 @@ def _run_bmk(args, model, name):
# print(tokenizer.pad_token_id, tokenizer.eos_token_id)

batch = 1
prompt_lens = [32, 64, 128, 256, 512, 1024, 2048]
prompt_lens = [32, 64, 128, 256, 512, 1024, 2048, 3072, 4096]
# prompt_lens = [32]
generate_lens = [1, 33]
generate_lens = [1, 2]

if args.custom_gen:
print_out("[benchmark] Using custom_generate")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Collect the unique math-library configs (rocBLAS, MIOpen, Tensile) and the
# NCCL/RCCL collective calls exercised by the llama2 ORT benchmark, one
# instrumented benchmark run per library.
N_GPU=1

# Export + optimize the ONNX model once up front; later runs reuse it.
bash sample_run.sh $N_GPU --export --optimize --merge

MODEL_NAME=ort-llama2-7b-${N_GPU}gpu
LOGFILE=$MODEL_NAME.log

# mpirun prefixes every output line with a rank tag such as "[1,0]<stderr>:".
# NOTE: "+" is only a quantifier in extended regex, so sed needs -E here —
# with plain BRE the filter silently matches nothing.
STDERR_FILTER='/\[[0-9]+,[0-9]+\]<stderr>:/d'

export ROCBLAS_LAYER=2  # make rocBLAS log a rocblas-bench reproducer per GEMM
bash sample_run.sh $N_GPU --optimize --merge --custom-gen --benchmark --ort 2>&1 | tee ${MODEL_NAME}_rocblas_configs.log
sed -E -i "$STDERR_FILTER" ${MODEL_NAME}_rocblas_configs.log
grep -o "rocblas-bench.*" ${MODEL_NAME}_rocblas_configs.log | sort -u &> unique_rocblas_configs_$MODEL_NAME.log
unset ROCBLAS_LAYER

export MIOPEN_ENABLE_LOGGING_CMD=1  # make MIOpen log a MIOpenDriver line per conv
bash sample_run.sh $N_GPU --optimize --merge --custom-gen --benchmark --ort 2>&1 | tee ${MODEL_NAME}_miopen_configs.log
sed -E -i "$STDERR_FILTER" ${MODEL_NAME}_miopen_configs.log
grep "MIOpenDriver " ${MODEL_NAME}_miopen_configs.log | sed -e 's/.*]//' | sort -u &> unique_miopen_configs_$MODEL_NAME.log
unset MIOPEN_ENABLE_LOGGING_CMD

export TENSILE_DB=0x8000 # dump Tensile kernel names
bash sample_run.sh $N_GPU --optimize --merge --custom-gen --benchmark --ort 2>&1 | tee ${MODEL_NAME}_tensile_configs.log
# BUG FIX: the filename operand was missing here, so the tensile log was
# never filtered (sed fell back to reading stdin).
sed -E -i "$STDERR_FILTER" ${MODEL_NAME}_tensile_configs.log
grep "Running kernel: " ${MODEL_NAME}_tensile_configs.log | sort -u &> unique_kernel_names_$MODEL_NAME.log
unset TENSILE_DB

# export HIPBLASLT_LOG_LEVEL=2
# unset HIPBLASLT_LOG_LEVEL

echo "========================Math lib profiling done"

export RCCL_MSCCL_ENABLE=0
NCCL_LOGFILE=$MODEL_NAME-NCCL.log
# Use "bash sample_run.sh" like the rest of the script so this does not depend
# on the script's executable bit being set.
HSA_FORCE_FINE_GRAIN_PCIE=1 NCCL_DEBUG=INFO NCCL_DEBUG_SUBSYS=INIT,COLL bash sample_run.sh $N_GPU --optimize --merge --custom-gen --benchmark --ort 2>&1 | tee $NCCL_LOGFILE
python /workspace/nccl-rccl-parser/rccl_nccl_parser.py --nccl-debug-log $NCCL_LOGFILE --output-script-name unique_nccl_configs_$MODEL_NAME.log --unique
43 changes: 43 additions & 0 deletions onnxruntime/python/tools/transformers/models/llama2/parse_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import glob

# Substrings that identify a math-library config line in the benchmark log,
# keyed by the library name used in the output filename.
_MARKERS = {
    "rocblas": "rocblas-bench",
    "miopen": "MIOpenDriver",
    "tensile": "Running kernel",
}


def extract_unique_configs(log_path):
    """Parse one benchmark log and return ``{lib: set-of-unique-lines}``.

    Each log line is emitted by mpirun with a rank prefix such as
    ``[1,0]<stdout>:``; everything before the marker is stripped so identical
    configs reported by different ranks deduplicate.  (The original code
    sliced a fixed ``line[14:]``, which corrupts lines whose rank prefix is
    not exactly 14 characters wide, e.g. ``[1,10]<stdout>:``.)

    :param log_path: path to an ``ort-llama2*gpu.log`` benchmark log.
    :return: dict mapping each key of ``_MARKERS`` to a set of config lines
        (each line keeps its trailing newline).
    """
    configs = {name: set() for name in _MARKERS}
    with open(log_path) as fh:
        for line in fh:
            for name, marker in _MARKERS.items():
                idx = line.find(marker)
                if idx != -1:
                    configs[name].add(line[idx:])
    return configs


def main():
    """Extract unique lib configs from every benchmark log in the CWD.

    Writes one ``unique_<lib>_configs_<logfile>`` file per library per log.
    The write step existed only as commented-out code before; the script
    computed the sets and then discarded them.
    """
    log_list = glob.glob("ort-llama2*gpu.log")
    print(log_list)

    # Process ALL logs — the original sliced [:1] (debug leftover) and only
    # ever handled the first one.
    for log_file in log_list:
        print("============================")
        print("processing", log_file)
        configs = extract_unique_configs(log_file)
        for name, lines in configs.items():
            print(len(lines))
            out_name = f"unique_{name}_configs_{log_file}"
            with open(out_name, "w") as out:
                out.writelines(sorted(lines))


if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions onnxruntime/python/tools/transformers/models/llama2/sample_run.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ MPI="mpirun --allow-run-as-root
MPI+=" -x TRANSFORMERS_CACHE=/hf_cache"
MPI+=" -x NCCL_DEBUG=VERSION"

MODEL_NAME=${LLAMA2_MODEL_NAME:-"Llama-2-70b-hf"}
OUTPUT=$MODEL_NAME
MODEL_NAME=${LLAMA2_MODEL_NAME:-"llama-2-7b-chat-hf"}
OUTPUT=/data/onnx/$MODEL_NAME

CMD="$MPI python llama-v2.py --model=meta-llama/$MODEL_NAME --output-name=$OUTPUT ${@:2}"
CMD="$MPI python llama-v2.py --model=/data/$MODEL_NAME --output-name=$OUTPUT ${@:2}"

set -x
$CMD

0 comments on commit 76642d2

Please sign in to comment.