diff --git a/.gitignore b/.gitignore index d51f8ac62..778dcc7b7 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,13 @@ scripts/article/xpu/ dependencies/ benchmarks/gflownet/gflownet + +scripts/inventory.yaml +output/ +sqlite.db +.ruff_cache/ + +test.out +output/ +workspace/ +.pin/tmp-* diff --git a/.no_report b/.no_report new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/report.py b/milabench/report.py index 80df4eca2..a51b1ac3a 100644 --- a/milabench/report.py +++ b/milabench/report.py @@ -41,14 +41,13 @@ def _make_row(summary, compare, weights): # Sum of all the GPU performance # to get the overall perf of the whole machine - if "per_gpu" in summary: acc = 0 for _, metrics in summary["per_gpu"].items(): acc += metrics[metric] else: acc = row["perf"] - + success_ratio = 1 - row["fail"] / row["n"] score = (acc if acc > 0 else row["perf"]) * success_ratio @@ -210,6 +209,29 @@ def make_dataframe(summary, compare=None, weights=None): for key in all_keys } ).transpose() + + return df + + +@error_guard({}) +def make_report( + summary, + compare=None, + html=None, + compare_gpus=False, + price=None, + title=None, + sources=None, + errdata=None, + weights=None, +): + if weights is None: + weights = dict() + + df = make_dataframe(summary, compare, weights) + + # Reorder columns + df = df[sorted(df.columns, key=lambda k: columns_order.get(k, 0))] @error_guard({}) diff --git a/scripts/instructions.sh b/scripts/instructions.sh new file mode 100644 index 000000000..e985babf0 --- /dev/null +++ b/scripts/instructions.sh @@ -0,0 +1,110 @@ +#!/bin/bash + + +set -m + +# +# +# + +echo ">> Configure the benchmark" +echo "==========================" + + +# +# Tweak the values to fit your system +# + +USERNAME=${USER:-"mila"} +SSH_KEY_FILE=$HOME/.ssh/id_rsa +ARCH="cuda" +WORKER_0="cn-d003" +WORKER_1="cn-d004" + + + +# Derived +VERSION="v0.0.8" +IMAGE="ghcr.io/mila-iqia/milabench:$ARCH-$VERSION" + + +# Create the config file +cat >overrides.yaml <> Prepare 
docker images" +echo "========================" + +ssh $USERNAME@$WORKER_0 "docker pull $IMAGE"& +ssh $USERNAME@$WORKER_1 "docker pull $IMAGE"& +fg +fg + +echo "<< =====================" +echo "" + +# +# +# + +echo ">> Run milabench" +echo "================" + +if [ "$ARCH" = "cuda" ]; then + docker run -it --rm --gpus all --network host --ipc=host --privileged \ + -v $SSH_KEY_FILE:/milabench/id_milabench \ + -v $(pwd)/results:/milabench/envs/runs \ + $IMAGE \ + milabench run --override "$(cat overrides.yaml)" + +elif [ "$ARCH" = "rocm" ]; then + docker run -it --rm --network host --ipc host --privileged \ + --security-opt seccomp=unconfined --group-add video \ + -v /opt/amdgpu/share/libdrm/amdgpu.ids:/opt/amdgpu/share/libdrm/amdgpu.ids \ + -v /opt/rocm:/opt/rocm \ + -v $(pwd)/results:/milabench/envs/runs \ + $IMAGE \ + milabench run --override "$(cat overrides.yaml)" +fi + +echo "<< =============" +echo "" + +# +# +# + +echo ">> Print report" +echo "===============" +docker run -it --rm \ + -v $(pwd)/results:/milabench/envs/runs \ + $IMAGE \ + milabench report --runs /milabench/envs/runs + +echo "<< ============" diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100644 index 000000000..c01f2bfd3 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,16 @@ + +OUTPUT="test.out" +rm -rf $OUTPUT +touch $OUTPUT +sbatch --ntasks=1\ + --gpus-per-task=rtx8000:1\ + --cpus-per-task=4\ + --time=01:30:00\ + --ntasks-per-node=1\ + --mem=64G\ + -o $OUTPUT\ + slurm.sh\ + -a cuda\ + -b stable_update + +tail -f $OUTPUT \ No newline at end of file diff --git a/scripts/slurm.sh b/scripts/slurm.sh new file mode 100644 index 000000000..abd3f9447 --- /dev/null +++ b/scripts/slurm.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +function usage() { + echo "Usage: $0 [-m] [-p]" + echo " -h Display this help message." 
+    echo " -a arch GPU arch (default: cuda)"
+    echo " -b BRANCH Branch to checkout (default: master)"
+    echo " -o ORIGIN Origin to use (default: github/mila/milabench)"
+    echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)"
+    echo " -e ENV Environment (default: ./env)"
+    echo " -p PYTHON Python version (default: 3.9)"
+    echo " -l LOC Location to work in (default: \$SLURM_TMPDIR)"
+    echo " ARGUMENT Any additional argument you want to process."
+    exit 1
+}
+
+ARCH="cuda"
+PYTHON="3.9"
+BRANCH="master"
+ORIGIN="https://github.com/mila-iqia/milabench.git"
+LOC="$SLURM_TMPDIR"
+CONFIG="$LOC/milabench/config/standard.yaml"
+BASE="$LOC/base"
+ENV="./env"
+REMAINING_ARGS=""
+
+while getopts ":hm:l:p:e:a:b:o:c:" opt; do  # leading ':' = silent errors; -m is accepted but ignored
+    case $opt in
+        h)
+            usage
+            ;;
+        p)
+            PYTHON="$OPTARG"
+            ;;
+        b)
+            BRANCH="$OPTARG"
+            ;;
+        o)
+            ORIGIN="$OPTARG"
+            ;;
+        c)
+            CONFIG="$OPTARG"
+            ;;
+        e)
+            ENV="$OPTARG"
+            ;;
+        a)
+            ARCH="$OPTARG"
+            ;;
+        l)
+            # Re-derive CONFIG and BASE from the new location (overrides an earlier -c)
+            LOC="$OPTARG"
+            CONFIG="$LOC/milabench/config/standard.yaml"
+            BASE="$LOC/base"
+            ;;
+        :)
+            echo "Option -$OPTARG requires an argument." >&2
+            usage
+            ;;
+    esac
+done
+
+shift "$((OPTIND-1))"
+REMAINING_ARGS="$*"
+
+echo " PYTHON: $PYTHON"
+echo " branch: $BRANCH"
+echo " origin: $ORIGIN"
+echo " config: $CONFIG"
+echo " env: $ENV"
+echo " args: $REMAINING_ARGS"
+#
+# Fix problem with conda saying it is not "init properly"
+#
+CONDA_EXEC="$(which conda)"
+CONDA_BASE="$(dirname "$CONDA_EXEC")"
+source "$CONDA_BASE/../etc/profile.d/conda.sh"
+
+if [ -e "$HOME/.credentials.env" ]; then
+    source "$HOME/.credentials.env"
+fi
+
+cd "$LOC" || exit 1
+#
+# Create the environment unless it is "base" or already exists
+# (CONDA_ENVS is not set anywhere in this script)
+if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]; then
+    conda create --prefix "$ENV" python="$PYTHON" -y
+fi
+conda activate "$ENV"
+
+export HF_HOME=$BASE/cache
+export HF_DATASETS_CACHE=$BASE/cache
+export TORCH_HOME=$BASE/cache
+export XDG_CACHE_HOME=$BASE/cache
+export MILABENCH_GPU_ARCH=$ARCH
+
+export MILABENCH_DASH=no
+export PYTHONUNBUFFERED=1
+export MILABENCH_BASE=$BASE
+export MILABENCH_CONFIG=$CONFIG
+#
+# Fetch the repo into ./milabench whatever the origin's repository name is
+#
+git clone --single-branch --depth 1 -b "$BRANCH" "$ORIGIN" milabench
+python -m pip install -e ./milabench
+
+module load gcc/9.3.0
+module load cuda/11.8
+
+echo ""
+echo "Install"
+echo "-------"
+milabench install --config "$CONFIG" --base "$BASE" $REMAINING_ARGS
+
+
+echo ""
+echo "Prepare"
+echo "-------"
+milabench prepare --config "$CONFIG" --base "$BASE" $REMAINING_ARGS
+
+echo ""
+echo "Run"
+echo "---"
+milabench run --config "$CONFIG" --base "$BASE" $REMAINING_ARGS
+
+echo "----"
+echo "Done after $SECONDS"
+echo ""