diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9c31e2f66..fb8c75f26 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -53,10 +53,12 @@ jobs: uses: actions/checkout@v3 - name: Get Image Tag Name + env: + GITHUB_REF_NAME_ENV: ${{ github.ref_name }} run: | REGEX="(.*)v(.*)\.(.*)\.(.*)" IMAGE_TAG="nightly" - if [[ "${{ github.ref_name }}" =~ $REGEX ]]; then + if [[ "${GITHUB_REF_NAME_ENV}" =~ $REGEX ]]; then IMAGE_TAG="${GITHUB_REF_NAME##*/}" fi echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8e0cb45d2..1192bd0d7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -47,6 +47,7 @@ jobs: MILABENCH_ARGS: "" MILABENCH_GPU_ARCH: "${{ matrix.arch }}" MILABENCH_DASH: "no" + MILABENCH_EXCLUDE: "${{ matrix.exclude }}" steps: - uses: actions/checkout@v3 @@ -60,7 +61,7 @@ jobs: - name: Pytorch Sanity run: | - if [[ "${{ matrix.arch }}" == "rocm" ]]; then + if [[ "${MILABENCH_GPU_ARCH}" == "rocm" ]]; then groups /opt/rocm/bin/rocminfo fi @@ -74,8 +75,21 @@ jobs: python -m pip install -U pip python -m pip install -U poetry poetry lock --no-update + # poetry v1.7 has a bug where it can't find pip during the first + # install attempt: + # Output: + # [...]/.venv/bin/python: can't open file + # '[...]/lib/python3.9/site-packages/virtualenv/seed/wheels/embed/pip-23.3.1-py3-none-any.whl/pip': + # [Errno 2] No such file or directory + ! 
poetry install poetry install + - name: pin + run: | + MILABENCH_GPU_ARCH=cuda poetry run milabench pin -c constraints/cuda.txt --config config/standard.yaml + MILABENCH_GPU_ARCH=rocm poetry run milabench pin -c constraints/rocm.txt --config config/standard.yaml + git diff --stat + - name: tests run: | export PATH="/opt/rocm/bin:$PATH" @@ -83,16 +97,16 @@ jobs: - name: install benchmarks run: | - milabench install --exclude "${{ matrix.exclude }}" + milabench install --exclude "${MILABENCH_EXCLUDE}" - name: prepare benchmarks run: | - milabench prepare --exclude "${{ matrix.exclude }}" + milabench prepare --exclude "${MILABENCH_EXCLUDE}" - name: run benchmarks run: | export PATH="/opt/rocm/bin:$PATH" - milabench run --validations all --exclude "${{ matrix.exclude }}" + milabench run --validations all --exclude "${MILABENCH_EXCLUDE}" - name: Summary run: | diff --git a/.gitignore b/.gitignore index 4279c715a..15e50425e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,8 @@ sqlite.db .pin-constraints-* .mongo/ .pin/tmp-* -slurm*.out \ No newline at end of file + +.no_report +trash/ +workspace/ +slurm-* diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt deleted file mode 100644 index 9db708f95..000000000 --- a/.pin/constraints-cuda-torch.txt +++ /dev/null @@ -1,390 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=.pin/constraints-cuda-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in -# ---extra-index-url https://download.pytorch.org/whl/cu118 - -absl-py==1.4.0 - # via tensorboard -accelerate==0.19.0 - # via -r benchmarks/accelerate_opt/requirements.in 
-aiohttp==3.8.4 - # via - # datasets - # fsspec -aiosignal==1.3.1 - # via aiohttp -antlr4-python3-runtime==4.9.3 - # via omegaconf -asttokens==2.2.1 - # via giving -async-timeout==4.0.2 - # via aiohttp -attrs==23.1.0 - # via aiohttp -cachetools==5.3.1 - # via google-auth -certifi==2023.5.7 - # via requests -charset-normalizer==3.1.0 - # via - # aiohttp - # requests -cmake==3.26.3 - # via triton -codefind==0.1.3 - # via ptera -datasets==2.12.0 - # via - # -r benchmarks/accelerate_opt/requirements.in - # evaluate -deepspeed==0.8.3 - # via -r benchmarks/accelerate_opt/requirements.in -dill==0.3.6 - # via - # datasets - # evaluate - # multiprocess -docker==6.1.2 - # via torchx -docstring-parser==0.8.1 - # via torchx -evaluate==0.4.0 - # via -r benchmarks/accelerate_opt/requirements.in -executing==1.2.0 - # via varname -fbgemm-gpu==0.4.1 - # via torchrec -filelock==3.12.0 - # via - # huggingface-hub - # torch - # torchx - # transformers - # triton -frozenlist==1.3.3 - # via - # aiohttp - # aiosignal -fsspec[http]==2023.5.0 - # via - # datasets - # evaluate - # huggingface-hub - # pytorch-lightning - # torchx -future==0.18.3 - # via -r benchmarks/dlrm/requirements.in -giving==0.4.2 - # via - # ptera - # voir -google-auth==2.19.0 - # via - # google-auth-oauthlib - # tensorboard -google-auth-oauthlib==1.0.0 - # via tensorboard -graphviz==0.20.1 - # via torchviz -grpcio==1.54.2 - # via tensorboard -hjson==3.1.0 - # via deepspeed -huggingface-hub==0.14.1 - # via - # -r benchmarks/timm/requirements.in - # datasets - # evaluate - # transformers -idna==3.4 - # via - # requests - # yarl -importlib-metadata==6.6.0 - # via - # markdown - # torchx -iopath==0.1.10 - # via torchrec -jinja2==3.1.2 - # via torch -joblib==1.2.0 - # via scikit-learn -lightning-utilities==0.8.0 - # via pytorch-lightning -lit==16.0.5 - # via triton -markdown==3.4.3 - # via tensorboard -markdown-it-py==2.2.0 - # via rich -markupsafe==2.1.2 - # via - # jinja2 - # werkzeug -mdurl==0.1.2 - # via markdown-it-py 
-mpmath==1.3.0 - # via sympy -multidict==6.0.4 - # via - # aiohttp - # yarl -multiprocess==0.70.14 - # via - # datasets - # evaluate -mypy-extensions==1.0.0 - # via typing-inspect -networkx==3.1 - # via torch -ninja==1.11.1 - # via - # -r benchmarks/rwkv/requirements.in - # deepspeed -numpy==1.24.3 - # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/super-slomo/requirements.in - # accelerate - # datasets - # deepspeed - # evaluate - # onnx - # opencv-python - # pandas - # pyarrow - # pytorch-lightning - # scikit-learn - # scipy - # tensorboard - # torchmetrics - # torchvision - # transformers -oauthlib==3.2.2 - # via requests-oauthlib -omegaconf==2.3.0 - # via voir -onnx==1.14.0 - # via -r benchmarks/dlrm/requirements.in -opencv-python==4.7.0.72 - # via -r benchmarks/super-slomo/requirements.in -ovld==0.3.2 - # via voir -packaging==23.1 - # via - # accelerate - # datasets - # deepspeed - # docker - # evaluate - # huggingface-hub - # lightning-utilities - # pytorch-lightning - # torchmetrics - # transformers -pandas==2.0.2 - # via - # datasets - # evaluate - # torchrec -pillow==9.5.0 - # via torchvision -portalocker==2.7.0 - # via iopath -protobuf==4.23.2 - # via - # onnx - # tensorboard -psutil==5.9.5 - # via - # accelerate - # deepspeed -ptera==1.4.1 - # via voir -py-cpuinfo==9.0.0 - # via deepspeed -pyarrow==12.0.0 - # via datasets -pyasn1==0.5.0 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.3.0 - # via google-auth -pydantic==1.10.8 - # via deepspeed -pydot==1.4.2 - # via -r benchmarks/dlrm/requirements.in -pygments==2.15.1 - # via rich -pynvml==11.5.0 - # via voir -pyparsing==3.0.9 - # via pydot -pyre-extensions==0.0.30 - # via torchx -python-dateutil==2.8.2 - # via pandas -pytorch-lightning==1.9.5 - # via -r benchmarks/rwkv/requirements.in -pytz==2023.3 - # via pandas -pyyaml==6.0 - # via - # -r benchmarks/timm/requirements.in - # accelerate - # datasets - # huggingface-hub - # omegaconf - # pytorch-lightning - # torchx - # transformers 
-reactivex==4.0.4 - # via giving -regex==2023.5.5 - # via transformers -requests==2.31.0 - # via - # datasets - # docker - # evaluate - # fsspec - # huggingface-hub - # requests-oauthlib - # responses - # tensorboard - # torchvision - # transformers -requests-oauthlib==1.3.1 - # via google-auth-oauthlib -responses==0.18.0 - # via - # datasets - # evaluate -rich==13.3.5 - # via - # -r benchmarks/accelerate_opt/requirements.in - # voir -rsa==4.9 - # via google-auth -safetensors==0.3.1 - # via -r benchmarks/timm/requirements.in -scikit-learn==1.2.2 - # via -r benchmarks/dlrm/requirements.in -scipy==1.10.1 - # via scikit-learn -six==1.16.0 - # via - # asttokens - # google-auth - # python-dateutil -sympy==1.12 - # via torch -tabulate==0.9.0 - # via - # torchrec - # torchx -tensorboard==2.13.0 - # via -r benchmarks/dlrm/requirements.in -tensorboard-data-server==0.7.0 - # via tensorboard -threadpoolctl==3.1.0 - # via scikit-learn -tokenizers==0.13.3 - # via transformers -torch==2.0.1+cu118 - # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/timm/requirements.in - # accelerate - # deepspeed - # pytorch-lightning - # torchaudio - # torchmetrics - # torchvision - # torchviz - # triton -torchaudio==2.0.2+cu118 - # via -r benchmarks/accelerate_opt/requirements.in -torchmetrics==0.11.4 - # via - # pytorch-lightning - # torchrec -torchrec==0.4.0 - # via -r benchmarks/dlrm/requirements.in -torchvision==0.15.2+cu118 - # via - # -r benchmarks/super-slomo/requirements.in - # -r benchmarks/torchvision/requirements.in -torchviz==0.0.2 - # via -r benchmarks/dlrm/requirements.in -torchx==0.5.0 - # via -r benchmarks/dlrm/requirements.in -tqdm==4.65.0 - # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/torchvision/requirements.in - # datasets - # deepspeed - # evaluate - # huggingface-hub - # iopath - # pytorch-lightning - # torchrec - # transformers -transformers==4.29.2 - # via - # -r benchmarks/accelerate_opt/requirements.in - # -r 
benchmarks/huggingface/requirements.in -triton==2.0.0 - # via torch -typing-extensions==4.6.2 - # via - # huggingface-hub - # iopath - # lightning-utilities - # onnx - # pydantic - # pyre-extensions - # pytorch-lightning - # reactivex - # torch - # typing-inspect -typing-inspect==0.9.0 - # via pyre-extensions -tzdata==2023.3 - # via pandas -urllib3==1.26.16 - # via - # docker - # google-auth - # requests - # responses - # torchx -varname==0.10.0 - # via giving -voir==0.2.10 - # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/timm/requirements.in -websocket-client==1.5.2 - # via docker -werkzeug==2.3.4 - # via tensorboard -wheel==0.40.0 - # via tensorboard -xxhash==3.2.0 - # via - # datasets - # evaluate -yarl==1.9.2 - # via aiohttp -zipp==3.15.0 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/.pin/constraints-rocm-torch.txt b/.pin/constraints-rocm-torch.txt index efd674cce..a220c5c8d 100644 --- a/.pin/constraints-rocm-torch.txt +++ b/.pin/constraints-rocm-torch.txt @@ -1,16 +1,16 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in 
benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ -absl-py==1.4.0 +absl-py==2.0.0 # via tensorboard -accelerate==0.19.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in -aiohttp==3.8.4 +aiohttp==3.8.6 # via # datasets # fsspec @@ -18,62 +18,65 @@ aiosignal==1.3.1 # via aiohttp antlr4-python3-runtime==4.9.3 # via omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via giving -async-timeout==4.0.2 +async-timeout==4.0.3 # via aiohttp attrs==23.1.0 # via aiohttp -cachetools==5.3.1 +cachetools==5.3.2 # via google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # aiohttp # requests -cmake==3.26.3 +cmake==3.27.7 # via pytorch-triton-rocm codefind==0.1.3 # via ptera -datasets==2.12.0 +datasets==2.14.6 # via # -r benchmarks/accelerate_opt/requirements.in # evaluate -deepspeed==0.8.3 - # via -r benchmarks/accelerate_opt/requirements.in -dill==0.3.6 +deepspeed==0.12.2 + # via + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/rwkv/requirements.in +dill==0.3.7 # via # datasets # evaluate # multiprocess -docker==6.1.2 +docker==6.1.3 # via torchx docstring-parser==0.8.1 # via torchx -evaluate==0.4.0 +evaluate==0.4.1 # via -r benchmarks/accelerate_opt/requirements.in executing==1.2.0 # via varname -fbgemm-gpu==0.4.1 +fbgemm-gpu==0.5.0 # via torchrec -filelock==3.12.0 +filelock==3.13.1 # via # huggingface-hub # pytorch-triton-rocm # torch # torchx # transformers -frozenlist==1.3.3 +frozenlist==1.4.0 # via # aiohttp # aiosignal -fsspec[http]==2023.5.0 +fsspec[http]==2023.10.0 # via # datasets # evaluate # huggingface-hub # pytorch-lightning + # torch # torchx future==0.18.3 # via -r benchmarks/dlrm/requirements.in @@ -81,47 +84,47 @@ giving==0.4.2 # via # ptera # voir -google-auth==2.19.0 
+google-auth==2.23.4 # via # google-auth-oauthlib # tensorboard -google-auth-oauthlib==1.0.0 +google-auth-oauthlib==1.1.0 # via tensorboard graphviz==0.20.1 # via torchviz -grpcio==1.54.2 +grpcio==1.59.2 # via tensorboard hjson==3.1.0 # via deepspeed -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via # -r benchmarks/timm/requirements.in + # accelerate # datasets # evaluate + # tokenizers # transformers idna==3.4 # via # requests # yarl -importlib-metadata==6.6.0 - # via - # markdown - # torchx -iopath==0.1.10 - # via torchrec +importlib-metadata==6.8.0 + # via torchx jinja2==3.1.2 # via torch -joblib==1.2.0 +joblib==1.3.2 # via scikit-learn -lightning-utilities==0.8.0 - # via pytorch-lightning -lit==16.0.5 +lightning-utilities==0.9.0 + # via + # pytorch-lightning + # torchmetrics +lit==17.0.4 # via pytorch-triton-rocm -markdown==3.4.3 +markdown==3.5.1 # via tensorboard -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # jinja2 # werkzeug @@ -133,26 +136,27 @@ multidict==6.0.4 # via # aiohttp # yarl -multiprocess==0.70.14 +multiprocess==0.70.15 # via # datasets # evaluate mypy-extensions==1.0.0 # via typing-inspect -networkx==3.1 +networkx==3.2.1 # via torch -ninja==1.11.1 +ninja==1.11.1.1 # via # -r benchmarks/rwkv/requirements.in # deepspeed -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/dlrm/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/rwkv/requirements.in # accelerate # datasets # deepspeed # evaluate + # fbgemm-gpu # onnx # opencv-python # pandas @@ -168,13 +172,13 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.3.0 # via voir -onnx==1.14.0 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in -opencv-python==4.7.0.72 +opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in ovld==0.3.2 # via voir -packaging==23.1 +packaging==23.2 # via # accelerate # datasets @@ -186,20 +190,17 @@ packaging==23.1 # pytorch-lightning # torchmetrics # 
transformers -pandas==2.0.2 +pandas==2.1.2 # via # datasets # evaluate - # torchrec -pillow==9.5.0 +pillow==10.1.0 # via torchvision -portalocker==2.7.0 - # via iopath -protobuf==4.23.2 +protobuf==4.23.4 # via # onnx # tensorboard -psutil==5.9.5 +psutil==5.9.6 # via # accelerate # deepspeed @@ -207,7 +208,7 @@ ptera==1.4.1 # via voir py-cpuinfo==9.0.0 # via deepspeed -pyarrow==12.0.0 +pyarrow==14.0.0 # via datasets pyasn1==0.5.0 # via @@ -215,15 +216,19 @@ pyasn1==0.5.0 # rsa pyasn1-modules==0.3.0 # via google-auth -pydantic==1.10.8 - # via deepspeed +pydantic==1.10.13 + # via + # -r benchmarks/rwkv/requirements.in + # deepspeed pydot==1.4.2 # via -r benchmarks/dlrm/requirements.in -pygments==2.15.1 +pygments==2.16.1 # via rich pynvml==11.5.0 - # via voir -pyparsing==3.0.9 + # via + # deepspeed + # voir +pyparsing==3.1.1 # via pydot pyre-extensions==0.0.30 # via torchx @@ -231,11 +236,11 @@ python-dateutil==2.8.2 # via pandas pytorch-lightning==1.9.5 # via -r benchmarks/rwkv/requirements.in -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via torch -pytz==2023.3 +pytz==2023.3.post1 # via pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # -r benchmarks/timm/requirements.in # accelerate @@ -247,7 +252,7 @@ pyyaml==6.0 # transformers reactivex==4.0.4 # via giving -regex==2023.5.5 +regex==2023.10.3 # via transformers requests==2.31.0 # via @@ -264,44 +269,42 @@ requests==2.31.0 requests-oauthlib==1.3.1 # via google-auth-oauthlib responses==0.18.0 - # via - # datasets - # evaluate -rich==13.3.5 + # via evaluate +rich==13.6.0 # via # -r benchmarks/accelerate_opt/requirements.in # voir rsa==4.9 # via google-auth -safetensors==0.3.1 - # via -r benchmarks/timm/requirements.in -scikit-learn==1.2.2 +safetensors==0.4.0 + # via + # -r benchmarks/timm/requirements.in + # transformers +scikit-learn==1.3.2 # via -r benchmarks/dlrm/requirements.in -scipy==1.10.1 +scipy==1.11.3 # via scikit-learn six==1.16.0 # via # asttokens - # google-auth # python-dateutil + # tensorboard 
sympy==1.12 # via torch tabulate==0.9.0 - # via - # torchrec - # torchx -tensorboard==2.13.0 + # via torchx +tensorboard==2.15.1 # via -r benchmarks/dlrm/requirements.in -tensorboard-data-server==0.7.0 +tensorboard-data-server==0.7.2 # via tensorboard -threadpoolctl==3.1.0 +threadpoolctl==3.2.0 # via scikit-learn -tokenizers==0.13.3 +tokenizers==0.14.1 # via transformers -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/accelerate_opt/requirements.in - # -r benchmarks/torchvision/requirements.in + # -r benchmarks/rwkv/requirements.in # accelerate # deepspeed # pytorch-lightning @@ -310,44 +313,41 @@ torch==2.0.1+rocm5.4.2 # torchmetrics # torchvision # torchviz -torchaudio==2.0.2+rocm5.4.2 +torchaudio==2.1.0+rocm5.6 # via -r benchmarks/accelerate_opt/requirements.in -torchmetrics==0.11.4 +torchmetrics==1.0.3 # via # pytorch-lightning # torchrec -torchrec==0.4.0 +torchrec==0.5.0 # via -r benchmarks/dlrm/requirements.in -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via # -r benchmarks/accelerate_opt/requirements.in - # -r benchmarks/torchvision/requirements.in + # -r benchmarks/stargan/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.5.0 # via -r benchmarks/dlrm/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via - # -r benchmarks/dlrm/requirements.in + # -r benchmarks/flops/requirements.in # -r benchmarks/torchvision/requirements.in # datasets # deepspeed # evaluate # huggingface-hub - # iopath # pytorch-lightning # torchrec # transformers -transformers==4.29.2 +transformers==4.35.0 # via # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/huggingface/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # huggingface-hub - # iopath # lightning-utilities - # onnx # pydantic # pyre-extensions # pytorch-lightning @@ -358,32 +358,29 @@ typing-inspect==0.9.0 # via pyre-extensions tzdata==2023.3 # via pandas -urllib3==1.26.16 +urllib3==1.26.18 # via # docker - # google-auth # 
requests # responses # torchx varname==0.10.0 # via giving -voir==0.2.10 +voir==0.2.11 # via # -r benchmarks/accelerate_opt/requirements.in - # -r benchmarks/torchvision/requirements.in -websocket-client==1.5.2 + # -r benchmarks/rwkv/requirements.in +websocket-client==1.6.4 # via docker -werkzeug==2.3.4 - # via tensorboard -wheel==0.40.0 +werkzeug==3.0.1 # via tensorboard -xxhash==3.2.0 +xxhash==3.4.1 # via # datasets # evaluate yarl==1.9.2 # via aiohttp -zipp==3.15.0 +zipp==3.17.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/benchmarks/accelerate_opt/requirements.cuda.txt b/benchmarks/accelerate_opt/requirements.cuda.txt index 1f4194c6b..75fa45e59 100644 --- a/benchmarks/accelerate_opt/requirements.cuda.txt +++ b/benchmarks/accelerate_opt/requirements.cuda.txt @@ -1,14 +1,14 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/accelerate_opt/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-opt.txt benchmarks/accelerate_opt/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/accelerate_opt/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-opt.txt benchmarks/accelerate_opt/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 -accelerate==0.19.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in -aiohttp==3.8.4 +aiohttp==3.8.6 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -21,11 +21,11 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -async-timeout==4.0.2 +async-timeout==4.0.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -33,59 +33,56 @@ attrs==23.1.0 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # aiohttp -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -datasets==2.12.0 +datasets==2.14.6 # via # -r benchmarks/accelerate_opt/requirements.in # evaluate -deepspeed==0.8.3 +deepspeed==0.12.2 # via -r benchmarks/accelerate_opt/requirements.in -dill==0.3.6 +dill==0.3.7 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate # multiprocess -evaluate==0.4.0 +evaluate==0.4.1 # via -r benchmarks/accelerate_opt/requirements.in executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torch # transformers # triton -frozenlist==1.3.3 +frozenlist==1.4.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # aiosignal -fsspec[http]==2023.5.0 +fsspec[http]==2023.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate # huggingface-hub + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -95,11 +92,13 @@ hjson==3.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # accelerate # datasets # evaluate + # tokenizers # transformers idna==3.4 # via @@ -110,15 +109,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -135,20 +130,20 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl -multiprocess==0.70.14 +multiprocess==0.70.15 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -ninja==1.11.1 +ninja==1.11.1.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -167,7 +162,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -176,16 +171,16 @@ packaging==23.1 # evaluate # huggingface-hub # transformers -pandas==2.0.2 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision -psutil==5.9.5 +psutil==5.9.6 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -198,31 +193,32 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -pyarrow==12.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets -pydantic==1.10.8 +pydantic==1.10.13 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # deepspeed # voir python-dateutil==2.8.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pytz==2023.3 +pytz==2023.3.post1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # accelerate @@ -234,7 +230,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2023.5.5 +regex==2023.10.3 # via 
# -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -251,12 +247,15 @@ requests==2.31.0 responses==0.18.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # datasets # evaluate -rich==13.3.5 +rich==13.6.0 # via # -r benchmarks/accelerate_opt/requirements.in # voir +safetensors==0.4.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # transformers six==1.16.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -266,23 +265,22 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -tokenizers==0.13.3 +tokenizers==0.14.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/accelerate_opt/requirements.in # accelerate # deepspeed # torchaudio # torchvision - # triton -torchaudio==2.0.2+cu118 +torchaudio==2.1.0+cu118 # via -r benchmarks/accelerate_opt/requirements.in -torchvision==0.15.2+cu118 +torchvision==0.16.0+cu118 # via -r benchmarks/accelerate_opt/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -290,13 +288,13 @@ tqdm==4.65.0 # evaluate # huggingface-hub # transformers -transformers==4.29.2 +transformers==4.35.0 # via -r benchmarks/accelerate_opt/requirements.in -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -307,7 +305,7 @@ tzdata==2023.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pandas -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -316,9 +314,9 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in -xxhash==3.2.0 +xxhash==3.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets diff --git a/benchmarks/accelerate_opt/requirements.in 
b/benchmarks/accelerate_opt/requirements.in index 1ac835ab2..bcb2f4bfc 100644 --- a/benchmarks/accelerate_opt/requirements.in +++ b/benchmarks/accelerate_opt/requirements.in @@ -7,4 +7,4 @@ evaluate accelerate deepspeed rich -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/accelerate_opt/requirements.rocm.txt b/benchmarks/accelerate_opt/requirements.rocm.txt index 09cbce64f..b56ff798f 100644 --- a/benchmarks/accelerate_opt/requirements.rocm.txt +++ b/benchmarks/accelerate_opt/requirements.rocm.txt @@ -1,14 +1,14 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/accelerate_opt/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-opt.txt benchmarks/accelerate_opt/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/accelerate_opt/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-opt.txt benchmarks/accelerate_opt/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ -accelerate==0.19.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in -aiohttp==3.8.4 +aiohttp==3.8.6 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -21,11 +21,11 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -async-timeout==4.0.2 +async-timeout==4.0.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -33,16 +33,16 @@ attrs==23.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # requests 
-cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -50,42 +50,43 @@ codefind==0.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # ptera -datasets==2.12.0 +datasets==2.14.6 # via # -r benchmarks/accelerate_opt/requirements.in # evaluate -deepspeed==0.8.3 +deepspeed==0.12.2 # via -r benchmarks/accelerate_opt/requirements.in -dill==0.3.6 +dill==0.3.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate # multiprocess -evaluate==0.4.0 +evaluate==0.4.1 # via -r benchmarks/accelerate_opt/requirements.in executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # pytorch-triton-rocm # torch # transformers -frozenlist==1.3.3 +frozenlist==1.4.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # aiosignal -fsspec[http]==2023.5.0 +fsspec[http]==2023.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate # huggingface-hub + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -95,11 +96,13 @@ hjson==3.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # accelerate # datasets # evaluate + # tokenizers # transformers idna==3.4 # via @@ -110,15 +113,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -135,20 +138,20 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # yarl -multiprocess==0.70.14 +multiprocess==0.70.15 # via # -c 
.pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -ninja==1.11.1 +ninja==1.11.1.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # accelerate @@ -167,7 +170,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # accelerate @@ -176,16 +179,16 @@ packaging==23.1 # evaluate # huggingface-hub # transformers -pandas==2.0.2 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision -psutil==5.9.5 +psutil==5.9.6 # via # -c .pin/../.pin/constraints-rocm-torch.txt # accelerate @@ -198,35 +201,36 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -pyarrow==12.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets -pydantic==1.10.8 +pydantic==1.10.13 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # deepspeed # voir python-dateutil==2.8.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pandas -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pytz==2023.3 +pytz==2023.3.post1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # accelerate @@ -238,7 +242,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -regex==2023.5.5 +regex==2023.10.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # transformers @@ -255,12 +259,15 @@ 
requests==2.31.0 responses==0.18.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt - # datasets # evaluate -rich==13.3.5 +rich==13.6.0 # via # -r benchmarks/accelerate_opt/requirements.in # voir +safetensors==0.4.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # transformers six==1.16.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -270,11 +277,11 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -tokenizers==0.13.3 +tokenizers==0.14.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # transformers -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/accelerate_opt/requirements.in # accelerate @@ -282,11 +289,11 @@ torch==2.0.1+rocm5.4.2 # pytorch-triton-rocm # torchaudio # torchvision -torchaudio==2.0.2+rocm5.4.2 +torchaudio==2.1.0+rocm5.6 # via -r benchmarks/accelerate_opt/requirements.in -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via -r benchmarks/accelerate_opt/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -294,9 +301,9 @@ tqdm==4.65.0 # evaluate # huggingface-hub # transformers -transformers==4.29.2 +transformers==4.35.0 # via -r benchmarks/accelerate_opt/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -307,7 +314,7 @@ tzdata==2023.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pandas -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -316,9 +323,9 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in -xxhash==3.2.0 +xxhash==3.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets diff --git a/benchmarks/dlrm/requirements.cuda.txt b/benchmarks/dlrm/requirements.cuda.txt index 98abcb0cf..a6b3e0719 100644 --- a/benchmarks/dlrm/requirements.cuda.txt +++ 
b/benchmarks/dlrm/requirements.cuda.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/dlrm/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-dlrm.txt benchmarks/dlrm/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/dlrm/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-dlrm.txt benchmarks/dlrm/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 -absl-py==1.4.0 +absl-py==2.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -14,31 +14,27 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -cachetools==5.3.1 +cachetools==5.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -docker==6.1.2 +docker==6.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchx @@ -50,19 +46,20 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -fbgemm-gpu==0.4.1 +fbgemm-gpu==0.5.0+cu118 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchrec -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch # torchx # triton -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # torch # torchx future==0.18.3 # via -r benchmarks/dlrm/requirements.in @@ -71,12 +68,12 @@ giving==0.4.2 # 
-c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -google-auth==2.19.0 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth-oauthlib # tensorboard -google-auth-oauthlib==1.0.0 +google-auth-oauthlib==1.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -84,7 +81,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchviz -grpcio==1.54.2 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -92,36 +89,31 @@ idna==3.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -importlib-metadata==6.6.0 +importlib-metadata==6.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # markdown # torchx -iopath==0.1.10 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # torchrec jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -joblib==1.2.0 +joblib==1.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # scikit-learn -lit==16.0.5 +lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown==3.4.3 + # torchmetrics +markdown==3.5.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -138,15 +130,14 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # typing-inspect -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/dlrm/requirements.in # onnx - # pandas # scikit-learn # scipy # tensorboard @@ -159,26 +150,19 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -onnx==1.14.0 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -packaging==23.1 +packaging==23.2 
# via # -c .pin/../.pin/constraints-cuda-torch.txt # docker + # lightning-utilities # torchmetrics -pandas==2.0.2 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # torchrec -portalocker==2.7.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # iopath -protobuf==4.23.2 +protobuf==4.23.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # onnx @@ -198,7 +182,7 @@ pyasn1-modules==0.3.0 # google-auth pydot==1.4.2 # via -r benchmarks/dlrm/requirements.in -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -206,7 +190,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyparsing==3.0.9 +pyparsing==3.1.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pydot @@ -214,15 +198,7 @@ pyre-extensions==0.0.30 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchx -python-dateutil==2.8.2 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # pandas -pytz==2023.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf @@ -241,7 +217,7 @@ requests-oauthlib==1.3.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth-oauthlib -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -249,9 +225,9 @@ rsa==4.9 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth -scikit-learn==1.2.2 +scikit-learn==1.3.2 # via -r benchmarks/dlrm/requirements.in -scipy==1.10.1 +scipy==1.11.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # scikit-learn @@ -259,8 +235,7 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens - # google-auth - # python-dateutil + # tensorboard sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -268,48 +243,44 @@ sympy==1.12 tabulate==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # torchrec # torchx -tensorboard==2.13.0 +tensorboard==2.15.1 # via -r 
benchmarks/dlrm/requirements.in -tensorboard-data-server==0.7.0 +tensorboard-data-server==0.7.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -threadpoolctl==3.1.0 +threadpoolctl==3.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # scikit-learn -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/dlrm/requirements.in # torchmetrics # torchviz - # triton -torchmetrics==0.11.4 +torchmetrics==1.0.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchrec -torchrec==0.4.0 +torchrec==0.5.0+cu118 # via -r benchmarks/dlrm/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.5.0 # via -r benchmarks/dlrm/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -r benchmarks/dlrm/requirements.in - # iopath # torchrec -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # iopath - # onnx + # lightning-utilities # pyre-extensions # reactivex # torch @@ -318,36 +289,27 @@ typing-inspect==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pyre-extensions -tzdata==2023.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # pandas -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # docker - # google-auth # requests # torchx varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in -websocket-client==1.5.2 +websocket-client==1.6.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # docker -werkzeug==2.3.4 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # tensorboard -wheel==0.40.0 +werkzeug==3.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard -zipp==3.15.0 +zipp==3.17.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # importlib-metadata diff --git a/benchmarks/dlrm/requirements.in 
b/benchmarks/dlrm/requirements.in index b24bdf154..1bda1dcf5 100644 --- a/benchmarks/dlrm/requirements.in +++ b/benchmarks/dlrm/requirements.in @@ -11,4 +11,4 @@ torchx tensorboard # Following limits are for milabench -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/dlrm/requirements.rocm.txt b/benchmarks/dlrm/requirements.rocm.txt index a8d29307e..a36f32986 100644 --- a/benchmarks/dlrm/requirements.rocm.txt +++ b/benchmarks/dlrm/requirements.rocm.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/dlrm/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-dlrm.txt benchmarks/dlrm/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/dlrm/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-dlrm.txt benchmarks/dlrm/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ -absl-py==1.4.0 +absl-py==2.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -14,23 +14,23 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -cachetools==5.3.1 +cachetools==5.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -38,7 +38,7 @@ codefind==0.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # ptera -docker==6.1.2 +docker==6.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # 
torchx @@ -50,19 +50,20 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -fbgemm-gpu==0.4.1 +fbgemm-gpu==0.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchrec -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm # torch # torchx -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # torch # torchx future==0.18.3 # via -r benchmarks/dlrm/requirements.in @@ -71,12 +72,12 @@ giving==0.4.2 # -c .pin/../.pin/constraints-rocm-torch.txt # ptera # voir -google-auth==2.19.0 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth-oauthlib # tensorboard -google-auth-oauthlib==1.0.0 +google-auth-oauthlib==1.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -84,7 +85,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchviz -grpcio==1.54.2 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -92,36 +93,35 @@ idna==3.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -importlib-metadata==6.6.0 +importlib-metadata==6.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt - # markdown # torchx -iopath==0.1.10 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # torchrec jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -joblib==1.2.0 +joblib==1.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # scikit-learn -lit==16.0.5 +lightning-utilities==0.9.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchmetrics +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown==3.4.3 +markdown==3.5.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # 
jinja2 @@ -138,15 +138,15 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # typing-inspect -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/dlrm/requirements.in + # fbgemm-gpu # onnx - # pandas # scikit-learn # scipy # tensorboard @@ -159,26 +159,19 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -onnx==1.14.0 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # docker + # lightning-utilities # torchmetrics -pandas==2.0.2 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # torchrec -portalocker==2.7.0 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # iopath -protobuf==4.23.2 +protobuf==4.23.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # onnx @@ -198,7 +191,7 @@ pyasn1-modules==0.3.0 # google-auth pydot==1.4.2 # via -r benchmarks/dlrm/requirements.in -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -206,7 +199,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pyparsing==3.0.9 +pyparsing==3.1.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pydot @@ -214,19 +207,11 @@ pyre-extensions==0.0.30 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchx -python-dateutil==2.8.2 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # pandas -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pytz==2023.3 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # pandas -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf @@ -245,7 +230,7 @@ requests-oauthlib==1.3.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth-oauthlib -rich==13.3.5 
+rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir @@ -253,9 +238,9 @@ rsa==4.9 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth -scikit-learn==1.2.2 +scikit-learn==1.3.2 # via -r benchmarks/dlrm/requirements.in -scipy==1.10.1 +scipy==1.11.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # scikit-learn @@ -263,8 +248,7 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # asttokens - # google-auth - # python-dateutil + # tensorboard sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -272,44 +256,41 @@ sympy==1.12 tabulate==0.9.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt - # torchrec # torchx -tensorboard==2.13.0 +tensorboard==2.15.1 # via -r benchmarks/dlrm/requirements.in -tensorboard-data-server==0.7.0 +tensorboard-data-server==0.7.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard -threadpoolctl==3.1.0 +threadpoolctl==3.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # scikit-learn -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/dlrm/requirements.in # pytorch-triton-rocm # torchmetrics # torchviz -torchmetrics==0.11.4 +torchmetrics==1.0.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchrec -torchrec==0.4.0 +torchrec==0.5.0 # via -r benchmarks/dlrm/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.5.0 # via -r benchmarks/dlrm/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -r benchmarks/dlrm/requirements.in - # iopath # torchrec -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt - # iopath - # onnx + # lightning-utilities # pyre-extensions # reactivex # torch @@ -318,36 +299,27 @@ typing-inspect==0.9.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pyre-extensions -tzdata==2023.3 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # pandas -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # docker - 
# google-auth # requests # torchx varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in -websocket-client==1.5.2 +websocket-client==1.6.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # docker -werkzeug==2.3.4 - # via - # -c .pin/../.pin/constraints-rocm-torch.txt - # tensorboard -wheel==0.40.0 +werkzeug==3.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard -zipp==3.15.0 +zipp==3.17.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # importlib-metadata diff --git a/benchmarks/dlrm/voirfile.py b/benchmarks/dlrm/voirfile.py index 7a489ecaa..e18491fe3 100644 --- a/benchmarks/dlrm/voirfile.py +++ b/benchmarks/dlrm/voirfile.py @@ -47,12 +47,7 @@ def instrument_main(ov, options: Config): yield ov.phases.load_script # Loss - ( - ov.probe("//run > L") - .throttle(1)["L"] - .map(float) - .give("loss") - ) + (ov.probe("//run > L").throttle(1)["L"].map(float).give("loss")) # Compute Start & End + Batch ov.probe( diff --git a/benchmarks/flops/activator b/benchmarks/flops/activator new file mode 100755 index 000000000..083c28cb1 --- /dev/null +++ b/benchmarks/flops/activator @@ -0,0 +1,7 @@ +#!/bin/bash + +venv="$1" +shift + +source "$venv"/bin/activate +exec "$@" diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py new file mode 100644 index 000000000..0bb601d67 --- /dev/null +++ b/benchmarks/flops/benchfile.py @@ -0,0 +1,19 @@ +from milabench.pack import Package + + +class FlopsBenchmarch(Package): + base_requirements = "requirements.in" + prepare_script = "prepare.py" + main_script = "main.py" + + def build_run_plan(self) -> "execs.Executor": + import milabench.executors as execs + + main = self.dirs.code / self.main_script + pack = execs.PackExecutor(self, *self.argv, lazy=True) + # pack = execs.VoirExecutor(pack, cwd=main.parent) + pack = execs.ActivatorExecutor(pack, use_stdout=True) + return pack + + +__pack__ = FlopsBenchmarch diff 
--git a/benchmarks/flops/main.py b/benchmarks/flops/main.py new file mode 100755 index 000000000..5d2aa20cb --- /dev/null +++ b/benchmarks/flops/main.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +from argparse import ArgumentParser +import json +import time +import sys +import multiprocessing + +import torch + +from voir.smuggle import SmuggleWriter +from voir.instruments.gpu import get_gpu_info +from voir.instruments.utils import Monitor + +KILO = 1e3 +MEGA = 1e6 +GIGA = 1e9 +TERA = 1e12 +EXA = 1e18 + + +def _worker(state, queue, func, delay): + import time + + while state["running"]: + queue.put(func()) + time.sleep(delay) + + +class Monitor: + def __init__(self, delay, func): + self.manager = multiprocessing.Manager() + self.state = self.manager.dict() + self.state["running"] = True + self.results = multiprocessing.Queue() + self.process = multiprocessing.Process( + target=_worker, + args=(self.state, self.results, func, delay), + ) + + def start(self): + self.process.start() + + def stop(self): + self.state["running"] = False + self.process.join() + + +def modelflops( + model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA +): + # Not sure how much thop is correct in its computation + # it says it return MAC but I feel its methods is wrong + from thop import profile + + # MAC: Multiply–accumulate operation + batch = torch.randn(*shape, dtype=dtype, device="cuda:0") + + flops, _ = profile(model, inputs=(batch,)) + + with torch.no_grad(): + # Prepare + torch.cuda.empty_cache() + + batch = batch.cuda() + model = model.to(dtype=dtype, device="cuda:0") + + torch.cuda.synchronize() + + # Start + start = time.time() + + for i in range(repeat): + _ = model(batch) + + torch.cuda.synchronize() + end = time.time() + # -- + + return (flops * repeat) / (end - start) / unit + + +def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None): + torch.cuda.empty_cache() + a = torch.eye(n, dtype=dtype, device="cuda:0") + x = torch.randn((m, n), 
dtype=dtype, device="cuda:0") + y = torch.zeros_like(x) + + F = N * (2 * m * n * n + 2 * m * n * n) + + for i in range(R): + torch.cuda.synchronize() + ts = -time.time() + + for _ in range(N): + # No allocation in main loop using dual-out strategy + y = torch.mm(x, a, out=y) + x = torch.mm(y, a, out=x) + + torch.cuda.synchronize() + ts += time.time() + + if log is not None: + log({"task": "train", "rate": F / ts / unit, "units": "Tflops"}) + + torch.cuda.empty_cache() + + +def setupvoir(): + # wtf this do + data_file = SmuggleWriter(sys.stdout) + # data_file = sys.stdout + + def log(data): + if data_file is not None: + data["t"] = time.time() + print(json.dumps(data), file=data_file) + + while not monitor.results.empty(): + print(json.dumps(monitor.results.get()), file=data_file) + + def monitor_fn(): + data = { + gpu["device"]: { + "memory": [ + gpu["memory"]["used"], + gpu["memory"]["total"], + ], + "load": gpu["utilization"]["compute"], + "temperature": gpu["temperature"], + "power": gpu["power"], + } + for gpu in get_gpu_info()["gpus"].values() + } + return {"task": "main", "gpudata": data, "t": time.time()} + + monitor = Monitor(0.5, monitor_fn) + monitor.start() + return log, monitor + + +def main(): + dtypes = { + "bf16": torch.bfloat16, + "fp16": torch.float16, + "fp32": torch.float32, + } + + parser = ArgumentParser() + parser.add_argument("--repeat", type=int, default=100) + parser.add_argument("--number", type=int, default=100) + parser.add_argument("--m", type=int, default=256) + parser.add_argument("--n", type=int, default=256) + parser.add_argument("--dtype", type=str, default="fp32", choices=dtypes.keys()) + parser.add_argument("--tf32", action="store_true", default=False) + + args = parser.parse_args() + + torch.backends.cuda.matmul.allow_tf32 = False + if args.tf32: + torch.backends.cuda.matmul.allow_tf32 = True + + log, monitor = setupvoir() + + f(args.number, args.repeat, args.m, args.n, TERA, dtypes[args.dtype], log) + + monitor.stop() + + +if 
__name__ == "__main__": + main() diff --git a/benchmarks/flops/prepare.py b/benchmarks/flops/prepare.py new file mode 100755 index 000000000..4265cc3e6 --- /dev/null +++ b/benchmarks/flops/prepare.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt new file mode 100644 index 000000000..b10f89449 --- /dev/null +++ b/benchmarks/flops/requirements.cuda.txt @@ -0,0 +1,153 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-flops.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/cu118 + +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname +filelock==3.13.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch + # triton +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera + # voir +idna==3.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +markupsafe==2.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c 
.pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy +networkx==3.2.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # asttokens +sympy==1.12 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +torch==2.1.0+cu118 + # via + # -r benchmarks/flops/requirements.in + # torchvision +torchvision==0.16.0+cu118 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +triton==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # reactivex + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +voir==0.2.11 + # via -r benchmarks/flops/requirements.in diff --git a/benchmarks/flops/requirements.in b/benchmarks/flops/requirements.in new 
file mode 100644 index 000000000..7d30d94e7 --- /dev/null +++ b/benchmarks/flops/requirements.in @@ -0,0 +1,4 @@ +torch +torchvision +tqdm +voir diff --git a/benchmarks/flops/requirements.rocm.txt b/benchmarks/flops/requirements.rocm.txt new file mode 100644 index 000000000..23d10b701 --- /dev/null +++ b/benchmarks/flops/requirements.rocm.txt @@ -0,0 +1,162 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-flops.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ + +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +cmake==3.27.7 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # varname +filelock==3.13.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm + # torch +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera + # voir +idna==3.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +lit==17.0.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich +markupsafe==2.1.3 + # via + # -c 
.pin/../.pin/constraints-rocm-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # sympy +networkx==3.2.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pytorch-triton-rocm==2.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # asttokens +sympy==1.12 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +torch==2.1.0+rocm5.6 + # via + # -r benchmarks/flops/requirements.in + # pytorch-triton-rocm + # torchvision +torchvision==0.16.0+rocm5.6 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # reactivex + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +voir==0.2.11 + # via 
-r benchmarks/flops/requirements.in diff --git a/benchmarks/huggingface/requirements.cuda.txt b/benchmarks/huggingface/requirements.cuda.txt index 70053a636..bb24e6654 100644 --- a/benchmarks/huggingface/requirements.cuda.txt +++ b/benchmarks/huggingface/requirements.cuda.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/huggingface/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-hf.txt benchmarks/huggingface/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/huggingface/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-hf.txt benchmarks/huggingface/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -10,22 +10,18 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -34,25 +30,27 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torch # transformers # triton -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # tokenizers # 
transformers idna==3.4 # via @@ -62,15 +60,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -82,11 +76,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -98,7 +92,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -107,7 +101,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -115,7 +109,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -125,7 +119,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -regex==2023.5.5 +regex==2023.10.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # transformers @@ -134,10 +128,14 @@ requests==2.31.0 # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # transformers -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir +safetensors==0.4.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # transformers six==1.16.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -146,32 +144,30 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -tokenizers==0.13.3 +tokenizers==0.14.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # 
transformers -torch==2.0.1+cu118 - # via - # -r benchmarks/huggingface/requirements.in - # triton -tqdm==4.65.0 +torch==2.1.0+cu118 + # via -r benchmarks/huggingface/requirements.in +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # transformers -transformers==4.29.2 +transformers==4.35.0 # via -r benchmarks/huggingface/requirements.in -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -179,5 +175,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/huggingface/requirements.in diff --git a/benchmarks/huggingface/requirements.in b/benchmarks/huggingface/requirements.in index f0e86349e..85d2c3a0a 100644 --- a/benchmarks/huggingface/requirements.in +++ b/benchmarks/huggingface/requirements.in @@ -1,3 +1,3 @@ torch transformers -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/huggingface/requirements.rocm.txt b/benchmarks/huggingface/requirements.rocm.txt index b8d76be1f..4e39b0c45 100644 --- a/benchmarks/huggingface/requirements.rocm.txt +++ b/benchmarks/huggingface/requirements.rocm.txt @@ -1,28 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/huggingface/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-hf.txt benchmarks/huggingface/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/huggingface/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-hf.txt benchmarks/huggingface/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ 
+--extra-index-url https://download.pytorch.org/whl/rocm5.6/ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -34,25 +34,27 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # pytorch-triton-rocm # torch # transformers -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # ptera # voir -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # tokenizers # transformers idna==3.4 # via @@ -62,15 +64,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -82,11 +84,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # transformers @@ -98,7 +100,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c 
.pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -107,7 +109,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -115,11 +117,11 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -129,7 +131,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -regex==2023.5.5 +regex==2023.10.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # transformers @@ -138,10 +140,14 @@ requests==2.31.0 # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # transformers -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir +safetensors==0.4.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # transformers six==1.16.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -150,28 +156,28 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -tokenizers==0.13.3 +tokenizers==0.14.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # transformers -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/huggingface/requirements.in # pytorch-triton-rocm -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # transformers -transformers==4.29.2 +transformers==4.35.0 # via -r benchmarks/huggingface/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -179,5 +185,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r 
benchmarks/huggingface/requirements.in diff --git a/benchmarks/llama/benchfile.py b/benchmarks/llama/benchfile.py new file mode 100644 index 000000000..8b253bc92 --- /dev/null +++ b/benchmarks/llama/benchfile.py @@ -0,0 +1,43 @@ +import uuid + +from milabench.executors import CmdExecutor +from milabench.pack import Package + + +class LLAMA(Package): + base_requirements = "requirements.in" + main_script = "main.py" + + def make_env(self): + return { + **super().make_env(), + "OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8)), + } + + async def install(self): + await super().install() + + def build_prepare_plan(self): + return CmdExecutor( + self, + "python", + str(self.dirs.code / "main.py"), + *self.argv, + "--prepare", + "--cache", + str(self.dirs.cache), + ) + + def build_run_plan(self): + return CmdExecutor( + self, + "python", + str(self.dirs.code / "main.py"), + *self.argv, + "--cache", + str(self.dirs.cache), + use_stdout=True, + ) + + +__pack__ = LLAMA diff --git a/benchmarks/llama/config/llama2_13b_chat_hf.config b/benchmarks/llama/config/llama2_13b_chat_hf.config new file mode 100644 index 000000000..48a3bef58 --- /dev/null +++ b/benchmarks/llama/config/llama2_13b_chat_hf.config @@ -0,0 +1,25 @@ +{ + "_name_or_path": null, + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.31.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/benchmarks/llama/config/llama2_70b_chat_hf.config b/benchmarks/llama/config/llama2_70b_chat_hf.config new file mode 100644 index 000000000..5b1cbae13 --- /dev/null +++ 
b/benchmarks/llama/config/llama2_70b_chat_hf.config @@ -0,0 +1,25 @@ +{ + "_name_or_path": "meta-llama/Llama-2-70b-chat-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.31.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/benchmarks/llama/config/llama2_7b_chat_hf.config b/benchmarks/llama/config/llama2_7b_chat_hf.config new file mode 100644 index 000000000..bd41792ae --- /dev/null +++ b/benchmarks/llama/config/llama2_7b_chat_hf.config @@ -0,0 +1,25 @@ +{ + "_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.32.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/benchmarks/llama/main.py b/benchmarks/llama/main.py new file mode 100755 index 000000000..5bb20164e --- /dev/null +++ b/benchmarks/llama/main.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python + +import json +import os +import argparse +import time +import sys +import multiprocessing + +import torch + +from voir.smuggle import SmuggleWriter +from voir.instruments.gpu import get_gpu_info + +root = os.path.dirname(__file__) + + +def 
available_models(): + models = dict() + + for size in ("7b", "13b", "70b"): + models[f"llama2-{size}"] = { + "name": f"meta-llama/Llama-2-{size}-chat-hf", + "config": f"llama2_{size}_chat_hf.config", + } + + return models + + +def _worker(state, queue, func, delay): + import time + + while state["running"]: + queue.put(func()) + time.sleep(delay) + + +class Monitor: + def __init__(self, delay, func): + self.manager = multiprocessing.Manager() + self.state = self.manager.dict() + self.state["running"] = True + self.results = multiprocessing.Queue() + self.process = multiprocessing.Process( + target=_worker, + args=(self.state, self.results, func, delay), + ) + + def start(self): + self.process.start() + + def stop(self): + self.state["running"] = False + self.process.join() + + +def setupvoir(): + # SmuggleWriter wraps stdout -- presumably so voir can tell these JSON records apart from ordinary output (TODO confirm in voir.smuggle) + data_file = SmuggleWriter(sys.stdout) + # data_file = sys.stdout + + def log(data): + if data_file is not None: + data["t"] = time.time() + print(json.dumps(data), file=data_file) + + while not monitor.results.empty(): + print(json.dumps(monitor.results.get()), file=data_file) + + def monitor_fn(): + data = { + gpu["device"]: { + "memory": [ + gpu["memory"]["used"], + gpu["memory"]["total"], + ], + "load": gpu["utilization"]["compute"], + "temperature": gpu["temperature"], + "power": gpu["power"], + } + for gpu in get_gpu_info()["gpus"].values() + } + return {"task": "main", "gpudata": data, "t": time.time()} + + monitor = Monitor(0.5, monitor_fn) + monitor.start() + return log, monitor + + +class WrappedTokenizer: + def __init__(self, tokenizer): + self.tokenizer = tokenizer + self.count = 0 + + def __call__(self, *args, **kwargs): + input_ids = self.tokenizer(*args, **kwargs) + + self.count = 1 + for c in input_ids["input_ids"].shape: + self.count *= c + + return input_ids + + def __getattr__(self, attr): + if hasattr(self.tokenizer, attr): + method = getattr(self.tokenizer, attr) + return method + else: + raise AttributeError( +
f"'{type(self.tokenizer).__name__}' object has no attribute '{attr}'" + ) + + +def println(*args, **kwargs): + print(*args, **kwargs, file=sys.stderr) + + +def huggingface_main(args, model, config): + # Huggingface imported AFTER setup + import transformers + from transformers import LlamaForCausalLM, LlamaTokenizerFast + from transformers.models.llama.configuration_llama import LlamaConfig + + from datasets import load_dataset + + # Dataset here + println("Dataset") + dataset = load_dataset("wikitext", "wikitext-103-v1") + + println("Tokenizer") + # LLAMA tokenizer official tokenizer is hidden behind a login + tokenizer = WrappedTokenizer( + LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer") + ) + + # Prepare is done + if args.prepare: + return 0 + + # We do not download LLAMA because it takes too long + # we just instantiate an untrained one + println("Model") + model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda() + + println("Pipeline") + pipeline = transformers.pipeline( + "text-generation", + model=model, + torch_dtype=torch.float16, + # device_map="cuda", + tokenizer=tokenizer, + device=torch.device("cuda"), + ) + + in_token_count = 0 + out_token_count = 0 + + start = time.time() + + log, monitor = setupvoir() + + println("Starting") + count = 0 + for entry in dataset["train"]: + text = entry["text"].strip() + + # Titles + if text == "" or text.startswith(" = ") or len(text) < 10: + continue + + count += 1 + sequences = pipeline( + text, + do_sample=True, + top_k=10, + num_return_sequences=1, + eos_token_id=tokenizer.eos_token_id, + max_length=400, + ) + + for seq in sequences: + out_token_count += len(seq["generated_text"]) + + in_token_count += tokenizer.count + total = out_token_count + in_token_count + + elapsed = time.time() - start + println( + f"{elapsed =}, {total / elapsed =} {in_token_count =} {out_token_count =}" + ) + + if total > 30: + out_token_count = 0 + in_token_count = 0 + start = time.time() + + if log is not
None: + log({"task": "train", "rate": total / elapsed, "units": "Tok/s"}) + + if count > 40: + break + + monitor.stop() + + +def main(): + import torch + + models = available_models() + + parser = argparse.ArgumentParser() + parser.add_argument("--model", default="llama2-7b", choices=models.keys()) + parser.add_argument("--prepare", action="store_true") + parser.add_argument("--cache", required=True, type=str) + + # + args = parser.parse_args() + os.environ["XDG_CACHE_HOME"] = str(args.cache) + + settings = models[args.model] + model, config = settings["name"], settings["config"] + + with open(os.path.join(root, "config", config), "r") as file: + config = json.load(file) + + with torch.no_grad(): + return huggingface_main(args, model, config) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/llama/requirements.cuda.txt b/benchmarks/llama/requirements.cuda.txt new file mode 100644 index 000000000..58f88112e --- /dev/null +++ b/benchmarks/llama/requirements.cuda.txt @@ -0,0 +1,181 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/llama/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-llm.txt benchmarks/llama/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/cu118 + +aiohttp==3.8.6 + # via + # datasets + # fsspec +aiosignal==1.3.1 + # via aiohttp +antlr4-python3-runtime==4.9.3 + # via omegaconf +asttokens==2.4.1 + # via giving +async-timeout==4.0.3 + # via aiohttp +attrs==23.1.0 + # via aiohttp +certifi==2023.7.22 + # via requests +charset-normalizer==3.3.2 + # via + # aiohttp + # requests +codefind==0.1.3 + # via ptera +datasets==2.14.6 + # via -r benchmarks/llama/requirements.in +dill==0.3.7 + # via + # datasets + # multiprocess +executing==1.2.0 + # via varname +fairscale==0.4.13 + # via -r benchmarks/llama/requirements.in +filelock==3.13.1 + # via + # huggingface-hub + # torch + # transformers 
+ # triton +fire==0.5.0 + # via -r benchmarks/llama/requirements.in +frozenlist==1.4.0 + # via + # aiohttp + # aiosignal +fsspec[http]==2023.10.0 + # via + # datasets + # huggingface-hub + # torch +giving==0.4.2 + # via + # ptera + # voir +huggingface-hub==0.17.3 + # via + # datasets + # tokenizers + # transformers +idna==3.4 + # via + # requests + # yarl +jinja2==3.1.2 + # via torch +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.3 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +mpmath==1.3.0 + # via sympy +multidict==6.0.4 + # via + # aiohttp + # yarl +multiprocess==0.70.15 + # via datasets +networkx==3.2.1 + # via torch +numpy==1.26.1 + # via + # datasets + # fairscale + # pandas + # pyarrow + # transformers +omegaconf==2.3.0 + # via voir +ovld==0.3.2 + # via voir +packaging==23.2 + # via + # datasets + # huggingface-hub + # transformers +pandas==2.1.2 + # via datasets +ptera==1.4.1 + # via voir +pyarrow==14.0.0 + # via datasets +pygments==2.16.1 + # via rich +pynvml==11.5.0 + # via voir +python-dateutil==2.8.2 + # via pandas +pytz==2023.3.post1 + # via pandas +pyyaml==6.0.1 + # via + # datasets + # huggingface-hub + # omegaconf + # transformers +reactivex==4.0.4 + # via giving +regex==2023.10.3 + # via transformers +requests==2.31.0 + # via + # datasets + # fsspec + # huggingface-hub + # transformers +rich==13.6.0 + # via voir +safetensors==0.4.0 + # via transformers +sentencepiece==0.1.99 + # via -r benchmarks/llama/requirements.in +six==1.16.0 + # via + # asttokens + # fire + # python-dateutil +sympy==1.12 + # via torch +termcolor==2.3.0 + # via fire +tokenizers==0.14.1 + # via transformers +torch==2.1.0+cu118 + # via + # -r benchmarks/llama/requirements.in + # fairscale +tqdm==4.66.1 + # via + # datasets + # huggingface-hub + # transformers +transformers==4.35.0 + # via -r benchmarks/llama/requirements.in +triton==2.1.0 + # via torch +typing-extensions==4.8.0 + # via + # huggingface-hub + # reactivex + # torch +tzdata==2023.3 + # via pandas 
+urllib3==2.0.7 + # via requests +varname==0.10.0 + # via giving +voir==0.2.11 + # via -r benchmarks/llama/requirements.in +xxhash==3.4.1 + # via datasets +yarl==1.9.2 + # via aiohttp diff --git a/benchmarks/llama/requirements.in b/benchmarks/llama/requirements.in new file mode 100644 index 000000000..9b9c48d80 --- /dev/null +++ b/benchmarks/llama/requirements.in @@ -0,0 +1,7 @@ +torch +fairscale +fire +sentencepiece +voir +datasets +transformers diff --git a/benchmarks/llama/requirements.rocm.txt b/benchmarks/llama/requirements.rocm.txt new file mode 100644 index 000000000..eb26e2fa9 --- /dev/null +++ b/benchmarks/llama/requirements.rocm.txt @@ -0,0 +1,186 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/llama/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-llm.txt benchmarks/llama/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ + +aiohttp==3.8.6 + # via + # datasets + # fsspec +aiosignal==1.3.1 + # via aiohttp +antlr4-python3-runtime==4.9.3 + # via omegaconf +asttokens==2.4.1 + # via giving +async-timeout==4.0.3 + # via aiohttp +attrs==23.1.0 + # via aiohttp +certifi==2023.7.22 + # via requests +charset-normalizer==3.3.2 + # via + # aiohttp + # requests +cmake==3.27.7 + # via pytorch-triton-rocm +codefind==0.1.3 + # via ptera +datasets==2.14.6 + # via -r benchmarks/llama/requirements.in +dill==0.3.7 + # via + # datasets + # multiprocess +executing==1.2.0 + # via varname +fairscale==0.4.13 + # via -r benchmarks/llama/requirements.in +filelock==3.13.1 + # via + # huggingface-hub + # pytorch-triton-rocm + # torch + # transformers +fire==0.5.0 + # via -r benchmarks/llama/requirements.in +frozenlist==1.4.0 + # via + # aiohttp + # aiosignal +fsspec[http]==2023.10.0 + # via + # datasets + # huggingface-hub + # torch +giving==0.4.2 + # via + # ptera + # voir +huggingface-hub==0.17.3 + # via + # datasets 
+ # tokenizers + # transformers +idna==3.4 + # via + # requests + # yarl +jinja2==3.1.2 + # via torch +lit==17.0.4 + # via pytorch-triton-rocm +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.3 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +mpmath==1.3.0 + # via sympy +multidict==6.0.4 + # via + # aiohttp + # yarl +multiprocess==0.70.15 + # via datasets +networkx==3.2.1 + # via torch +numpy==1.26.1 + # via + # datasets + # fairscale + # pandas + # pyarrow + # transformers +omegaconf==2.3.0 + # via voir +ovld==0.3.2 + # via voir +packaging==23.2 + # via + # datasets + # huggingface-hub + # transformers +pandas==2.1.2 + # via datasets +ptera==1.4.1 + # via voir +pyarrow==14.0.0 + # via datasets +pygments==2.16.1 + # via rich +pynvml==11.5.0 + # via voir +python-dateutil==2.8.2 + # via pandas +pytorch-triton-rocm==2.1.0 + # via torch +pytz==2023.3.post1 + # via pandas +pyyaml==6.0.1 + # via + # datasets + # huggingface-hub + # omegaconf + # transformers +reactivex==4.0.4 + # via giving +regex==2023.10.3 + # via transformers +requests==2.31.0 + # via + # datasets + # fsspec + # huggingface-hub + # transformers +rich==13.6.0 + # via voir +safetensors==0.4.0 + # via transformers +sentencepiece==0.1.99 + # via -r benchmarks/llama/requirements.in +six==1.16.0 + # via + # asttokens + # fire + # python-dateutil +sympy==1.12 + # via torch +termcolor==2.3.0 + # via fire +tokenizers==0.14.1 + # via transformers +torch==2.1.0+rocm5.6 + # via + # -r benchmarks/llama/requirements.in + # fairscale + # pytorch-triton-rocm +tqdm==4.66.1 + # via + # datasets + # huggingface-hub + # transformers +transformers==4.35.0 + # via -r benchmarks/llama/requirements.in +typing-extensions==4.8.0 + # via + # huggingface-hub + # reactivex + # torch +tzdata==2023.3 + # via pandas +urllib3==2.0.7 + # via requests +varname==0.10.0 + # via giving +voir==0.2.11 + # via -r benchmarks/llama/requirements.in +xxhash==3.4.1 + # via datasets +yarl==1.9.2 + # via aiohttp diff --git 
a/benchmarks/rwkv/prepare.py b/benchmarks/rwkv/prepare.py index 1e51bb2b1..992e6c099 100755 --- a/benchmarks/rwkv/prepare.py +++ b/benchmarks/rwkv/prepare.py @@ -24,9 +24,7 @@ print("This will compile the appropriate torch extensions.") print("=" * 80) result = subprocess.run( - ["voir", - "--no-dash", "--interval", "1", "--stop", "1", - "train.py", *argv] + ["voir", "--no-dash", "--interval", "1", "--stop", "1", "train.py", *argv] ) print("=" * 80) print("Done") diff --git a/benchmarks/rwkv/requirements.cuda.txt b/benchmarks/rwkv/requirements.cuda.txt index 2c0c866e4..830a0a40a 100644 --- a/benchmarks/rwkv/requirements.cuda.txt +++ b/benchmarks/rwkv/requirements.cuda.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/rwkv/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-rwkv.txt benchmarks/rwkv/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/rwkv/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-rwkv.txt benchmarks/rwkv/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 -aiohttp==3.8.4 +aiohttp==3.8.6 # via # -c .pin/../.pin/constraints-cuda-torch.txt # fsspec @@ -18,11 +18,11 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -async-timeout==4.0.2 +async-timeout==4.0.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -30,43 +30,40 @@ attrs==23.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # requests -cmake==3.26.3 - # 
via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -deepspeed==0.8.3 +deepspeed==0.12.2 # via -r benchmarks/rwkv/requirements.in executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton -frozenlist==1.3.3 +frozenlist==1.4.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # aiosignal -fsspec[http]==2023.5.0 +fsspec[http]==2023.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pytorch-lightning + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -85,19 +82,16 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lightning-utilities==0.8.0 +lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pytorch-lightning -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 + # torchmetrics +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -114,15 +108,15 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -ninja==1.11.1 +ninja==1.11.1.1 # via # -r benchmarks/rwkv/requirements.in # deepspeed -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/rwkv/requirements.in # deepspeed @@ -136,14 +130,14 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed # lightning-utilities # pytorch-lightning # torchmetrics -psutil==5.9.5 +psutil==5.9.6 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed @@ -155,21 +149,22 @@ py-cpuinfo==9.0.0 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # deepspeed -pydantic==1.10.8 +pydantic==1.10.13 # via - # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/rwkv/requirements.in # deepspeed -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # deepspeed # voir pytorch-lightning==1.9.5 # via -r benchmarks/rwkv/requirements.in -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf @@ -182,7 +177,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # fsspec -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -194,27 +189,26 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/rwkv/requirements.in # deepspeed # pytorch-lightning # torchmetrics - # triton -torchmetrics==0.11.4 +torchmetrics==1.0.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # pytorch-lightning -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed # pytorch-lightning -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # lightning-utilities @@ -222,7 +216,7 @@ typing-extensions==4.6.2 # pytorch-lightning # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -230,7 +224,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.9 +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/rwkv/requirements.in b/benchmarks/rwkv/requirements.in index 79d763f72..29a8180d0 100644 --- a/benchmarks/rwkv/requirements.in +++ b/benchmarks/rwkv/requirements.in @@ -3,4 +3,5 @@ torch deepspeed pytorch-lightning<2.0 
ninja -voir>=0.2.9,<0.3 +voir>=0.2.10,<0.3 +pydantic<2 diff --git a/benchmarks/rwkv/requirements.rocm.txt b/benchmarks/rwkv/requirements.rocm.txt index 79710b03d..e97d63520 100644 --- a/benchmarks/rwkv/requirements.rocm.txt +++ b/benchmarks/rwkv/requirements.rocm.txt @@ -1,12 +1,12 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/rwkv/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-rwkv.txt benchmarks/rwkv/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/rwkv/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-rwkv.txt benchmarks/rwkv/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ -aiohttp==3.8.4 +aiohttp==3.8.6 # via # -c .pin/../.pin/constraints-rocm-torch.txt # fsspec @@ -18,11 +18,11 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -async-timeout==4.0.2 +async-timeout==4.0.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -30,16 +30,16 @@ attrs==23.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -47,26 +47,27 @@ codefind==0.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # ptera -deepspeed==0.8.3 +deepspeed==0.12.2 # via -r benchmarks/rwkv/requirements.in executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname 
-filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm # torch -frozenlist==1.3.3 +frozenlist==1.4.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # aiosignal -fsspec[http]==2023.5.0 +fsspec[http]==2023.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-lightning + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -85,19 +86,20 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lightning-utilities==0.8.0 +lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-lightning -lit==16.0.5 + # torchmetrics +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -114,15 +116,15 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # yarl -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -ninja==1.11.1 +ninja==1.11.1.1 # via # -r benchmarks/rwkv/requirements.in # deepspeed -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/rwkv/requirements.in # deepspeed @@ -136,14 +138,14 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed # lightning-utilities # pytorch-lightning # torchmetrics -psutil==5.9.5 +psutil==5.9.6 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed @@ -155,25 +157,26 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -pydantic==1.10.8 +pydantic==1.10.13 # via - # -c .pin/../.pin/constraints-rocm-torch.txt + # -r benchmarks/rwkv/requirements.in # deepspeed -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # 
rich pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # deepspeed # voir pytorch-lightning==1.9.5 # via -r benchmarks/rwkv/requirements.in -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf @@ -186,7 +189,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # fsspec -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir @@ -198,23 +201,23 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/rwkv/requirements.in # deepspeed # pytorch-lightning # pytorch-triton-rocm # torchmetrics -torchmetrics==0.11.4 +torchmetrics==1.0.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-lightning -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed # pytorch-lightning -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # lightning-utilities @@ -222,7 +225,7 @@ typing-extensions==4.6.2 # pytorch-lightning # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -230,7 +233,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.9 +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/rwkv/rwkv-v4neo/chat.py b/benchmarks/rwkv/rwkv-v4neo/chat.py index d214ba281..19e2b36f9 100644 --- a/benchmarks/rwkv/rwkv-v4neo/chat.py +++ b/benchmarks/rwkv/rwkv-v4neo/chat.py @@ -2,12 +2,13 @@ # The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM ######################################################################################################## -print('Loading...') +print("Loading...") from src.model_run import RWKV_RNN import numpy as np 
import os, copy, types, gc, sys import torch from src.utils import TOKENIZER + try: os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1] except: @@ -17,7 +18,7 @@ torch.backends.cuda.matmul.allow_tf32 = True np.set_printoptions(precision=4, suppress=True, linewidth=200) -CHAT_LANG = 'English' # English Chinese +CHAT_LANG = "English" # English Chinese WORD_NAME = [ "20B_tokenizer.json", @@ -28,14 +29,16 @@ args = types.SimpleNamespace() args.RUN_DEVICE = "cuda" # 'cpu' (already very fast) // 'cuda' -args.FLOAT_MODE = "fp16" # fp32 (good for CPU) // fp16 (recommended for GPU) // bf16 (less accurate) +args.FLOAT_MODE = ( + "fp16" # fp32 (good for CPU) // fp16 (recommended for GPU) // bf16 (less accurate) +) args.vocab_size = 50277 args.head_qk = 0 args.pre_ffn = 0 args.grad_cp = 0 args.my_pos_emb = 0 -args.MODEL_NAME = '/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-14b/RWKV-4-Pile-14B-20230108-5170' +args.MODEL_NAME = "/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-14b/RWKV-4-Pile-14B-20230108-5170" args.n_layer = 40 args.n_embd = 5120 args.ctx_len = 1024 @@ -50,7 +53,7 @@ # args.n_embd = 2560 # args.ctx_len = 1024 -if CHAT_LANG == 'English': +if CHAT_LANG == "English": user = "User" bot = "Bot" interface = ":" @@ -58,7 +61,7 @@ # The following is a verbose and detailed conversation between an AI assistant called {bot}, and a human user called {user}. {bot} is intelligent, knowledgeable, wise and polite. # The following is a conversation between a highly knowledgeable and intelligent AI called {bot}, and a human called {user}. In the following interactions, {user} and {bot} converse in natural language, and {bot} do its best to answer {user}'s questions. {bot} is respectful, polite and inclusive. {bot} knows a lot, and always tells the truth. - init_prompt = f''' + init_prompt = f""" The following is a verbose and detailed conversation between an AI assistant called {bot}, and a human user called {user}. {bot} is intelligent, knowledgeable, wise and polite. 
{user}{interface} french revolution what year @@ -81,8 +84,8 @@ {bot}{interface} LHC is a high-energy particle collider, built by CERN, and completed in 2008. They used it to confirm the existence of the Higgs boson in 2012. -''' - HELP_MSG = '''Commands: +""" + HELP_MSG = """Commands: say something --> chat with bot. use \\n for new line. +alt --> alternate chat reply +reset --> reset chat @@ -94,9 +97,9 @@ Now talk with the bot and enjoy. Remember to +reset periodically to clean up the bot's memory. Use RWKV-4 14B for best results. This is not instruct-tuned for conversation yet, so don't expect good quality. Better use +gen for free generation. -''' -elif CHAT_LANG == 'Chinese': - args.MODEL_NAME = '/fsx/BlinkDL/CODE/_PUBLIC_/RWKV-LM/RWKV-v4neo/7-run3z/rwkv-293' +""" +elif CHAT_LANG == "Chinese": + args.MODEL_NAME = "/fsx/BlinkDL/CODE/_PUBLIC_/RWKV-LM/RWKV-v4neo/7-run3z/rwkv-293" args.n_layer = 32 args.n_embd = 4096 args.ctx_len = 1024 @@ -105,7 +108,7 @@ bot = "A" interface = ":" - init_prompt = ''' + init_prompt = """ Q: 企鹅会飞吗? A: 企鹅是不会飞的。它们的翅膀主要用于游泳和平衡,而不是飞行。 @@ -114,8 +117,8 @@ A: 西瓜是一种常见的水果,是一种多年生蔓生藤本植物。西瓜的果实呈圆形或卵形,通常是绿色的,里面有红色或黄色的肉和很多的籽。西瓜味甜,多吃可以增加水分,是夏季非常受欢迎的水果之一。 -''' - HELP_MSG = '''指令: +""" + HELP_MSG = """指令: 直接输入内容 --> 和机器人聊天,用\\n代表换行 +alt --> 让机器人换个回答 +reset --> 重置对话 @@ -126,14 +129,14 @@ +retry --> 换个 +gen / +qa 的回答 现在可以输入内容和机器人聊天(注意它不怎么懂中文,它可能更懂英文)。请经常使用 +reset 重置机器人记忆。 -''' +""" # Load Model os.environ["RWKV_RUN_DEVICE"] = args.RUN_DEVICE MODEL_NAME = args.MODEL_NAME -print(f'loading... {MODEL_NAME}') +print(f"loading... 
{MODEL_NAME}") model = RWKV_RNN(args) model_tokens = [] @@ -142,15 +145,18 @@ ######################################################################################################## -def run_rnn(tokens, newline_adj = 0): + +def run_rnn(tokens, newline_adj=0): global model_tokens, current_state for i in range(len(tokens)): model_tokens += [int(tokens[i])] if i == len(tokens) - 1: out, current_state = model.forward(model_tokens, current_state) else: - current_state = model.forward(model_tokens, current_state, preprocess_only = True) - + current_state = model.forward( + model_tokens, current_state, preprocess_only=True + ) + # print(f'### model ###\n[{tokenizer.tokenizer.decode(model_tokens)}]') out[0] = -999999999 # disable <|endoftext|> @@ -159,60 +165,67 @@ def run_rnn(tokens, newline_adj = 0): # out[15] += newline_adj / 2 # '.' return out + all_state = {} + + def save_all_stat(srv, name, last_out): - n = f'{name}_{srv}' + n = f"{name}_{srv}" all_state[n] = {} - all_state[n]['out'] = last_out - all_state[n]['rnn'] = copy.deepcopy(current_state) - all_state[n]['token'] = copy.deepcopy(model_tokens) + all_state[n]["out"] = last_out + all_state[n]["rnn"] = copy.deepcopy(current_state) + all_state[n]["token"] = copy.deepcopy(model_tokens) + def load_all_stat(srv, name): global model_tokens, current_state - n = f'{name}_{srv}' - current_state = copy.deepcopy(all_state[n]['rnn']) - model_tokens = copy.deepcopy(all_state[n]['token']) - return all_state[n]['out'] + n = f"{name}_{srv}" + current_state = copy.deepcopy(all_state[n]["rnn"]) + model_tokens = copy.deepcopy(all_state[n]["token"]) + return all_state[n]["out"] + ######################################################################################################## # Run inference -print(f'\nRun prompt...') +print(f"\nRun prompt...") out = run_rnn(tokenizer.tokenizer.encode(init_prompt)) gc.collect() torch.cuda.empty_cache() -save_all_stat('', 'chat_init', out) +save_all_stat("", "chat_init", out) -srv_list = 
['dummy_server'] +srv_list = ["dummy_server"] for s in srv_list: - save_all_stat(s, 'chat', out) + save_all_stat(s, "chat", out) + +print(f"### prompt ###\n[{tokenizer.tokenizer.decode(model_tokens)}]\n") -print(f'### prompt ###\n[{tokenizer.tokenizer.decode(model_tokens)}]\n') def reply_msg(msg): - print(f'{bot}{interface} {msg}\n') + print(f"{bot}{interface} {msg}\n") + def on_message(message): global model_tokens, current_state - srv = 'dummy_server' + srv = "dummy_server" - msg = message.replace('\\n','\n').strip() + msg = message.replace("\\n", "\n").strip() if len(msg) > 1000: - reply_msg('your message is too long (max 1000 tokens)') + reply_msg("your message is too long (max 1000 tokens)") return x_temp = 1.0 x_top_p = 0.85 - if ("-temp=" in msg): + if "-temp=" in msg: x_temp = float(msg.split("-temp=")[1].split(" ")[0]) - msg = msg.replace("-temp="+f'{x_temp:g}', "") + msg = msg.replace("-temp=" + f"{x_temp:g}", "") # print(f"temp: {x_temp}") - if ("-top_p=" in msg): + if "-top_p=" in msg: x_top_p = float(msg.split("-top_p=")[1].split(" ")[0]) - msg = msg.replace("-top_p="+f'{x_top_p:g}', "") + msg = msg.replace("-top_p=" + f"{x_top_p:g}", "") # print(f"top_p: {x_top_p}") if x_temp <= 0.2: x_temp = 0.2 @@ -220,31 +233,35 @@ def on_message(message): x_temp = 5 if x_top_p <= 0: x_top_p = 0 - - if msg == '+reset': - out = load_all_stat('', 'chat_init') - save_all_stat(srv, 'chat', out) + + if msg == "+reset": + out = load_all_stat("", "chat_init") + save_all_stat(srv, "chat", out) reply_msg("Chat reset.") return - elif msg[:5].lower() == '+gen ' or msg[:4].lower() == '+qa ' or msg.lower() == '+more' or msg.lower() == '+retry': - - if msg[:5].lower() == '+gen ': - new = '\n' + msg[5:].strip() + elif ( + msg[:5].lower() == "+gen " + or msg[:4].lower() == "+qa " + or msg.lower() == "+more" + or msg.lower() == "+retry" + ): + if msg[:5].lower() == "+gen ": + new = "\n" + msg[5:].strip() # print(f'### prompt ###\n[{new}]') current_state = None out = 
run_rnn(tokenizer.tokenizer.encode(new)) - save_all_stat(srv, 'gen_0', out) + save_all_stat(srv, "gen_0", out) - elif msg[:4].lower() == '+qa ': - out = load_all_stat('', 'chat_init') + elif msg[:4].lower() == "+qa ": + out = load_all_stat("", "chat_init") real_msg = msg[4:].strip() new = f"{user}{interface} {real_msg}\n\n{bot}{interface}" # print(f'### qa ###\n[{new}]') - + out = run_rnn(tokenizer.tokenizer.encode(new)) - save_all_stat(srv, 'gen_0', out) + save_all_stat(srv, "gen_0", out) # new = f"\nThe following is an excellent Q&A session consists of detailed and factual information.\n\nQ: What is 3+5?\nA: The answer is 8.\n\nQ: {msg[9:].strip()}\nA:" # print(f'### prompt ###\n[{new}]') @@ -252,16 +269,16 @@ def on_message(message): # out = run_rnn(tokenizer.tokenizer.encode(new)) # save_all_stat(srv, 'gen_0', out) - elif msg.lower() == '+more': + elif msg.lower() == "+more": try: - out = load_all_stat(srv, 'gen_1') - save_all_stat(srv, 'gen_0', out) + out = load_all_stat(srv, "gen_1") + save_all_stat(srv, "gen_0", out) except: return - elif msg.lower() == '+retry': + elif msg.lower() == "+retry": try: - out = load_all_stat(srv, 'gen_0') + out = load_all_stat(srv, "gen_0") except: return @@ -276,37 +293,37 @@ def on_message(message): top_p_usual=x_top_p, top_p_newline=x_top_p, ) - if msg[:4].lower() == '+qa ': + if msg[:4].lower() == "+qa ": out = run_rnn([token], newline_adj=-1) else: out = run_rnn([token]) - + xxx = tokenizer.tokenizer.decode(model_tokens[out_last:]) - if '\ufffd' not in xxx: - print(xxx, end='', flush=True) + if "\ufffd" not in xxx: + print(xxx, end="", flush=True) out_last = begin + i + 1 - print('\n') + print("\n") # send_msg = tokenizer.tokenizer.decode(model_tokens[begin:]).strip() # print(f'### send ###\n[{send_msg}]') # reply_msg(send_msg) - save_all_stat(srv, 'gen_1', out) + save_all_stat(srv, "gen_1", out) else: - if msg.lower() == '+alt': + if msg.lower() == "+alt": try: - out = load_all_stat(srv, 'chat_pre') + out = 
load_all_stat(srv, "chat_pre") except: return else: - out = load_all_stat(srv, 'chat') + out = load_all_stat(srv, "chat") new = f"{user}{interface} {msg}\n\n{bot}{interface}" # print(f'### add ###\n[{new}]') out = run_rnn(tokenizer.tokenizer.encode(new), newline_adj=-999999999) - save_all_stat(srv, 'chat_pre', out) + save_all_stat(srv, "chat_pre", out) begin = len(model_tokens) out_last = begin - print(f'{bot}{interface}', end='', flush=True) + print(f"{bot}{interface}", end="", flush=True) for i in range(999): if i <= 0: newline_adj = -999999999 @@ -315,7 +332,7 @@ def on_message(message): elif i <= 130: newline_adj = 0 else: - newline_adj = (i - 130) * 0.25 # MUST END THE GENERATION + newline_adj = (i - 130) * 0.25 # MUST END THE GENERATION token = tokenizer.sample_logits( out, model_tokens, @@ -327,15 +344,15 @@ def on_message(message): out = run_rnn([token], newline_adj=newline_adj) xxx = tokenizer.tokenizer.decode(model_tokens[out_last:]) - if '\ufffd' not in xxx: - print(xxx, end='', flush=True) + if "\ufffd" not in xxx: + print(xxx, end="", flush=True) out_last = begin + i + 1 - + send_msg = tokenizer.tokenizer.decode(model_tokens[begin:]) - if '\n\n' in send_msg: + if "\n\n" in send_msg: send_msg = send_msg.strip() break - + # send_msg = tokenizer.tokenizer.decode(model_tokens[begin:]).strip() # if send_msg.endswith(f'{user}{interface}'): # warning: needs to fix state too !!! 
# send_msg = send_msg[:-len(f'{user}{interface}')].strip() @@ -349,13 +366,14 @@ def on_message(message): # print(f'### send ###\n[{send_msg}]') # reply_msg(send_msg) - save_all_stat(srv, 'chat', out) + save_all_stat(srv, "chat", out) + print(HELP_MSG) while True: - msg = input(f'{user}{interface} ') + msg = input(f"{user}{interface} ") if len(msg.strip()) > 0: on_message(msg) else: - print('Erorr: please say something') + print("Erorr: please say something") diff --git a/benchmarks/rwkv/rwkv-v4neo/img_demoAE.py b/benchmarks/rwkv/rwkv-v4neo/img_demoAE.py index ab0d4edd6..43c0c3cf3 100644 --- a/benchmarks/rwkv/rwkv-v4neo/img_demoAE.py +++ b/benchmarks/rwkv/rwkv-v4neo/img_demoAE.py @@ -9,55 +9,58 @@ from torch.nn import functional as F import torchvision as vision import torchvision.transforms as transforms + np.set_printoptions(precision=4, suppress=True, linewidth=200) -print(f'loading...') +print(f"loading...") ######################################################################################################## -model_prefix = 'test/image_trained/out-v7c_d8_256-224-13bit-OB32x0.5-201' -input_img = 'test/img_ae_test/test0.png' +model_prefix = "test/image_trained/out-v7c_d8_256-224-13bit-OB32x0.5-201" +input_img = "test/img_ae_test/test0.png" ######################################################################################################## + class ToBinary(torch.autograd.Function): @staticmethod def forward(ctx, x): - return torch.floor(x + 0.5) # no need for noise when we have plenty of data + return torch.floor(x + 0.5) # no need for noise when we have plenty of data @staticmethod def backward(ctx, grad_output): - return grad_output.clone() # pass-through + return grad_output.clone() # pass-through + class R_ENCODER(nn.Module): def __init__(self, args): super().__init__() self.args = args dd = 8 - self.Bxx = nn.BatchNorm2d(dd*64) + self.Bxx = nn.BatchNorm2d(dd * 64) self.CIN = nn.Conv2d(3, dd, kernel_size=3, padding=1) self.Cx0 = nn.Conv2d(dd, 32, 
kernel_size=3, padding=1) self.Cx1 = nn.Conv2d(32, dd, kernel_size=3, padding=1) - self.B00 = nn.BatchNorm2d(dd*4) - self.C00 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C01 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) - self.C02 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C03 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) + self.B00 = nn.BatchNorm2d(dd * 4) + self.C00 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C01 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) + self.C02 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C03 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) - self.B10 = nn.BatchNorm2d(dd*16) - self.C10 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C11 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - self.C12 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C13 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) + self.B10 = nn.BatchNorm2d(dd * 16) + self.C10 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C11 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + self.C12 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C13 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) - self.B20 = nn.BatchNorm2d(dd*64) - self.C20 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C21 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.C22 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C23 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) + self.B20 = nn.BatchNorm2d(dd * 64) + self.C20 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C21 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) + self.C22 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C23 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) - self.COUT = nn.Conv2d(dd*64, args.my_img_bit, kernel_size=3, padding=1) + self.COUT = nn.Conv2d(dd * 64, args.my_img_bit, kernel_size=3, padding=1) def forward(self, img): ACT = F.mish @@ -81,30 +84,31 @@ def forward(self, 
img): x = self.COUT(x + xx) return torch.sigmoid(x) + class R_DECODER(nn.Module): def __init__(self, args): super().__init__() self.args = args dd = 8 - self.CIN = nn.Conv2d(args.my_img_bit, dd*64, kernel_size=3, padding=1) - - self.B00 = nn.BatchNorm2d(dd*64) - self.C00 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C01 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.C02 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C03 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - - self.B10 = nn.BatchNorm2d(dd*16) - self.C10 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C11 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - self.C12 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C13 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - - self.B20 = nn.BatchNorm2d(dd*4) - self.C20 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C21 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) - self.C22 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C23 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) + self.CIN = nn.Conv2d(args.my_img_bit, dd * 64, kernel_size=3, padding=1) + + self.B00 = nn.BatchNorm2d(dd * 64) + self.C00 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C01 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) + self.C02 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C03 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) + + self.B10 = nn.BatchNorm2d(dd * 16) + self.C10 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C11 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + self.C12 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C13 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + + self.B20 = nn.BatchNorm2d(dd * 4) + self.C20 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C21 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) + self.C22 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C23 = nn.Conv2d(256, dd * 4, kernel_size=3, 
padding=1) self.Cx0 = nn.Conv2d(dd, 32, kernel_size=3, padding=1) self.Cx1 = nn.Conv2d(32, dd, kernel_size=3, padding=1) @@ -128,30 +132,33 @@ def forward(self, code): x = x + self.Cx1(ACT(self.Cx0(x))) x = self.COUT(x) - + return torch.sigmoid(x) + ######################################################################################################## -print(f'building model...') +print(f"building model...") args = types.SimpleNamespace() args.my_img_bit = 13 encoder = R_ENCODER(args).eval().cuda() decoder = R_DECODER(args).eval().cuda() -zpow = torch.tensor([2**i for i in range(0,13)]).reshape(13,1,1).cuda().long() +zpow = torch.tensor([2**i for i in range(0, 13)]).reshape(13, 1, 1).cuda().long() -encoder.load_state_dict(torch.load(f'{model_prefix}-E.pth')) -decoder.load_state_dict(torch.load(f'{model_prefix}-D.pth')) +encoder.load_state_dict(torch.load(f"{model_prefix}-E.pth")) +decoder.load_state_dict(torch.load(f"{model_prefix}-D.pth")) ######################################################################################################## -print(f'test image...') -img_transform = transforms.Compose([ - transforms.PILToTensor(), - transforms.ConvertImageDtype(torch.float), - transforms.Resize((224, 224)) -]) +print(f"test image...") +img_transform = transforms.Compose( + [ + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Resize((224, 224)), + ] +) with torch.no_grad(): img = img_transform(Image.open(input_img)).unsqueeze(0).cuda() @@ -159,7 +166,7 @@ def forward(self, code): z = ToBinary.apply(z) zz = torch.sum(z.squeeze().long() * zpow, dim=0) - print(f'Code shape = {zz.shape}\n{zz.cpu().numpy()}\n') - + print(f"Code shape = {zz.shape}\n{zz.cpu().numpy()}\n") + out = decoder(z) vision.utils.save_image(out, f"{input_img.split('.')[0]}-out-13bit.jpg") diff --git a/benchmarks/rwkv/rwkv-v4neo/run.py b/benchmarks/rwkv/rwkv-v4neo/run.py index f13e97f08..eb7109cb6 100644 --- a/benchmarks/rwkv/rwkv-v4neo/run.py +++ 
b/benchmarks/rwkv/rwkv-v4neo/run.py @@ -6,6 +6,7 @@ import math, os, sys, types, time, gc import torch from src.utils import TOKENIZER + try: os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1] except: @@ -20,12 +21,14 @@ # Step 1: set model & config (use v4 to run your trained-from-scratch models. v4 and v4neo are compatible) ######################################################################################################## -args.RUN_DEVICE = "cuda" # 'cuda' // 'cpu' (already fast) -args.FLOAT_MODE = "fp16" # fp16 (good for GPU, does not work for CPU) // fp32 (good for CPU) // bf16 (less accurate, but works for CPU) +args.RUN_DEVICE = "cuda" # 'cuda' // 'cpu' (already fast) +args.FLOAT_MODE = "fp16" # fp16 (good for GPU, does not work for CPU) // fp32 (good for CPU) // bf16 (less accurate, but works for CPU) # if args.RUN_DEVICE == "cuda": # os.environ["RWKV_RUN_BACKEND"] = 'nvfuser' # !!!BUGGY!!! wrong output -os.environ["RWKV_JIT_ON"] = '1' # '1' or '0'. very useful for GPU/CPU fp32, but might be harmful for GPU fp16. please benchmark !!! +os.environ[ + "RWKV_JIT_ON" +] = "1" # '1' or '0'. very useful for GPU/CPU fp32, but might be harmful for GPU fp16. please benchmark !!! TOKEN_MODE = "pile" WORD_NAME = [ @@ -58,7 +61,7 @@ # n_embd = 2560 # ctx_len = 1024 -MODEL_NAME = '/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-7b/RWKV-4-Pile-7B-20221115-8047' +MODEL_NAME = "/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-7b/RWKV-4-Pile-7B-20221115-8047" n_layer = 32 n_embd = 4096 ctx_len = 1024 @@ -129,12 +132,12 @@ ######################################################################################################## -print(f'\nUsing {args.RUN_DEVICE.upper()}. Loading {MODEL_NAME}...') +print(f"\nUsing {args.RUN_DEVICE.upper()}. 
Loading {MODEL_NAME}...") from src.model_run import RWKV_RNN model = RWKV_RNN(args) -print(f'\nOptimizing speed...') +print(f"\nOptimizing speed...") out, _ = model.forward([187], None) # print(out) gc.collect() @@ -142,10 +145,10 @@ # input(0) -print(f'\nLoading tokenizer {WORD_NAME}...') +print(f"\nLoading tokenizer {WORD_NAME}...") tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR) if TOKEN_MODE == "pile": - assert tokenizer.tokenizer.decode([187]) == '\n' + assert tokenizer.tokenizer.decode([187]) == "\n" ######################################################################################################## @@ -165,6 +168,7 @@ time_slot = {} time_ref = time.time_ns() + def record_time(name): if name not in time_slot: time_slot[name] = 1e20 @@ -172,13 +176,14 @@ def record_time(name): if tt < time_slot[name]: time_slot[name] = tt + init_state = None init_out = None state = None out = None for TRIAL in range(1 if DEBUG_DEBUG else NUM_TRIALS): - print(("-" * 50) + '\n' + context, end="") + print(("-" * 50) + "\n" + context, end="") time_ref = time.time_ns() ctx = src_ctx.copy() @@ -193,7 +198,7 @@ def record_time(name): gc.collect() torch.cuda.empty_cache() - record_time('preprocess') + record_time("preprocess") out_last = src_len for i in range(src_len, src_len + (1 if DEBUG_DEBUG else LENGTH_PER_TRIAL)): x = ctx[: i + 1] @@ -205,7 +210,14 @@ def record_time(name): else: out, state = model.forward(x, state) if DEBUG_DEBUG: - print("model", np.array(x), "==>", np.array(out), np.max(out.cpu().numpy()), np.min(out.cpu().numpy())) + print( + "model", + np.array(x), + "==>", + np.array(out), + np.max(out.cpu().numpy()), + np.min(out.cpu().numpy()), + ) if TOKEN_MODE == "pile": out[0] = -999999999 # disable <|endoftext|> @@ -224,14 +236,15 @@ def record_time(name): print(char, end="", flush=True) else: char = tokenizer.tokenizer.decode(ctx[out_last:]) - if '\ufffd' not in char: # is valid utf8 string? + if "\ufffd" not in char: # is valid utf8 string? 
print(char, end="", flush=True) - out_last = i+1 + out_last = i + 1 - record_time('total') + record_time("total") # print(f'\n\n{time_slot}\n\n') print( - f"\n\n--- preprocess {round(time_slot['preprocess'], 2)}s, generation {round(time_slot['total']-time_slot['preprocess'], 2)}s ", end = '' + f"\n\n--- preprocess {round(time_slot['preprocess'], 2)}s, generation {round(time_slot['total']-time_slot['preprocess'], 2)}s ", + end="", ) -print(("-" * 50) + '\n') +print(("-" * 50) + "\n") diff --git a/benchmarks/rwkv/rwkv-v4neo/src/binidx.py b/benchmarks/rwkv/rwkv-v4neo/src/binidx.py index 369081ad4..8d5b40bfe 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/binidx.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/binidx.py @@ -7,6 +7,7 @@ from functools import lru_cache from itertools import accumulate + def print_rank_0(*message): pass # """If distributed is initialized print only on rank 0.""" @@ -16,12 +17,14 @@ def print_rank_0(*message): # else: # print(*message, flush=True) + def _warmup_mmap_file(path): pass # with open(path, "rb") as stream: # while stream.read(100 * 1024 * 1024): # pass + dtypes = { 1: np.uint8, 2: np.int8, @@ -33,18 +36,22 @@ def _warmup_mmap_file(path): 8: np.uint16, } + def code(dtype): for k in dtypes.keys(): if dtypes[k] == dtype: return k raise ValueError(dtype) + def index_file_path(prefix_path): return prefix_path + ".idx" + def data_file_path(prefix_path): return prefix_path + ".bin" + class MMapIndexedDataset(torch.utils.data.Dataset): class Index(object): _HDR_MAGIC = b"MMIDIDX\x00\x00" @@ -100,7 +107,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._file.close() return _Writer() - + def __init__(self, path, skip_warmup=False): with open(path, "rb") as stream: magic_test = stream.read(9) @@ -217,8 +224,7 @@ def __getitem__(self, idx): elif isinstance(idx, slice): start, stop, step = idx.indices(len(self)) if step != 1: - raise ValueError( - "Slices into indexed_dataset must be contiguous") + raise ValueError("Slices into indexed_dataset 
must be contiguous") ptr = self._index._pointers[start] sizes = self._index._sizes[idx] offsets = list(accumulate(sizes)) diff --git a/benchmarks/rwkv/rwkv-v4neo/src/dataset.py b/benchmarks/rwkv/rwkv-v4neo/src/dataset.py index 71cbb1a57..6ddafb90b 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/dataset.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/dataset.py @@ -17,15 +17,24 @@ def __init__(self, args): if args.data_type == "binidx": self.vocab_size = args.vocab_size - rank_zero_info(f"Current vocab size = {self.vocab_size} (make sure it's correct)") + rank_zero_info( + f"Current vocab size = {self.vocab_size} (make sure it's correct)" + ) if args.my_pile_version == 1: self.data = MMapIndexedDataset(args.data_file) - self.data_size = len(self.data._bin_buffer) // self.data._index._dtype_size + self.data_size = ( + len(self.data._bin_buffer) // self.data._index._dtype_size + ) rank_zero_info(f"Data has {self.data_size} tokens.") else: - data_list = open(args.data_file, "r", encoding='utf-8').read().strip().split('\n') - data_list = [i.strip().split(' ') for i in data_list] + data_list = ( + open(args.data_file, "r", encoding="utf-8") + .read() + .strip() + .split("\n") + ) + data_list = [i.strip().split(" ") for i in data_list] self.data = [] self.data_size = int(data_list[-1][-1]) rank_zero_info(f"Data has {self.data_size} chunks.") @@ -37,29 +46,46 @@ def __init__(self, args): # rank_zero_info(self.data) if args.my_qa_mask > 0: - self.data_pile = MMapIndexedDataset('/fsx/BlinkDL/pile/pile_20B_tokenizer_text_document') - self.data_pile_size = len(self.data_pile._bin_buffer) // self.data._index._dtype_size + self.data_pile = MMapIndexedDataset( + "/fsx/BlinkDL/pile/pile_20B_tokenizer_text_document" + ) + self.data_pile_size = ( + len(self.data_pile._bin_buffer) // self.data._index._dtype_size + ) if args.my_pile_stage > 0: # assert self.data_size == 332115325534 and self.vocab_size == 50277 self.samples_per_epoch = args.epoch_steps * args.real_bsz assert 
self.samples_per_epoch == 40320 - rank_zero_info(f"########## Pile 20b-tokenized stage {args.my_pile_stage} ##########") + rank_zero_info( + f"########## Pile 20b-tokenized stage {args.my_pile_stage} ##########" + ) dataset_slot = self.data_size // args.ctx_len if args.my_pile_stage != 4: assert MaybeIsPrime(args.magic_prime) assert args.magic_prime % 3 == 2 - assert args.magic_prime / dataset_slot > 0.99 and args.magic_prime / dataset_slot <= 1 + assert ( + args.magic_prime / dataset_slot > 0.99 + and args.magic_prime / dataset_slot <= 1 + ) elif args.data_type == "numpy": self.data = np.load(args.data_file).astype("int") self.vocab_size = args.vocab_size - rank_zero_info("Current vocab size =", self.vocab_size, "(make sure it's correct)") + rank_zero_info( + "Current vocab size =", self.vocab_size, "(make sure it's correct)" + ) self.data_size = len(self.data) rank_zero_info(f"Data has {self.data_size} tokens.") elif args.data_type == "uint16": - self.data = np.fromfile(args.data_file, dtype=np.uint16).astype("int32").reshape(-1, args.my_sample_len) + self.data = ( + np.fromfile(args.data_file, dtype=np.uint16) + .astype("int32") + .reshape(-1, args.my_sample_len) + ) self.vocab_size = args.vocab_size - rank_zero_info("Current vocab size =", self.vocab_size, "(make sure it's correct)") + rank_zero_info( + "Current vocab size =", self.vocab_size, "(make sure it's correct)" + ) self.data_size = self.data.shape[0] rank_zero_info(f"Data has {self.data_size} samples.") elif args.data_type == "wds_img": @@ -92,10 +118,14 @@ def __init__(self, args): for u in unique: xxObj[xx] = u xx += 1 - with open(f"{args.proj_dir}/vocab.json", "w", encoding="utf-16le") as vocab_file: + with open( + f"{args.proj_dir}/vocab.json", "w", encoding="utf-16le" + ) as vocab_file: vocab_file.write(json.dumps(xxObj, ensure_ascii=False)) self.data_size = len(self.data) - rank_zero_info(f"Data has {self.data_size} tokens, {self.vocab_size} vocab size.") + rank_zero_info( + f"Data has 
{self.data_size} tokens, {self.vocab_size} vocab size." + ) self.stoi = {ch: i for i, ch in enumerate(unique)} self.itos = {i: ch for i, ch in enumerate(unique)} @@ -110,36 +140,53 @@ def __getitem__(self, idx): # print(f"epoch {epoch} idx {idx} rank {rank}/{world_size}") if args.data_type == "wds_img": + def init_wds(self, bias=0): def identity(x): - return x + return x + import webdataset as wds import torchvision.transforms as transforms + # img_transform = transforms.Compose( # [transforms.CenterCrop(256)] # ) - img_transform = transforms.Compose([ - transforms.CenterCrop(512), - transforms.Resize((args.my_img_size)) - ]) - self.data_raw = wds.WebDataset(args.data_file, resampled=True).shuffle(10000, initial=1000, rng=random.Random(epoch*100000+rank+bias*1e9)).decode("torchrgb").to_tuple("jpg", "json", "txt").map_tuple(img_transform, identity, identity) + img_transform = transforms.Compose( + [transforms.CenterCrop(512), transforms.Resize((args.my_img_size))] + ) + self.data_raw = ( + wds.WebDataset(args.data_file, resampled=True) + .shuffle( + 10000, + initial=1000, + rng=random.Random(epoch * 100000 + rank + bias * 1e9), + ) + .decode("torchrgb") + .to_tuple("jpg", "json", "txt") + .map_tuple(img_transform, identity, identity) + ) for pp in self.data_raw.pipeline: - if 'Resampled' in str(pp): + if "Resampled" in str(pp): pp.deterministic = True + def worker_seed(): - return rank*100000+epoch+bias*1e9 + return rank * 100000 + epoch + bias * 1e9 + pp.worker_seed = worker_seed self.data = iter(self.data_raw) # print(f"WebDataset loaded for rank {rank} epoch {epoch}") + if self.data == None: init_wds(self) trial = 0 while trial < 10: try: - dd = next(self.data) # jpg, json, txt + dd = next(self.data) # jpg, json, txt break except: - print(f'[dataloader error - epoch {epoch} rank {rank} - trying a new shuffle]') + print( + f"[dataloader error - epoch {epoch} rank {rank} - trying a new shuffle]" + ) self.error_count += 1 init_wds(self, self.error_count) trial += 1 
@@ -150,7 +197,7 @@ def worker_seed(): return dd[0], dd[2] else: if args.data_type == "uint16": - i = np.random.randint(0, self.data_size-1) + i = np.random.randint(0, self.data_size - 1) dix = self.data[i] x = torch.tensor(dix[:-1], dtype=torch.long) y = torch.tensor(dix[1:], dtype=torch.long) @@ -203,8 +250,12 @@ def worker_seed(): for j in range(len(data)): if i < data[j][0]: ii = i - i = (i - (data[j-1][0] if j > 0 else 0)) % data[j][1] - dix = data[j][2].get(idx=0, offset=i, length=req_len).astype(int) + i = (i - (data[j - 1][0] if j > 0 else 0)) % data[j][1] + dix = ( + data[j][2] + .get(idx=0, offset=i, length=req_len) + .astype(int) + ) # print(ii, j, i) break elif args.data_type == "numpy": @@ -220,7 +271,12 @@ def worker_seed(): z_sum = 0 isGood = False for i in range(3, ctx_len): - if dix[i] == 27 and dix[i-1] == 34 and dix[i-2] == 187 and dix[i-3] == 187: + if ( + dix[i] == 27 + and dix[i - 1] == 34 + and dix[i - 2] == 187 + and dix[i - 3] == 187 + ): isGood = True if dix[i] == 0: isGood = False @@ -230,7 +286,9 @@ def worker_seed(): if z_sum == 0: z = [1] * ctx_len i = np.random.randint(0, self.data_pile_size - req_len) - dix = self.data_pile.get(idx=0, offset=i, length=req_len).astype(int) + dix = self.data_pile.get( + idx=0, offset=i, length=req_len + ).astype(int) z = torch.tensor(z, dtype=torch.bfloat16) x = torch.tensor(dix[:-1], dtype=torch.long) diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model.py b/benchmarks/rwkv/rwkv-v4neo/src/model.py index b79f96d26..0914c160e 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/model.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/model.py @@ -4,6 +4,7 @@ import os, math, gc, importlib import torch + # torch._C._jit_set_profiling_executor(True) # torch._C._jit_set_profiling_mode(True) import torch.nn as nn @@ -11,16 +12,18 @@ import pytorch_lightning as pl from pytorch_lightning.utilities import rank_zero_info, rank_zero_only from pytorch_lightning.strategies import DeepSpeedStrategy -if 
importlib.util.find_spec('deepspeed'): + +if importlib.util.find_spec("deepspeed"): import deepspeed from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam # from deepspeed.runtime.fp16.onebit.zoadam import ZeroOneAdam try: - print('RWKV_MY_TESTING', os.environ["RWKV_MY_TESTING"]) + print("RWKV_MY_TESTING", os.environ["RWKV_MY_TESTING"]) except: - os.environ["RWKV_MY_TESTING"] = '' + os.environ["RWKV_MY_TESTING"] = "" + def __nop(ob): return ob @@ -43,7 +46,23 @@ def __nop(ob): from torch.utils.cpp_extension import load if os.environ["RWKV_FLOAT_MODE"] == "bf16": - wkv_cuda = load(name=f"wkv_{T_MAX}_bf16", sources=["cuda/wkv_op_bf16.cpp", "cuda/wkv_cuda_bf16.cu"], verbose=True, extra_cuda_cflags=["-t 4", "-std=c++17", "-res-usage", "--maxrregcount 60", "--use_fast_math", "-O3", "-Xptxas -O3", "--extra-device-vectorization", f"-DTmax={T_MAX}"]) + wkv_cuda = load( + name=f"wkv_{T_MAX}_bf16", + sources=["cuda/wkv_op_bf16.cpp", "cuda/wkv_cuda_bf16.cu"], + verbose=True, + extra_cuda_cflags=[ + "-t 4", + "-std=c++17", + "-res-usage", + "--maxrregcount 60", + "--use_fast_math", + "-O3", + "-Xptxas -O3", + "--extra-device-vectorization", + f"-DTmax={T_MAX}", + ], + ) + class WKV(torch.autograd.Function): @staticmethod def forward(ctx, B, T, C, w, u, k, v): @@ -56,10 +75,16 @@ def forward(ctx, B, T, C, w, u, k, v): u = u.contiguous() k = k.contiguous() v = v.contiguous() - y = torch.empty((B, T, C), device=w.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16) + y = torch.empty( + (B, T, C), + device=w.device, + memory_format=torch.contiguous_format, + dtype=torch.bfloat16, + ) wkv_cuda.forward(B, T, C, w, u, k, v, y) ctx.save_for_backward(w, u, k, v, y) return y + @staticmethod def backward(ctx, gy): B = ctx.B @@ -68,16 +93,51 @@ def backward(ctx, gy): assert T <= T_MAX assert B * C % min(C, 32) == 0 w, u, k, v, y = ctx.saved_tensors - gw = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16) - gu = 
torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16) - gk = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16) - gv = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format, dtype=torch.bfloat16) + gw = torch.empty( + (B, C), + device=gy.device, + memory_format=torch.contiguous_format, + dtype=torch.bfloat16, + ) + gu = torch.empty( + (B, C), + device=gy.device, + memory_format=torch.contiguous_format, + dtype=torch.bfloat16, + ) + gk = torch.empty( + (B, T, C), + device=gy.device, + memory_format=torch.contiguous_format, + dtype=torch.bfloat16, + ) + gv = torch.empty( + (B, T, C), + device=gy.device, + memory_format=torch.contiguous_format, + dtype=torch.bfloat16, + ) wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv) gw = torch.sum(gw, dim=0) gu = torch.sum(gu, dim=0) return (None, None, None, gw, gu, gk, gv) + else: - wkv_cuda = load(name=f"wkv_{T_MAX}", sources=["cuda/wkv_op.cpp", "cuda/wkv_cuda.cu"], verbose=True, extra_cuda_cflags=["-res-usage", "--maxrregcount 60", "--use_fast_math", "-O3", "-Xptxas -O3", "--extra-device-vectorization", f"-DTmax={T_MAX}"]) + wkv_cuda = load( + name=f"wkv_{T_MAX}", + sources=["cuda/wkv_op.cpp", "cuda/wkv_cuda.cu"], + verbose=True, + extra_cuda_cflags=[ + "-res-usage", + "--maxrregcount 60", + "--use_fast_math", + "-O3", + "-Xptxas -O3", + "--extra-device-vectorization", + f"-DTmax={T_MAX}", + ], + ) + class WKV(torch.autograd.Function): @staticmethod def forward(ctx, B, T, C, w, u, k, v): @@ -96,7 +156,9 @@ def forward(ctx, B, T, C, w, u, k, v): u = u.float().contiguous() k = k.float().contiguous() v = v.float().contiguous() - y = torch.empty((B, T, C), device=w.device, memory_format=torch.contiguous_format) + y = torch.empty( + (B, T, C), device=w.device, memory_format=torch.contiguous_format + ) wkv_cuda.forward(B, T, C, w, u, k, v, y) ctx.save_for_backward(w, u, k, v, y) if "32" 
in os.environ["RWKV_FLOAT_MODE"]: @@ -105,6 +167,7 @@ def forward(ctx, B, T, C, w, u, k, v): return y.half() elif os.environ["RWKV_FLOAT_MODE"] == "bf16": return y.bfloat16() + @staticmethod def backward(ctx, gy): B = ctx.B @@ -113,14 +176,26 @@ def backward(ctx, gy): assert T <= T_MAX assert B * C % min(C, 32) == 0 w, u, k, v, y = ctx.saved_tensors - gw = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format) - gu = torch.empty((B, C), device=gy.device, memory_format=torch.contiguous_format) - gk = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format) - gv = torch.empty((B, T, C), device=gy.device, memory_format=torch.contiguous_format) + gw = torch.empty( + (B, C), device=gy.device, memory_format=torch.contiguous_format + ) + gu = torch.empty( + (B, C), device=gy.device, memory_format=torch.contiguous_format + ) + gk = torch.empty( + (B, T, C), device=gy.device, memory_format=torch.contiguous_format + ) + gv = torch.empty( + (B, T, C), device=gy.device, memory_format=torch.contiguous_format + ) if "32" in os.environ["RWKV_FLOAT_MODE"]: - wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv) + wkv_cuda.backward( + B, T, C, w, u, k, v, y, gy.contiguous(), gw, gu, gk, gv + ) else: - wkv_cuda.backward(B, T, C, w, u, k, v, y, gy.float().contiguous(), gw, gu, gk, gv) + wkv_cuda.backward( + B, T, C, w, u, k, v, y, gy.float().contiguous(), gw, gu, gk, gv + ) gw = torch.sum(gw, dim=0) gu = torch.sum(gu, dim=0) if "32" in os.environ["RWKV_FLOAT_MODE"]: @@ -128,7 +203,15 @@ def backward(ctx, gy): elif os.environ["RWKV_FLOAT_MODE"] == "fp16": return (None, None, None, gw.half(), gu.half(), gk.half(), gv.half()) elif os.environ["RWKV_FLOAT_MODE"] == "bf16": - return (None, None, None, gw.bfloat16(), gu.bfloat16(), gk.bfloat16(), gv.bfloat16()) + return ( + None, + None, + None, + gw.bfloat16(), + gu.bfloat16(), + gk.bfloat16(), + gv.bfloat16(), + ) def RUN_CUDA(B, T, C, w, u, k, v): @@ -154,21 +237,27 @@ def 
__init__(self, args, layer_id): ddd = torch.ones(1, 1, args.n_embd) for i in range(args.n_embd): ddd[0, 0, i] = i / args.n_embd - + # fancy time_decay decay_speed = torch.ones(args.dim_att) for h in range(args.dim_att): - decay_speed[h] = -5 + 8 * (h / (args.dim_att - 1)) ** (0.7 + 1.3 * ratio_0_to_1) + decay_speed[h] = -5 + 8 * (h / (args.dim_att - 1)) ** ( + 0.7 + 1.3 * ratio_0_to_1 + ) self.time_decay = nn.Parameter(decay_speed) # print(layer_id, self.time_decay.flatten()[:3].cpu().numpy(), '...', self.time_decay.flatten()[-3:].cpu().numpy()) # fancy time_first zigzag = torch.tensor([(i + 1) % 3 - 1 for i in range(args.dim_att)]) * 0.5 - self.time_first = nn.Parameter(torch.ones(args.dim_att) * math.log(0.3) + zigzag) + self.time_first = nn.Parameter( + torch.ones(args.dim_att) * math.log(0.3) + zigzag + ) # fancy time_mix self.time_mix_k = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0)) - self.time_mix_v = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1) + self.time_mix_v = nn.Parameter( + torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1 + ) self.time_mix_r = nn.Parameter(torch.pow(ddd, 0.5 * ratio_1_to_almost0)) self.time_shift = nn.ZeroPad2d((0, 0, 1, -1)) @@ -177,8 +266,10 @@ def __init__(self, args, layer_id): self.receptance = nn.Linear(args.n_embd, args.dim_att, bias=False) self.output = nn.Linear(args.dim_att, args.n_embd, bias=False) - if 'a' in os.environ["RWKV_MY_TESTING"]: - self.register_buffer("att_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))) + if "a" in os.environ["RWKV_MY_TESTING"]: + self.register_buffer( + "att_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)) + ) d_qkv = args.n_embd // 16 self.qq = nn.Linear(args.n_embd, d_qkv, bias=False) self.kk = nn.Linear(args.n_embd, d_qkv, bias=False) @@ -187,12 +278,17 @@ def __init__(self, args, layer_id): with torch.no_grad(): self.time_mix_qq = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0)) self.time_mix_kk = nn.Parameter(torch.pow(ddd, 
ratio_1_to_almost0)) - self.time_mix_vv = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1) + self.time_mix_vv = nn.Parameter( + torch.pow(ddd, ratio_1_to_almost0) + 0.3 * ratio_0_to_1 + ) + + if "a" not in os.environ["RWKV_MY_TESTING"]: - if 'a' not in os.environ["RWKV_MY_TESTING"]: @MyFunction def jit_func(self, x): - xx = self.time_shift(x) # Mix x with the previous timestep to produce xk, xv, xr + xx = self.time_shift( + x + ) # Mix x with the previous timestep to produce xk, xv, xr xk = x * self.time_mix_k + xx * (1 - self.time_mix_k) xv = x * self.time_mix_v + xx * (1 - self.time_mix_v) xr = x * self.time_mix_r + xx * (1 - self.time_mix_r) @@ -205,21 +301,26 @@ def jit_func(self, x): def forward(self, x): B, T, C = x.size() # x = (Batch,Time,Channel) sr, k, v = self.jit_func(x) - rwkv = sr * RUN_CUDA(B, T, self.args.dim_att, self.time_decay, self.time_first, k, v) + rwkv = sr * RUN_CUDA( + B, T, self.args.dim_att, self.time_decay, self.time_first, k, v + ) return self.output(rwkv) - if 'a' in os.environ["RWKV_MY_TESTING"]: + if "a" in os.environ["RWKV_MY_TESTING"]: + @MyFunction def QKV(self, q, k, v): att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) - att = att.masked_fill(self.att_mask == 0, float('-inf')) - att = F.softmax(att, dim = -1) + att = att.masked_fill(self.att_mask == 0, float("-inf")) + att = F.softmax(att, dim=-1) x = att @ v return x @MyFunction def jit_funcQKV(self, x): - xx = self.time_shift(x) # Mix x with the previous timestep to produce xk, xv, xr + xx = self.time_shift( + x + ) # Mix x with the previous timestep to produce xk, xv, xr xk = x * self.time_mix_k + xx * (1 - self.time_mix_k) xv = x * self.time_mix_v + xx * (1 - self.time_mix_v) xr = x * self.time_mix_r + xx * (1 - self.time_mix_r) @@ -238,12 +339,16 @@ def jit_funcQKV(self, x): def forward(self, x): B, T, C = x.size() # x = (Batch,Time,Channel) sr, k, v, qq, kk, vv = self.jit_funcQKV(x) - rwkv = sr * RUN_CUDA(B, T, self.args.dim_att, 
self.time_decay, self.time_first, k, v) + rwkv = sr * RUN_CUDA( + B, T, self.args.dim_att, self.time_decay, self.time_first, k, v + ) rwkv = self.output(rwkv) + self.oo(self.QKV(qq, kk, vv)) return rwkv + ######################################################################################################## + class RWKV_ChannelMix(MyModule): def __init__(self, args, layer_id): super().__init__() @@ -258,7 +363,7 @@ def __init__(self, args, layer_id): ddd[0, 0, i] = i / args.n_embd self.time_mix_k = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0)) self.time_mix_r = nn.Parameter(torch.pow(ddd, ratio_1_to_almost0)) - + self.key = nn.Linear(args.n_embd, args.dim_ffn, bias=False) self.receptance = nn.Linear(args.n_embd, args.n_embd, bias=False) self.value = nn.Linear(args.dim_ffn, args.n_embd, bias=False) @@ -273,6 +378,7 @@ def forward(self, x): kv = self.value(k) return torch.sigmoid(self.receptance(xr)) * kv + class MishGLU(MyModule): def __init__(self, args, layer_id): super().__init__() @@ -302,6 +408,7 @@ def forward(self, x): b = self.bb(xb) return self.value(a * F.mish(b)) + ######################################################################################################## # The RWKV Model with our blocks ######################################################################################################## @@ -319,25 +426,31 @@ def __init__(self, args, layer_id): if self.layer_id == 0: self.ln0 = nn.LayerNorm(args.n_embd) if args.my_pos_emb > 0: - self.pos_emb_x = nn.Parameter(torch.zeros((1,args.my_pos_emb,args.n_embd))) - self.pos_emb_y = nn.Parameter(torch.zeros((args.my_pos_emb,1,args.n_embd))) + self.pos_emb_x = nn.Parameter( + torch.zeros((1, args.my_pos_emb, args.n_embd)) + ) + self.pos_emb_y = nn.Parameter( + torch.zeros((args.my_pos_emb, 1, args.n_embd)) + ) if self.layer_id == 0 and self.args.pre_ffn > 0: self.ffnPre = RWKV_ChannelMix(args, 0) else: self.att = RWKV_TimeMix(args, layer_id) - if 'g' in os.environ["RWKV_MY_TESTING"]: + if "g" in 
os.environ["RWKV_MY_TESTING"]: self.ffn = MishGLU(args, layer_id) else: self.ffn = RWKV_ChannelMix(args, layer_id) - + if args.tiny_att_dim > 0 and self.layer_id == args.tiny_att_layer: self.tiny_ln = nn.LayerNorm(args.n_embd) self.tiny_q = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False) self.tiny_k = nn.Linear(args.n_embd, args.tiny_att_dim, bias=False) self.tiny_v = nn.Linear(args.n_embd, args.n_embd, bias=False) - self.register_buffer("tiny_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))) + self.register_buffer( + "tiny_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)) + ) def forward(self, x, x_emb=None): args = self.args @@ -345,7 +458,7 @@ def forward(self, x, x_emb=None): if self.layer_id == 0: x = self.ln0(x) if args.my_pos_emb > 0: - pos_emb = (self.pos_emb_x + self.pos_emb_y).reshape(T+1, -1)[:-1,:] + pos_emb = (self.pos_emb_x + self.pos_emb_y).reshape(T + 1, -1)[:-1, :] x = x + pos_emb if self.layer_id == 0 and args.pre_ffn > 0: @@ -385,13 +498,13 @@ class RWKV(pl.LightningModule): def __init__(self, args): super().__init__() self.args = args - if not hasattr(args, 'dim_att'): + if not hasattr(args, "dim_att"): args.dim_att = args.n_embd - if not hasattr(args, 'dim_ffn'): + if not hasattr(args, "dim_ffn"): args.dim_ffn = args.n_embd * 4 - if not hasattr(args, 'tiny_att_layer'): + if not hasattr(args, "tiny_att_layer"): args.tiny_att_layer = -1 - if not hasattr(args, 'tiny_att_dim'): + if not hasattr(args, "tiny_att_dim"): args.tiny_att_dim = -1 self.emb = nn.Embedding(args.vocab_size, args.n_embd) @@ -404,7 +517,9 @@ def __init__(self, args): if args.head_qk > 0: self.head_q = nn.Linear(args.n_embd, args.head_qk, bias=False) self.head_k = nn.Linear(args.n_embd, args.head_qk, bias=False) - self.register_buffer("copy_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len))) + self.register_buffer( + "copy_mask", torch.tril(torch.ones(args.ctx_len, args.ctx_len)) + ) def configure_optimizers(self): args = self.args @@ -436,24 
+551,69 @@ def configure_optimizers(self): param_dict = {n: p for n, p in self.named_parameters()} if args.my_pile_stage == 2: optim_groups = [ - {"params": [param_dict[n] for n in lr_1x], "weight_decay": 0.0, "my_lr_scale": 1.0}, - {"params": [param_dict[n] for n in lr_2x], "weight_decay": 0.0, "my_lr_scale": 5.0},# test: 2e-3 / args.lr_init}, - {"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 5.0},# test: 3e-3 / args.lr_init}, + { + "params": [param_dict[n] for n in lr_1x], + "weight_decay": 0.0, + "my_lr_scale": 1.0, + }, + { + "params": [param_dict[n] for n in lr_2x], + "weight_decay": 0.0, + "my_lr_scale": 5.0, + }, # test: 2e-3 / args.lr_init}, + { + "params": [param_dict[n] for n in lr_3x], + "weight_decay": 0.0, + "my_lr_scale": 5.0, + }, # test: 3e-3 / args.lr_init}, ] else: optim_groups = [ - {"params": [param_dict[n] for n in lr_1x], "weight_decay": 0.0, "my_lr_scale": 1.0}, - {"params": [param_dict[n] for n in lr_2x], "weight_decay": 0.0, "my_lr_scale": 2.0}, - {"params": [param_dict[n] for n in lr_3x], "weight_decay": 0.0, "my_lr_scale": 3.0}, + { + "params": [param_dict[n] for n in lr_1x], + "weight_decay": 0.0, + "my_lr_scale": 1.0, + }, + { + "params": [param_dict[n] for n in lr_2x], + "weight_decay": 0.0, + "my_lr_scale": 2.0, + }, + { + "params": [param_dict[n] for n in lr_3x], + "weight_decay": 0.0, + "my_lr_scale": 3.0, + }, ] else: optim_groups = [ - {"params": [p for n, p in self.named_parameters()], "weight_decay": 0.0}, + { + "params": [p for n, p in self.named_parameters()], + "weight_decay": 0.0, + }, ] if self.deepspeed_offload: - return DeepSpeedCPUAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adamw_mode=False, weight_decay=0, amsgrad=False) - return FusedAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, adam_w_mode=False, weight_decay=0, amsgrad=False) + return DeepSpeedCPUAdam( + 
optim_groups, + lr=self.args.lr_init, + betas=self.args.betas, + eps=self.args.adam_eps, + bias_correction=True, + adamw_mode=False, + weight_decay=0, + amsgrad=False, + ) + return FusedAdam( + optim_groups, + lr=self.args.lr_init, + betas=self.args.betas, + eps=self.args.adam_eps, + bias_correction=True, + adam_w_mode=False, + weight_decay=0, + amsgrad=False, + ) # return ZeroOneAdam(optim_groups, lr=self.args.lr_init, betas=self.args.betas, eps=self.args.adam_eps, bias_correction=True, weight_decay=0, amsgrad=False, cuda_aware=False) @property @@ -521,10 +681,14 @@ def training_step(self, batch, batch_idx): logits = self(idx) if sum_mask == mask.shape[0]: - loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1)) + loss = F.cross_entropy( + logits.view(-1, logits.size(-1)), targets.view(-1) + ) # print('rank', self.global_rank, 'loss', loss.item()) else: - loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), reduction='none') + loss = F.cross_entropy( + logits.view(-1, logits.size(-1)), targets.view(-1), reduction="none" + ) # loss_raw = loss loss = torch.sum(loss * mask) / sum_mask @@ -564,7 +728,14 @@ def generate_init_weight(self): gain = 1.0 scale = 1.0 - if "ln_" in n or ".ln" in n or "time_" in n or "_mask" in n or "pos_emb" in n or '.mask.' in n: + if ( + "ln_" in n + or ".ln" in n + or "time_" in n + or "_mask" in n + or "pos_emb" in n + or ".mask." 
in n + ): m[n] = p else: if n == "emb.weight": @@ -572,7 +743,19 @@ def generate_init_weight(self): else: if shape[0] > shape[1]: gain = math.sqrt(shape[0] / shape[1]) - for kk in [".att.key.", ".att.receptance.", ".att.output.", ".att.key.", ".ffn.value.", ".ffn.receptance.", ".ffnPre.value.", ".ffnPre.receptance.", "head_q.", '.oo.', '.rr.']: + for kk in [ + ".att.key.", + ".att.receptance.", + ".att.output.", + ".att.key.", + ".ffn.value.", + ".ffn.receptance.", + ".ffnPre.value.", + ".ffnPre.receptance.", + "head_q.", + ".oo.", + ".rr.", + ]: if kk in n: scale = 0 if n == "head.weight": @@ -582,7 +765,9 @@ def generate_init_weight(self): if "head_q." in n: scale = 0 - print(f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {str(scale).ljust(4)} {n}") + print( + f"{str(shape[0]).ljust(5)} {str(shape[1]).ljust(5)} {str(scale).ljust(4)} {n}" + ) if self.args.accelerator.upper() == "GPU": m[n] = torch.empty((shape[0], shape[1]), device="cuda") diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model_img.py b/benchmarks/rwkv/rwkv-v4neo/src/model_img.py index 24337236b..3a9bceb4e 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/model_img.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/model_img.py @@ -13,10 +13,14 @@ from pytorch_lightning.strategies import DeepSpeedStrategy import deepspeed from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam + # from pytorch_msssim import MS_SSIM + def __nop(ob): return ob + + MyModule = torch.jit.ScriptModule # MyFunction = __nop MyFunction = torch.jit.script_method @@ -24,6 +28,7 @@ def __nop(ob): import clip from transformers import CLIPModel + class L2pooling(nn.Module): def __init__(self, filter_size=5, stride=2, channels=None, pad_off=0): super(L2pooling, self).__init__() @@ -149,55 +154,57 @@ def forward(self, x, y, require_grad=False, batch_average=False): class ToBinary(torch.autograd.Function): @staticmethod - def forward(ctx, x):#, noise_scale): + def forward(ctx, x): # , noise_scale): # if noise_scale > 0: # noise_min = 0.5 - 
noise_scale / 2 # noise_max = 0.5 + noise_scale / 2 # return torch.floor(x + torch.empty_like(x).uniform_(noise_min, noise_max)) # else: - return torch.floor(x + 0.5) # no need for noise when we have plenty of data + return torch.floor(x + 0.5) # no need for noise when we have plenty of data @staticmethod def backward(ctx, grad_output): - return grad_output.clone()#, None + return grad_output.clone() # , None + ######################################################################################################## + class R_ENCODER(MyModule): def __init__(self, args): super().__init__() self.args = args dd = 8 - self.Bxx = nn.BatchNorm2d(dd*64) + self.Bxx = nn.BatchNorm2d(dd * 64) self.CIN = nn.Conv2d(3, dd, kernel_size=3, padding=1) self.Cx0 = nn.Conv2d(dd, 32, kernel_size=3, padding=1) self.Cx1 = nn.Conv2d(32, dd, kernel_size=3, padding=1) - self.B00 = nn.BatchNorm2d(dd*4) - self.C00 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C01 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) - self.C02 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C03 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) - - self.B10 = nn.BatchNorm2d(dd*16) - self.C10 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C11 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - self.C12 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C13 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - - self.B20 = nn.BatchNorm2d(dd*64) - self.C20 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C21 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.C22 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C23 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) + self.B00 = nn.BatchNorm2d(dd * 4) + self.C00 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C01 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) + self.C02 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C03 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) + + self.B10 = 
nn.BatchNorm2d(dd * 16) + self.C10 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C11 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + self.C12 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C13 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + + self.B20 = nn.BatchNorm2d(dd * 64) + self.C20 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C21 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) + self.C22 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C23 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) # self.B21 = nn.BatchNorm2d(dd*64) # self.C24 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) # self.C25 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) # self.C26 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) # self.C27 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.COUT = nn.Conv2d(dd*64, args.my_img_bit, kernel_size=3, padding=1) + self.COUT = nn.Conv2d(dd * 64, args.my_img_bit, kernel_size=3, padding=1) @MyFunction def forward(self, img): @@ -224,37 +231,39 @@ def forward(self, img): x = self.COUT(x + xx) return torch.sigmoid(x) + ######################################################################################################## + class R_DECODER(MyModule): def __init__(self, args): super().__init__() self.args = args dd = 8 - self.CIN = nn.Conv2d(args.my_img_bit, dd*64, kernel_size=3, padding=1) + self.CIN = nn.Conv2d(args.my_img_bit, dd * 64, kernel_size=3, padding=1) - self.B00 = nn.BatchNorm2d(dd*64) - self.C00 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C01 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.C02 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) - self.C03 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) + self.B00 = nn.BatchNorm2d(dd * 64) + self.C00 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C01 = nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) + self.C02 = nn.Conv2d(dd * 64, 256, kernel_size=3, padding=1) + self.C03 = 
nn.Conv2d(256, dd * 64, kernel_size=3, padding=1) # self.B01 = nn.BatchNorm2d(dd*64) # self.C04 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) # self.C05 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) # self.C06 = nn.Conv2d(dd*64, 256, kernel_size=3, padding=1) # self.C07 = nn.Conv2d(256, dd*64, kernel_size=3, padding=1) - self.B10 = nn.BatchNorm2d(dd*16) - self.C10 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C11 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) - self.C12 = nn.Conv2d(dd*16, 256, kernel_size=3, padding=1) - self.C13 = nn.Conv2d(256, dd*16, kernel_size=3, padding=1) + self.B10 = nn.BatchNorm2d(dd * 16) + self.C10 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C11 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) + self.C12 = nn.Conv2d(dd * 16, 256, kernel_size=3, padding=1) + self.C13 = nn.Conv2d(256, dd * 16, kernel_size=3, padding=1) - self.B20 = nn.BatchNorm2d(dd*4) - self.C20 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C21 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) - self.C22 = nn.Conv2d(dd*4, 256, kernel_size=3, padding=1) - self.C23 = nn.Conv2d(256, dd*4, kernel_size=3, padding=1) + self.B20 = nn.BatchNorm2d(dd * 4) + self.C20 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C21 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) + self.C22 = nn.Conv2d(dd * 4, 256, kernel_size=3, padding=1) + self.C23 = nn.Conv2d(256, dd * 4, kernel_size=3, padding=1) self.Cx0 = nn.Conv2d(dd, 32, kernel_size=3, padding=1) self.Cx1 = nn.Conv2d(32, dd, kernel_size=3, padding=1) @@ -281,47 +290,52 @@ def forward(self, code): x = x + self.Cx1(ACT(self.Cx0(x))) x = self.COUT(x) - + return torch.sigmoid(x) + ########################################################################################################` + def cosine_loss(x, y): x = F.normalize(x, dim=-1) y = F.normalize(y, dim=-1) - return 1 - torch.einsum('ij,ij->i',[x,y]) + return 1 - torch.einsum("ij,ij->i", [x, y]) + class 
RWKV_IMG(pl.LightningModule): def __init__(self, args): super().__init__() self.args = args - + self.encoder = R_ENCODER(args) self.decoder = R_DECODER(args) self.clip_model = None clip_name = args.my_img_clip - if clip_name == 'B32': - clip_name = 'ViT-B/32' - elif clip_name == 'B16': - clip_name = 'ViT-B/16' - elif clip_name == 'L14': - clip_name = 'ViT-L/14' - elif clip_name == 'OB32': + if clip_name == "B32": + clip_name = "ViT-B/32" + elif clip_name == "B16": + clip_name = "ViT-B/16" + elif clip_name == "L14": + clip_name = "ViT-L/14" + elif clip_name == "OB32": clip_name = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" self.clip_model = CLIPModel.from_pretrained(clip_name) self.clip_model.encode_image = self.clip_model.get_image_features if self.clip_model == None: - self.clip_model, _ = clip.load(clip_name, jit = True) + self.clip_model, _ = clip.load(clip_name, jit=True) self.register_buffer( - "clip_mean", torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1) + "clip_mean", + torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1), ) self.register_buffer( - "clip_std", torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1) + "clip_std", + torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1), ) for n, p in self.named_parameters(): - if 'clip_model' in n: + if "clip_model" in n: p.requires_grad = False self.loss_dists = DISTS() @@ -365,7 +379,7 @@ def deepspeed_offload(self) -> bool: def forward(self, img): z = self.encoder(img) - z = ToBinary.apply(z)#, self.args.my_img_noise_scale) + z = ToBinary.apply(z) # , self.args.my_img_noise_scale) out = self.decoder(z) return out @@ -379,10 +393,12 @@ def training_step(self, batch, batch_idx): if not os.path.exists(img_dir): os.makedirs(img_dir) vision.utils.save_image( - img[:4], f"{img_dir}/{self.trainer.global_step}-src.jpg"#, padding=0 + img[:4], + f"{img_dir}/{self.trainer.global_step}-src.jpg", # , padding=0 ) vision.utils.save_image( - out[:4], 
f"{img_dir}/{self.trainer.global_step}-out.jpg"#, padding=0 + out[:4], + f"{img_dir}/{self.trainer.global_step}-out.jpg", # , padding=0 ) # loss_ssim = 1 - self.loss_ssim(out, img) @@ -394,7 +410,11 @@ def training_step(self, batch, batch_idx): if args.my_img_l1_scale > 0: loss_l1 = F.l1_loss(out, img) - return loss_dists + loss_clip * args.my_img_clip_scale + loss_l1 * args.my_img_l1_scale + return ( + loss_dists + + loss_clip * args.my_img_clip_scale + + loss_l1 * args.my_img_l1_scale + ) else: return loss_dists + loss_clip * args.my_img_clip_scale @@ -418,7 +438,7 @@ def generate_init_weight(self): scale = 1 p = self.state_dict()[n] shape = p.shape - ss = n.split('.') + ss = n.split(".") # if ss[0] in ['encoder', 'decoder']: # if ss[2] == 'bias': diff --git a/benchmarks/rwkv/rwkv-v4neo/src/model_run.py b/benchmarks/rwkv/rwkv-v4neo/src/model_run.py index 2516e508c..184a35cfa 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/model_run.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/model_run.py @@ -10,8 +10,12 @@ from typing import List, Dict MyModule = nn.Module + + def __nop(ob): return ob + + MyFunction = __nop # # try torchdynamo @@ -24,14 +28,17 @@ def __nop(ob): MyFunction = torch.jit.script_method RWKV_HEAD_QK_DIM = 0 -print(f'\nRWKV_HEAD_QK_DIM {RWKV_HEAD_QK_DIM} RWKV_JIT_ON {os.environ["RWKV_JIT_ON"]}\n') +print( + f'\nRWKV_HEAD_QK_DIM {RWKV_HEAD_QK_DIM} RWKV_JIT_ON {os.environ["RWKV_JIT_ON"]}\n' +) -DEBUG_TIME = False # True False - show trained time-coeffs +DEBUG_TIME = False # True False - show trained time-coeffs -RWKV_RESCALE_LAYER = 6 # set x=x/2 every X layer +RWKV_RESCALE_LAYER = 6 # set x=x/2 every X layer ############################################################################################################ + class RWKV_RNN(MyModule): def __init__(self, args): super().__init__() @@ -41,30 +48,32 @@ def __init__(self, args): self.RUN_DEVICE = args.RUN_DEVICE with torch.no_grad(): - w = torch.load(args.MODEL_NAME + '.pth', map_location='cpu') + w = 
torch.load(args.MODEL_NAME + ".pth", map_location="cpu") # refine weights and send to correct device keys = list(w.keys()) - if 'pos_emb_x' in keys: - w['pos_emb'] = (w['pos_emb_x'] + w['pos_emb_y']).reshape(args.ctx_len+1, -1)[:-1,:] + if "pos_emb_x" in keys: + w["pos_emb"] = (w["pos_emb_x"] + w["pos_emb_y"]).reshape( + args.ctx_len + 1, -1 + )[:-1, :] keys = list(w.keys()) print_need_newline = False for x in keys: block_id = 0 - if 'blocks.' in x: - block_id = int(x.split('.')[1]) - if 'att.output.weight' in x: + if "blocks." in x: + block_id = int(x.split(".")[1]) + if "att.output.weight" in x: w[x] = w[x] / (2 ** int(block_id // RWKV_RESCALE_LAYER)) - if 'ffn.value.weight' in x: + if "ffn.value.weight" in x: w[x] = w[x] / (2 ** int(block_id // RWKV_RESCALE_LAYER)) - - if '.time_' in x: + + if ".time_" in x: w[x] = w[x].squeeze() if DEBUG_TIME: print(x, w[x].numpy()) - if '.time_decay' in x: + if ".time_decay" in x: w[x] = w[x].float() w[x] = -torch.exp(w[x]) - elif '.time_first' in x: + elif ".time_first" in x: w[x] = w[x].float() else: if self.FLOAT_MODE == "fp32": @@ -75,23 +84,27 @@ def __init__(self, args): w[x] = w[x].half() w[x].requires_grad = False - if args.RUN_DEVICE == 'cuda' and x != 'emb.weight': + if args.RUN_DEVICE == "cuda" and x != "emb.weight": w[x] = w[x].cuda() - if ('blocks.' not in x) or ('blocks.0.' in x): + if ("blocks." not in x) or ("blocks.0." 
in x): if print_need_newline: - print('\n', end = '') + print("\n", end="") print_need_newline = False - print(x.ljust(40), str(w[x].dtype).replace('torch.', '').ljust(10), w[x].device) + print( + x.ljust(40), + str(w[x].dtype).replace("torch.", "").ljust(10), + w[x].device, + ) else: print_need_newline = True - print('.', end = '', flush = True) + print(".", end="", flush=True) # store weights in self.w keys = list(w.keys()) self.w = types.SimpleNamespace() for x in keys: - xx = x.split('.') + xx = x.split(".") here = self.w for i in range(len(xx)): if xx[i].isdigit(): @@ -103,7 +116,7 @@ def __init__(self, args): if i == len(xx) - 1: setattr(here, xx[i], w[x]) elif not hasattr(here, xx[i]): - if xx[i+1].isdigit(): + if xx[i + 1].isdigit(): setattr(here, xx[i], {}) else: setattr(here, xx[i], types.SimpleNamespace()) @@ -119,19 +132,23 @@ def LN(self, x, w): # state[] 0=ffn_xx 1=att_xx 2=att_aa 3=att_bb 4=att_pp @MyFunction - def FF(self, x, state, i:int, time_mix_k, time_mix_r, kw, vw, rw): + def FF(self, x, state, i: int, time_mix_k, time_mix_r, kw, vw, rw): if self.FLOAT_MODE == "bf16": - xk = x * time_mix_k + state[5*i+0].type(torch.bfloat16) * (1 - time_mix_k) - xr = x * time_mix_r + state[5*i+0].type(torch.bfloat16) * (1 - time_mix_r) - state[5*i+0] = x.float() + xk = x * time_mix_k + state[5 * i + 0].type(torch.bfloat16) * ( + 1 - time_mix_k + ) + xr = x * time_mix_r + state[5 * i + 0].type(torch.bfloat16) * ( + 1 - time_mix_r + ) + state[5 * i + 0] = x.float() elif self.FLOAT_MODE == "fp16": - xk = x * time_mix_k + state[5*i+0].half() * (1 - time_mix_k) - xr = x * time_mix_r + state[5*i+0].half() * (1 - time_mix_r) - state[5*i+0] = x.float() + xk = x * time_mix_k + state[5 * i + 0].half() * (1 - time_mix_k) + xr = x * time_mix_r + state[5 * i + 0].half() * (1 - time_mix_r) + state[5 * i + 0] = x.float() else: - xk = x * time_mix_k + state[5*i+0] * (1 - time_mix_k) - xr = x * time_mix_r + state[5*i+0] * (1 - time_mix_r) - state[5*i+0] = x + xk = x * 
time_mix_k + state[5 * i + 0] * (1 - time_mix_k) + xr = x * time_mix_r + state[5 * i + 0] * (1 - time_mix_r) + state[5 * i + 0] = x r = torch.sigmoid(rw @ xr) k = torch.square(torch.relu(kw @ xk)) @@ -140,36 +157,56 @@ def FF(self, x, state, i:int, time_mix_k, time_mix_r, kw, vw, rw): return r * kv @MyFunction - def SA(self, x, state, i:int, time_mix_k, time_mix_v, time_mix_r, time_first, time_decay, kw, vw, rw, ow): + def SA( + self, + x, + state, + i: int, + time_mix_k, + time_mix_v, + time_mix_r, + time_first, + time_decay, + kw, + vw, + rw, + ow, + ): if self.FLOAT_MODE == "bf16": - xk = x * time_mix_k + state[5*i+1].type(torch.bfloat16) * (1 - time_mix_k) - xv = x * time_mix_v + state[5*i+1].type(torch.bfloat16) * (1 - time_mix_v) - xr = x * time_mix_r + state[5*i+1].type(torch.bfloat16) * (1 - time_mix_r) - state[5*i+1] = x.float() + xk = x * time_mix_k + state[5 * i + 1].type(torch.bfloat16) * ( + 1 - time_mix_k + ) + xv = x * time_mix_v + state[5 * i + 1].type(torch.bfloat16) * ( + 1 - time_mix_v + ) + xr = x * time_mix_r + state[5 * i + 1].type(torch.bfloat16) * ( + 1 - time_mix_r + ) + state[5 * i + 1] = x.float() elif self.FLOAT_MODE == "fp16": - xk = x * time_mix_k + state[5*i+1].half() * (1 - time_mix_k) - xv = x * time_mix_v + state[5*i+1].half() * (1 - time_mix_v) - xr = x * time_mix_r + state[5*i+1].half() * (1 - time_mix_r) - state[5*i+1] = x.float() + xk = x * time_mix_k + state[5 * i + 1].half() * (1 - time_mix_k) + xv = x * time_mix_v + state[5 * i + 1].half() * (1 - time_mix_v) + xr = x * time_mix_r + state[5 * i + 1].half() * (1 - time_mix_r) + state[5 * i + 1] = x.float() else: - xk = x * time_mix_k + state[5*i+1] * (1 - time_mix_k) - xv = x * time_mix_v + state[5*i+1] * (1 - time_mix_v) - xr = x * time_mix_r + state[5*i+1] * (1 - time_mix_r) - state[5*i+1] = x + xk = x * time_mix_k + state[5 * i + 1] * (1 - time_mix_k) + xv = x * time_mix_v + state[5 * i + 1] * (1 - time_mix_v) + xr = x * time_mix_r + state[5 * i + 1] * (1 - time_mix_r) + 
state[5 * i + 1] = x r = torch.sigmoid(rw @ xr) k = kw @ xk v = vw @ xv - if '16' in self.FLOAT_MODE: + if "16" in self.FLOAT_MODE: kk = k.float() vv = v.float() else: kk = k vv = v - aa = state[5*i+2] - bb = state[5*i+3] - pp = state[5*i+4] + aa = state[5 * i + 2] + bb = state[5 * i + 3] + pp = state[5 * i + 4] ww = time_first + kk p = torch.maximum(pp, ww) e1 = torch.exp(pp - p) @@ -180,52 +217,72 @@ def SA(self, x, state, i:int, time_mix_k, time_mix_v, time_mix_r, time_first, ti p = torch.maximum(ww, kk) e1 = torch.exp(ww - p) e2 = torch.exp(kk - p) - state[5*i+2] = e1 * aa + e2 * vv - state[5*i+3] = e1 * bb + e2 - state[5*i+4] = p + state[5 * i + 2] = e1 * aa + e2 * vv + state[5 * i + 3] = e1 * bb + e2 + state[5 * i + 4] = p if self.FLOAT_MODE == "bf16": wkv = (a / b).type(torch.bfloat16) elif self.FLOAT_MODE == "fp16": wkv = (a / b).half() else: wkv = a / b - + return ow @ (r * wkv) - def forward(self, ctx, state, preprocess_only = False): + def forward(self, ctx, state, preprocess_only=False): with torch.no_grad(): w = self.w args = self.args x = w.emb.weight[ctx[-1]] - if self.RUN_DEVICE == 'cuda': + if self.RUN_DEVICE == "cuda": x = x.cuda() try: - pos_emb = w.pos_emb[len(ctx)-1] + pos_emb = w.pos_emb[len(ctx) - 1] x = x + pos_emb except: - pass + pass if state == None: - state = torch.zeros(args.n_layer * 5, args.n_embd, device=self.RUN_DEVICE) + state = torch.zeros( + args.n_layer * 5, args.n_embd, device=self.RUN_DEVICE + ) for i in range(args.n_layer): - state[5*i+4] -= 1e30 + state[5 * i + 4] -= 1e30 for i in range(args.n_layer): if i == 0: x = self.LN(x, w.blocks[i].ln0) - + ww = w.blocks[i].att - x = x + self.SA(self.LN(x, w.blocks[i].ln1), state, i, - ww.time_mix_k, ww.time_mix_v, ww.time_mix_r, ww.time_first, ww.time_decay, - ww.key.weight, ww.value.weight, ww.receptance.weight, ww.output.weight) - + x = x + self.SA( + self.LN(x, w.blocks[i].ln1), + state, + i, + ww.time_mix_k, + ww.time_mix_v, + ww.time_mix_r, + ww.time_first, + ww.time_decay, + 
ww.key.weight, + ww.value.weight, + ww.receptance.weight, + ww.output.weight, + ) + ww = w.blocks[i].ffn - x = x + self.FF(self.LN(x, w.blocks[i].ln2), state, i, - ww.time_mix_k, ww.time_mix_r, - ww.key.weight, ww.value.weight, ww.receptance.weight) - - if (i+1) % RWKV_RESCALE_LAYER == 0: + x = x + self.FF( + self.LN(x, w.blocks[i].ln2), + state, + i, + ww.time_mix_k, + ww.time_mix_r, + ww.key.weight, + ww.value.weight, + ww.receptance.weight, + ) + + if (i + 1) % RWKV_RESCALE_LAYER == 0: x = x / 2 if preprocess_only: diff --git a/benchmarks/rwkv/rwkv-v4neo/src/trainer.py b/benchmarks/rwkv/rwkv-v4neo/src/trainer.py index 9791ea524..98f229c40 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/trainer.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/trainer.py @@ -5,6 +5,7 @@ from pytorch_lightning.utilities import rank_zero_info, rank_zero_only from giving import give + def my_save(dd, ff): pass # if '14b-run1' not in ff: @@ -15,6 +16,7 @@ def my_save(dd, ff): # torch.save(dd, fff) # subprocess.Popen(f" aws s3 mv {fff} s3://rwkv-14b-4k/{fn} --quiet", shell=True) + class train_callback(pl.Callback): def __init__(self, args): super().__init__() @@ -39,7 +41,9 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): if args.lr_final == 0 or args.lr_init == 0: # linear decay lr = args.lr_init + (args.lr_final - args.lr_init) * progress else: # exp decay - lr = args.lr_init * math.exp(math.log(args.lr_final / args.lr_init) * pow(progress, 1)) + lr = args.lr_init * math.exp( + math.log(args.lr_final / args.lr_init) * pow(progress, 1) + ) if trainer.global_step < w_step: lr = lr * (0.2 + 0.8 * trainer.global_step / w_step) @@ -61,7 +65,9 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): trainer.my_loss_sum = 0 trainer.my_loss_count = 0 trainer.my_log = open(args.proj_dir + "/train_log.txt", "a") - trainer.my_log.write(f"NEW RUN {args.my_timestamp}\n{vars(self.args)}\n") + trainer.my_log.write( + f"NEW RUN {args.my_timestamp}\n{vars(self.args)}\n" + ) 
try: print(f"\n{trainer.strategy.config}\n") trainer.my_log.write(f"{trainer.strategy.config}\n") @@ -71,6 +77,7 @@ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): if len(args.wandb) > 0: print("Login to wandb...") import wandb + wandb.init( project=args.wandb, name=args.run_name + " " + args.my_timestamp, @@ -105,19 +112,25 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx): # self.log("s", real_step, prog_bar=True, on_step=True) if len(args.wandb) > 0: - lll = {"loss": trainer.my_loss, "lr": trainer.my_lr, "Gtokens": real_step * token_per_step / 1e9} + lll = { + "loss": trainer.my_loss, + "lr": trainer.my_lr, + "Gtokens": real_step * token_per_step / 1e9, + } if kt_s > 0: lll["kt/s"] = kt_s trainer.my_wandb.log(lll, step=int(real_step)) if args.magic_prime > 0: expand_factor = 2 if args.my_qa_mask > 0 else 1 - if int(real_step) == int(args.magic_prime * expand_factor // args.real_bsz) - 1: + if ( + int(real_step) + == int(args.magic_prime * expand_factor // args.real_bsz) - 1 + ): to_save_dict = pl_module.state_dict() my_save( to_save_dict, f"{args.proj_dir}/rwkv-final.pth", ) - def on_train_epoch_start(self, trainer, pl_module): args = self.args @@ -147,7 +160,9 @@ def on_train_epoch_end(self, trainer, pl_module): # ) # except Exception as e: # print('Error\n\n', e, '\n\n') - trainer.my_log.write(f"{args.epoch_begin + trainer.current_epoch} {trainer.my_epoch_loss:.6f} {math.exp(trainer.my_epoch_loss):.4f} {trainer.my_lr:.8f} {datetime.datetime.now()} {trainer.current_epoch}\n") + trainer.my_log.write( + f"{args.epoch_begin + trainer.current_epoch} {trainer.my_epoch_loss:.6f} {math.exp(trainer.my_epoch_loss):.4f} {trainer.my_lr:.8f} {datetime.datetime.now()} {trainer.current_epoch}\n" + ) trainer.my_log.flush() trainer.my_loss_sum = 0 @@ -169,22 +184,22 @@ def generate_init_weight(model, init_weight_name): mm[k] = src.reshape(mm[k].shape) except: tmp = mm[k].squeeze().clone() - print(k, src.shape, '-->', 
mm[k].shape) + print(k, src.shape, "-->", mm[k].shape) ss = src.shape[0] dd = tmp.shape[0] for i in range(dd): pos = i / dd * ss if pos >= ss - 1: - tmp[i] = src[ss-1] + tmp[i] = src[ss - 1] else: p0 = int(math.floor(pos)) ii = pos - p0 - tmp[i] = src[p0] * (1-ii) + src[p0+1] * (ii) + tmp[i] = src[p0] * (1 - ii) + src[p0 + 1] * (ii) mm[k] = tmp.reshape(mm[k].shape) sss = src.squeeze().float().cpu().numpy() - print(sss[:10], '...', sss[-10:]) + print(sss[:10], "...", sss[-10:]) mmm = mm[k].squeeze().float().cpu().numpy() - print(mmm[:10], '...', mmm[-10:]) + print(mmm[:10], "...", mmm[-10:]) # print(f"Save to {init_weight_name}...") # torch.save(mm, init_weight_name) diff --git a/benchmarks/rwkv/rwkv-v4neo/src/utils.py b/benchmarks/rwkv/rwkv-v4neo/src/utils.py index ea25990b4..87da098db 100644 --- a/benchmarks/rwkv/rwkv-v4neo/src/utils.py +++ b/benchmarks/rwkv/rwkv-v4neo/src/utils.py @@ -6,6 +6,7 @@ time_slot = {} time_ref = time.time_ns() + def record_time(name): if name not in time_slot: time_slot[name] = 1e20 @@ -13,20 +14,23 @@ def record_time(name): if tt < time_slot[name]: time_slot[name] = tt -class TOKENIZER(): - def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): - if 'list' in str(type(WORD_NAME)): + +class TOKENIZER: + def __init__(self, WORD_NAME, UNKNOWN_CHAR="\ue083"): + if "list" in str(type(WORD_NAME)): self.charMode = False if WORD_NAME[0] == WORD_NAME[1]: from transformers import PreTrainedTokenizerFast + self.tokenizer = PreTrainedTokenizerFast(tokenizer_file=WORD_NAME[0]) else: from transformers import GPT2TokenizerFast + self.tokenizer = GPT2TokenizerFast(WORD_NAME[0], WORD_NAME[1]) self.vocab_size = len(self.tokenizer) else: self.charMode = True - with open(WORD_NAME + '.json', "r", encoding="utf-16") as result_file: + with open(WORD_NAME + ".json", "r", encoding="utf-16") as result_file: self.word_table = json.load(result_file) self.vocab_size = len(self.word_table) @@ -37,23 +41,25 @@ def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): 
self.UNKNOWN_CHAR = self.stoi[UNKNOWN_CHAR] def refine_context(self, context): - context = context.strip().split('\n') + context = context.strip().split("\n") for c in range(len(context)): - context[c] = context[c].strip().strip('\u3000').strip('\r') - context = list(filter(lambda c: c != '', context)) - context = '\n' + ('\n'.join(context)).strip() - if context == '': - context = '\n' + context[c] = context[c].strip().strip("\u3000").strip("\r") + context = list(filter(lambda c: c != "", context)) + context = "\n" + ("\n".join(context)).strip() + if context == "": + context = "\n" return context - def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_p_newline=None): + def sample_logits( + self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_p_newline=None + ): # out[self.UNKNOWN_CHAR] = -float('Inf') lastChar = int(x[-1]) probs = F.softmax(out, dim=-1) if self.charMode: - if self.itos[lastChar] == '\n': + if self.itos[lastChar] == "\n": top_p = top_p_newline else: top_p = top_p_usual @@ -81,6 +87,7 @@ def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=None, top_ out = torch.multinomial(probs, num_samples=1)[0] return out + def MaybeIsPrime(number): if FermatPrimalityTest(number) and MillerRabinPrimalityTest(number): return True @@ -121,7 +128,9 @@ def MillerRabinPrimalityTest(number): if (randomNumberWithPower != 1) and (randomNumberWithPower != number - 1): iterationNumber = 1 - while (iterationNumber <= timesTwoDividNumber - 1) and (randomNumberWithPower != number - 1): + while (iterationNumber <= timesTwoDividNumber - 1) and ( + randomNumberWithPower != number - 1 + ): randomNumberWithPower = pow(randomNumberWithPower, 2, number) iterationNumber = iterationNumber + 1 if randomNumberWithPower != (number - 1): diff --git a/benchmarks/rwkv/rwkv-v4neo/train.py b/benchmarks/rwkv/rwkv-v4neo/train.py index 6dd8ce166..875d9c4eb 100644 --- a/benchmarks/rwkv/rwkv-v4neo/train.py +++ 
b/benchmarks/rwkv/rwkv-v4neo/train.py @@ -52,53 +52,91 @@ parser = ArgumentParser() parser.add_argument("--load_model", default="", type=str) # full path, with .pth - parser.add_argument("--wandb", default="", type=str) # wandb project name. if "" then don't use wandb - parser.add_argument("--proj_dir", default=os.environ.get("MILABENCH_BASE", ".") + "/proj/rwkv/", type=str) + parser.add_argument( + "--wandb", default="", type=str + ) # wandb project name. if "" then don't use wandb + parser.add_argument( + "--proj_dir", + default=os.environ.get("MILABENCH_BASE", ".") + "/proj/rwkv/", + type=str, + ) parser.add_argument("--random_seed", default="-1", type=int) parser.add_argument("--data_file", default="", type=str) parser.add_argument("--data_type", default="utf-8", type=str) - parser.add_argument("--vocab_size", default=0, type=int) # vocab_size = 0 means auto (for char-level LM and .txt data) + parser.add_argument( + "--vocab_size", default=0, type=int + ) # vocab_size = 0 means auto (for char-level LM and .txt data) parser.add_argument("--ctx_len", default=1024, type=int) - parser.add_argument("--epoch_steps", default=1000, type=int) # a mini "epoch" has [epoch_steps] steps - parser.add_argument("--epoch_count", default=500, type=int) # train for this many "epochs". will continue afterwards with lr = lr_final - parser.add_argument("--epoch_begin", default=0, type=int) # if you load a model trained for x "epochs", set epoch_begin = x - parser.add_argument("--epoch_save", default=5, type=int) # save the model every [epoch_save] "epochs" - - parser.add_argument("--micro_bsz", default=12, type=int) # micro batch size (batch size per GPU) + parser.add_argument( + "--epoch_steps", default=1000, type=int + ) # a mini "epoch" has [epoch_steps] steps + parser.add_argument( + "--epoch_count", default=500, type=int + ) # train for this many "epochs". 
will continue afterwards with lr = lr_final + parser.add_argument( + "--epoch_begin", default=0, type=int + ) # if you load a model trained for x "epochs", set epoch_begin = x + parser.add_argument( + "--epoch_save", default=5, type=int + ) # save the model every [epoch_save] "epochs" + + parser.add_argument( + "--micro_bsz", default=12, type=int + ) # micro batch size (batch size per GPU) parser.add_argument("--n_layer", default=6, type=int) parser.add_argument("--n_embd", default=512, type=int) parser.add_argument("--dim_att", default=0, type=int) parser.add_argument("--dim_ffn", default=0, type=int) - parser.add_argument("--pre_ffn", default=0, type=int) # replace first att layer by ffn (sometimes better) + parser.add_argument( + "--pre_ffn", default=0, type=int + ) # replace first att layer by ffn (sometimes better) parser.add_argument("--head_qk", default=0, type=int) # my headQK trick parser.add_argument("--tiny_att_dim", default=0, type=int) # tiny attention dim - parser.add_argument("--tiny_att_layer", default=-999, type=int) # tiny attention @ which layer + parser.add_argument( + "--tiny_att_layer", default=-999, type=int + ) # tiny attention @ which layer - parser.add_argument("--lr_init", default=6e-4, type=float) # 6e-4 for L12-D768, 4e-4 for L24-D1024, 3e-4 for L24-D2048 + parser.add_argument( + "--lr_init", default=6e-4, type=float + ) # 6e-4 for L12-D768, 4e-4 for L24-D1024, 3e-4 for L24-D2048 parser.add_argument("--lr_final", default=1e-5, type=float) - parser.add_argument("--warmup_steps", default=0, type=int) # try 50 if you load a model + parser.add_argument( + "--warmup_steps", default=0, type=int + ) # try 50 if you load a model parser.add_argument("--beta1", default=0.9, type=float) - parser.add_argument("--beta2", default=0.99, type=float) # use 0.999 when your model is close to convergence + parser.add_argument( + "--beta2", default=0.99, type=float + ) # use 0.999 when your model is close to convergence parser.add_argument("--adam_eps", 
default=1e-8, type=float) - parser.add_argument("--grad_cp", default=0, type=int) # gradient checkpt: saves VRAM, but slower + parser.add_argument( + "--grad_cp", default=0, type=int + ) # gradient checkpt: saves VRAM, but slower - parser.add_argument("--my_pile_version", default=1, type=int) # my special pile version + parser.add_argument( + "--my_pile_version", default=1, type=int + ) # my special pile version parser.add_argument("--my_pile_stage", default=0, type=int) # my special pile mode - parser.add_argument("--my_pile_shift", default=-1, type=int) # my special pile mode - text shift + parser.add_argument( + "--my_pile_shift", default=-1, type=int + ) # my special pile mode - text shift parser.add_argument("--my_pile_edecay", default=0, type=int) - parser.add_argument("--layerwise_lr", default=1, type=int) # layerwise lr for faster convergence (but slower it/s) - parser.add_argument("--ds_bucket_mb", default=200, type=int) # deepspeed bucket size in MB. 200 seems enough + parser.add_argument( + "--layerwise_lr", default=1, type=int + ) # layerwise lr for faster convergence (but slower it/s) + parser.add_argument( + "--ds_bucket_mb", default=200, type=int + ) # deepspeed bucket size in MB. 
200 seems enough # parser.add_argument("--cuda_cleanup", default=0, type=int) # extra cuda cleanup (sometimes helpful) parser.add_argument("--my_img_version", default=0, type=str) parser.add_argument("--my_img_size", default=0, type=int) parser.add_argument("--my_img_bit", default=0, type=int) - parser.add_argument("--my_img_clip", default='x', type=str) + parser.add_argument("--my_img_clip", default="x", type=str) parser.add_argument("--my_img_clip_scale", default=1, type=float) parser.add_argument("--my_img_l1_scale", default=0, type=float) - parser.add_argument("--my_img_encoder", default='x', type=str) + parser.add_argument("--my_img_encoder", default="x", type=str) # parser.add_argument("--my_img_noise_scale", default=0, type=float) parser.add_argument("--my_sample_len", default=0, type=int) parser.add_argument("--my_ffn_shift", default=1, type=int) @@ -107,7 +145,7 @@ parser.add_argument("--load_partial", default=0, type=int) parser.add_argument("--magic_prime", default=0, type=int) parser.add_argument("--my_qa_mask", default=0, type=int) - parser.add_argument("--my_testing", default='', type=str) + parser.add_argument("--my_testing", default="", type=str) parser = Trainer.add_argparse_args(parser) args = parser.parse_args() @@ -118,18 +156,26 @@ import numpy as np import torch from torch.utils.data import DataLoader + if "deepspeed" in args.strategy: import deepspeed import pytorch_lightning as pl from pytorch_lightning import seed_everything if args.random_seed >= 0: - print(f"########## WARNING: GLOBAL SEED {args.random_seed} THIS WILL AFFECT MULTIGPU SAMPLING ##########\n" * 3) + print( + f"########## WARNING: GLOBAL SEED {args.random_seed} THIS WILL AFFECT MULTIGPU SAMPLING ##########\n" + * 3 + ) seed_everything(args.random_seed) np.set_printoptions(precision=4, suppress=True, linewidth=200) - warnings.filterwarnings("ignore", ".*Consider increasing the value of the `num_workers` argument*") - warnings.filterwarnings("ignore", ".*The progress bar 
already tracks a metric with the*") + warnings.filterwarnings( + "ignore", ".*Consider increasing the value of the `num_workers` argument*" + ) + warnings.filterwarnings( + "ignore", ".*The progress bar already tracks a metric with the*" + ) # os.environ["WDS_SHOW_SEED"] = "1" args.my_timestamp = datetime.datetime.today().strftime("%Y-%m-%d-%H-%M-%S") @@ -154,7 +200,9 @@ args.run_name = f"v{args.my_img_version}-{args.my_img_size}-{args.my_img_bit}bit-{args.my_img_clip}x{args.my_img_clip_scale}" args.proj_dir = f"{args.proj_dir}-{args.run_name}" else: - args.run_name = f"{args.vocab_size} ctx{args.ctx_len} L{args.n_layer} D{args.n_embd}" + args.run_name = ( + f"{args.vocab_size} ctx{args.ctx_len} L{args.n_layer} D{args.n_embd}" + ) if not os.path.exists(args.proj_dir): os.makedirs(args.proj_dir) @@ -242,18 +290,32 @@ ) rank_zero_info(str(vars(args)) + "\n") - assert args.data_type in ["utf-8", "utf-16le", "numpy", "binidx", "dummy", "wds_img", "uint16"] + assert args.data_type in [ + "utf-8", + "utf-16le", + "numpy", + "binidx", + "dummy", + "wds_img", + "uint16", + ] if args.lr_final == 0 or args.lr_init == 0: - rank_zero_info("\n\nNote: lr_final = 0 or lr_init = 0. Using linear LR schedule instead.\n\n") + rank_zero_info( + "\n\nNote: lr_final = 0 or lr_init = 0. Using linear LR schedule instead.\n\n" + ) assert args.precision in ["fp32", "tf32", "fp16", "bf16"] os.environ["RWKV_FLOAT_MODE"] = args.precision if args.precision == "fp32": for i in range(10): - rank_zero_info("\n\nNote: you are using fp32 (very slow). Try bf16 / tf32 for faster training.\n\n") + rank_zero_info( + "\n\nNote: you are using fp32 (very slow). Try bf16 / tf32 for faster training.\n\n" + ) if args.precision == "fp16": - rank_zero_info("\n\nNote: you are using fp16 (might overflow). Try bf16 / tf32 for stable training.\n\n") + rank_zero_info( + "\n\nNote: you are using fp16 (might overflow). 
Try bf16 / tf32 for stable training.\n\n" + ) os.environ["RWKV_JIT_ON"] = "1" if "deepspeed_stage_3" in args.strategy: @@ -283,11 +345,13 @@ train_data = MyDataset(args) args.vocab_size = train_data.vocab_size - if args.data_type == 'wds_img': + if args.data_type == "wds_img": from src.model_img import RWKV_IMG + model = RWKV_IMG(args) else: from src.model import RWKV + model = RWKV(args) # if len(args.load_model) == 0 or args.my_pile_stage == 1: # shall we build the initial weights? @@ -335,10 +399,22 @@ print(f"{str(shape[0]).ljust(5)} {n}") if "deepspeed" in args.strategy: - trainer.strategy.config["zero_optimization"]["allgather_bucket_size"] = args.ds_bucket_mb * 1000 * 1000 - trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = args.ds_bucket_mb * 1000 * 1000 + trainer.strategy.config["zero_optimization"]["allgather_bucket_size"] = ( + args.ds_bucket_mb * 1000 * 1000 + ) + trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = ( + args.ds_bucket_mb * 1000 * 1000 + ) # must set shuffle=False, persistent_workers=False (because worker is in another thread) - data_loader = DataLoader(train_data, shuffle=False, pin_memory=True, batch_size=args.micro_bsz, num_workers=1, persistent_workers=False, drop_last=True) + data_loader = DataLoader( + train_data, + shuffle=False, + pin_memory=True, + batch_size=args.micro_bsz, + num_workers=1, + persistent_workers=False, + drop_last=True, + ) trainer.fit(model, data_loader) diff --git a/benchmarks/rwkv/rwkv-v4neo/verify.py b/benchmarks/rwkv/rwkv-v4neo/verify.py index 4f56e392f..695e651f2 100644 --- a/benchmarks/rwkv/rwkv-v4neo/verify.py +++ b/benchmarks/rwkv/rwkv-v4neo/verify.py @@ -7,6 +7,7 @@ import os, sys, types import numpy as np import torch + np.set_printoptions(precision=4, suppress=True, linewidth=200) try: os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1] @@ -16,23 +17,24 @@ torch.backends.cudnn.allow_tf32 = False torch.backends.cuda.matmul.allow_tf32 = False 
-os.environ['RWKV_FLOAT_MODE'] = 'bf16' # bf16 or fp32 -os.environ['RWKV_RUN_DEVICE'] = 'cuda' # currently model_train requires CUDA -RUN_DEVICE = os.environ['RWKV_RUN_DEVICE'] +os.environ["RWKV_FLOAT_MODE"] = "bf16" # bf16 or fp32 +os.environ["RWKV_RUN_DEVICE"] = "cuda" # currently model_train requires CUDA +RUN_DEVICE = os.environ["RWKV_RUN_DEVICE"] -TOKEN_MODE = 'pile' +TOKEN_MODE = "pile" -if TOKEN_MODE == 'pile': - WORD_NAME = ['20B_tokenizer.json', '20B_tokenizer.json'] - MODEL_NAME = '/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-3b/RWKV-4-Pile-3B-20221003-6783' +if TOKEN_MODE == "pile": + WORD_NAME = ["20B_tokenizer.json", "20B_tokenizer.json"] + MODEL_NAME = "/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-3b/RWKV-4-Pile-3B-20221003-6783" n_layer = 32 n_embd = 2560 ctx_len = 1024 UNKNOWN_CHAR = None from src.utils import TOKENIZER + tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR) -if TOKEN_MODE == 'pile': +if TOKEN_MODE == "pile": tokenizer.vocab_size = 50277 ######################################################################################################## @@ -54,23 +56,23 @@ args.my_pos_emb = 0 model_train = RWKV(args).to(RUN_DEVICE) -if os.environ['RWKV_FLOAT_MODE'] == 'fp16': +if os.environ["RWKV_FLOAT_MODE"] == "fp16": model_train = model_train.half() -elif os.environ['RWKV_FLOAT_MODE'] == 'bf16': +elif os.environ["RWKV_FLOAT_MODE"] == "bf16": model_train = model_train.bfloat16() -print('loading ' + MODEL_NAME) -m2 = torch.load(MODEL_NAME + '.pth', map_location='cpu') +print("loading " + MODEL_NAME) +m2 = torch.load(MODEL_NAME + ".pth", map_location="cpu") model_train.load_state_dict(m2) -if os.environ['RWKV_FLOAT_MODE'] == 'fp16': +if os.environ["RWKV_FLOAT_MODE"] == "fp16": model_train = model_train.half() -elif os.environ['RWKV_FLOAT_MODE'] == 'bf16': +elif os.environ["RWKV_FLOAT_MODE"] == "bf16": model_train = model_train.bfloat16() args.MODEL_NAME = MODEL_NAME args.RUN_DEVICE = RUN_DEVICE -args.FLOAT_MODE = os.environ['RWKV_FLOAT_MODE'] +args.FLOAT_MODE = 
os.environ["RWKV_FLOAT_MODE"] model_rnn = RWKV_RNN(args) ######################################################################################################## @@ -78,27 +80,33 @@ print(f"\nVerifying {os.environ['RWKV_RUN_DEVICE']} {os.environ['RWKV_FLOAT_MODE']}") # context = '\nIn a' -context = '\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese.' +context = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese." -if TOKEN_MODE == 'pile': +if TOKEN_MODE == "pile": ctx = tokenizer.tokenizer.encode(context) -print(f'input len {len(ctx)} data {ctx}') +print(f"input len {len(ctx)} data {ctx}") ######################################################################################################## with torch.no_grad(): - print('\nRWKV-train output') - out = model_train.forward(torch.tensor([ctx]).to(RUN_DEVICE))[0].detach().cpu().float().numpy() - print(out, '\n') - - print('\nRWKV-RNN output') + print("\nRWKV-train output") + out = ( + model_train.forward(torch.tensor([ctx]).to(RUN_DEVICE))[0] + .detach() + .cpu() + .float() + .numpy() + ) + print(out, "\n") + + print("\nRWKV-RNN output") state = None out = None src_len = len(ctx) for i in range(src_len): - x = ctx[:i+1] + x = ctx[: i + 1] out, state = model_rnn.forward(x, state) if i < 3 or i >= src_len - 3: print(out.detach().cpu().numpy()) if i == 2: - print('...') + print("...") diff --git a/benchmarks/stable_baselines3/requirements.cuda.txt b/benchmarks/stable_baselines3/requirements.cuda.txt index a2dcc069c..7af1e0780 100644 --- a/benchmarks/stable_baselines3/requirements.cuda.txt +++ b/benchmarks/stable_baselines3/requirements.cuda.txt @@ -304,7 +304,7 @@ urllib3==1.26.15 # sentry-sdk 
varname==0.10.0 # via giving -voir==0.2.10 +voir @ git+https://github.com/breuleux/voir.git # via -r benchmarks/stable_baselines3/requirements.in wandb==0.14.0 # via -r benchmarks/stable_baselines3/requirements.in diff --git a/benchmarks/stable_baselines3/requirements.in b/benchmarks/stable_baselines3/requirements.in index 01d3a157b..0cb5acce8 100644 --- a/benchmarks/stable_baselines3/requirements.in +++ b/benchmarks/stable_baselines3/requirements.in @@ -19,4 +19,4 @@ seaborn tqdm # Following limits are for milabench -voir>=0.2.9,<0.3.0 +voir diff --git a/benchmarks/stable_baselines3/requirements.rocm.txt b/benchmarks/stable_baselines3/requirements.rocm.txt index 2096b0592..14b49bd41 100644 --- a/benchmarks/stable_baselines3/requirements.rocm.txt +++ b/benchmarks/stable_baselines3/requirements.rocm.txt @@ -304,7 +304,7 @@ urllib3==1.26.15 # sentry-sdk varname==0.10.0 # via giving -voir==0.2.10 +voir @ git+https://github.com/breuleux/voir.git # via -r benchmarks/stable_baselines3/requirements.in wandb==0.14.0 # via -r benchmarks/stable_baselines3/requirements.in diff --git a/benchmarks/stargan/requirements.cuda.txt b/benchmarks/stargan/requirements.cuda.txt index 16485fb78..8a8cea8eb 100644 --- a/benchmarks/stargan/requirements.cuda.txt +++ b/benchmarks/stargan/requirements.cuda.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/stargan/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-stargan.txt benchmarks/stargan/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/stargan/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-stargan.txt benchmarks/stargan/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -10,22 +10,18 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # 
omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -34,11 +30,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -52,15 +52,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -72,11 +68,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/stargan/requirements.in # torchvision @@ -88,7 +84,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -96,7 +92,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -104,7 +100,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyyaml==6.0 +pyyaml==6.0.1 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # omegaconf @@ -116,7 +112,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -128,23 +124,22 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/stargan/requirements.in # torchvision - # triton -torchvision==0.15.2+cu118 +torchvision==0.16.0+cu118 # via -r benchmarks/stargan/requirements.in -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -152,5 +147,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/stargan/requirements.in b/benchmarks/stargan/requirements.in index b57914d92..bae650375 100644 --- a/benchmarks/stargan/requirements.in +++ b/benchmarks/stargan/requirements.in @@ -1,4 +1,4 @@ numpy torch torchvision -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/stargan/requirements.rocm.txt b/benchmarks/stargan/requirements.rocm.txt index 594c94949..d2b904c55 100644 --- a/benchmarks/stargan/requirements.rocm.txt +++ b/benchmarks/stargan/requirements.rocm.txt @@ -1,28 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/stargan/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-stargan.txt benchmarks/stargan/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/stargan/requirements.rocm.txt --resolver=backtracking 
.pin/tmp-constraints-rocm-stargan.txt benchmarks/stargan/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -34,11 +34,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm # torch +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -52,15 +56,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -72,11 +76,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/stargan/requirements.in # torchvision @@ -88,7 +92,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -96,7 +100,7 @@ ptera==1.4.1 # via # -c 
.pin/../.pin/constraints-rocm-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -104,11 +108,11 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf @@ -120,7 +124,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir @@ -132,19 +136,19 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/stargan/requirements.in # pytorch-triton-rocm # torchvision -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via -r benchmarks/stargan/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -152,5 +156,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/stargan/stargan/data_loader.py b/benchmarks/stargan/stargan/data_loader.py index d0c5eacb8..2f79594c6 100644 --- a/benchmarks/stargan/stargan/data_loader.py +++ b/benchmarks/stargan/stargan/data_loader.py @@ -23,14 +23,14 @@ def __init__(self, image_dir, attr_path, selected_attrs, transform, mode): self.idx2attr = {} self.preprocess() - if mode == 'train': + if mode == "train": self.num_images = len(self.train_dataset) else: self.num_images = len(self.test_dataset) def preprocess(self): """Preprocess the CelebA attribute file.""" - lines = [line.rstrip() for line in open(self.attr_path, 'r')] + lines = [line.rstrip() for 
line in open(self.attr_path, "r")] all_attr_names = lines[1].split() for i, attr_name in enumerate(all_attr_names): self.attr2idx[attr_name] = i @@ -47,18 +47,18 @@ def preprocess(self): label = [] for attr_name in self.selected_attrs: idx = self.attr2idx[attr_name] - label.append(values[idx] == '1') + label.append(values[idx] == "1") - if (i+1) < 2000: + if (i + 1) < 2000: self.test_dataset.append([filename, label]) else: self.train_dataset.append([filename, label]) - print('Finished preprocessing the CelebA dataset...') + print("Finished preprocessing the CelebA dataset...") def __getitem__(self, index): """Return one image and its corresponding attribute label.""" - dataset = self.train_dataset if self.mode == 'train' else self.test_dataset + dataset = self.train_dataset if self.mode == "train" else self.test_dataset filename, label = dataset[index] image = Image.open(os.path.join(self.image_dir, filename)) return self.transform(image), torch.FloatTensor(label) @@ -68,11 +68,20 @@ def __len__(self): return self.num_images -def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=128, - batch_size=16, dataset='CelebA', mode='train', num_workers=1): +def get_loader( + image_dir, + attr_path, + selected_attrs, + crop_size=178, + image_size=128, + batch_size=16, + dataset="CelebA", + mode="train", + num_workers=1, +): """Build and return a data loader.""" transform = [] - if mode == 'train': + if mode == "train": transform.append(T.RandomHorizontalFlip()) transform.append(T.CenterCrop(crop_size)) transform.append(T.Resize(image_size)) @@ -80,13 +89,15 @@ def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=1 transform.append(T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))) transform = T.Compose(transform) - if dataset == 'CelebA': + if dataset == "CelebA": dataset = CelebA(image_dir, attr_path, selected_attrs, transform, mode) - elif dataset == 'RaFD': + elif dataset == "RaFD": dataset = ImageFolder(image_dir, 
transform) - data_loader = data.DataLoader(dataset=dataset, - batch_size=batch_size, - shuffle=(mode=='train'), - num_workers=num_workers) - return data_loader \ No newline at end of file + data_loader = data.DataLoader( + dataset=dataset, + batch_size=batch_size, + shuffle=(mode == "train"), + num_workers=num_workers, + ) + return data_loader diff --git a/benchmarks/stargan/stargan/logger.py b/benchmarks/stargan/stargan/logger.py index f30431e8b..ffed8a260 100644 --- a/benchmarks/stargan/stargan/logger.py +++ b/benchmarks/stargan/stargan/logger.py @@ -11,4 +11,4 @@ def __init__(self, log_dir): def scalar_summary(self, tag, value, step): """Add scalar summary.""" summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) - self.writer.add_summary(summary, step) \ No newline at end of file + self.writer.add_summary(summary, step) diff --git a/benchmarks/stargan/stargan/main.py b/benchmarks/stargan/stargan/main.py index 754c7efa2..d7b411fdc 100644 --- a/benchmarks/stargan/stargan/main.py +++ b/benchmarks/stargan/stargan/main.py @@ -9,9 +9,9 @@ from torch.utils.data import DataLoader - def str2bool(v): - return v.lower() in ('true') + return v.lower() in ("true") + def main(config): # For fast training. 
@@ -28,15 +28,32 @@ def main(config): rafd_loader = None synth_loader = None - if config.dataset in ['CelebA', 'Both']: - celeba_loader = get_loader(config.celeba_image_dir, config.attr_path, config.selected_attrs, - config.celeba_crop_size, config.image_size, config.batch_size, - 'CelebA', config.mode, config.num_workers) - if config.dataset in ['RaFD', 'Both']: - rafd_loader = get_loader(config.rafd_image_dir, None, None, - config.rafd_crop_size, config.image_size, config.batch_size, - 'RaFD', config.mode, config.num_workers) + if config.dataset in ["CelebA", "Both"]: + celeba_loader = get_loader( + config.celeba_image_dir, + config.attr_path, + config.selected_attrs, + config.celeba_crop_size, + config.image_size, + config.batch_size, + "CelebA", + config.mode, + config.num_workers, + ) + if config.dataset in ["RaFD", "Both"]: + rafd_loader = get_loader( + config.rafd_image_dir, + None, + None, + config.rafd_crop_size, + config.image_size, + config.batch_size, + "RaFD", + config.mode, + config.num_workers, + ) if config.dataset == "synth": + def igen(): return torch.rand((3, config.image_size, config.image_size)) * 2 - 1 @@ -48,81 +65,158 @@ def ogen(): n=config.batch_size, repeat=10000, ) - synth_loader = DataLoader(synth_dataset, batch_size=config.batch_size, num_workers=config.num_workers) - + synth_loader = DataLoader( + synth_dataset, batch_size=config.batch_size, num_workers=config.num_workers + ) # Solver for training and testing StarGAN. 
solver = Solver(celeba_loader, rafd_loader, synth_loader, config) - if config.mode == 'train': - if config.dataset in ['CelebA', 'RaFD', 'synth']: + if config.mode == "train": + if config.dataset in ["CelebA", "RaFD", "synth"]: solver.train() - elif config.dataset in ['Both']: + elif config.dataset in ["Both"]: solver.train_multi() - elif config.mode == 'test': - if config.dataset in ['CelebA', 'RaFD', 'synth']: + elif config.mode == "test": + if config.dataset in ["CelebA", "RaFD", "synth"]: solver.test() - elif config.dataset in ['Both']: + elif config.dataset in ["Both"]: solver.test_multi() -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() # Model configuration. - parser.add_argument('--c_dim', type=int, default=5, help='dimension of domain labels (1st dataset)') - parser.add_argument('--c2_dim', type=int, default=8, help='dimension of domain labels (2nd dataset)') - parser.add_argument('--celeba_crop_size', type=int, default=178, help='crop size for the CelebA dataset') - parser.add_argument('--rafd_crop_size', type=int, default=256, help='crop size for the RaFD dataset') - parser.add_argument('--image_size', type=int, default=128, help='image resolution') - parser.add_argument('--g_conv_dim', type=int, default=64, help='number of conv filters in the first layer of G') - parser.add_argument('--d_conv_dim', type=int, default=64, help='number of conv filters in the first layer of D') - parser.add_argument('--g_repeat_num', type=int, default=6, help='number of residual blocks in G') - parser.add_argument('--d_repeat_num', type=int, default=6, help='number of strided conv layers in D') - parser.add_argument('--lambda_cls', type=float, default=1, help='weight for domain classification loss') - parser.add_argument('--lambda_rec', type=float, default=10, help='weight for reconstruction loss') - parser.add_argument('--lambda_gp', type=float, default=10, help='weight for gradient penalty') - + parser.add_argument( + "--c_dim", 
type=int, default=5, help="dimension of domain labels (1st dataset)" + ) + parser.add_argument( + "--c2_dim", type=int, default=8, help="dimension of domain labels (2nd dataset)" + ) + parser.add_argument( + "--celeba_crop_size", + type=int, + default=178, + help="crop size for the CelebA dataset", + ) + parser.add_argument( + "--rafd_crop_size", type=int, default=256, help="crop size for the RaFD dataset" + ) + parser.add_argument("--image_size", type=int, default=128, help="image resolution") + parser.add_argument( + "--g_conv_dim", + type=int, + default=64, + help="number of conv filters in the first layer of G", + ) + parser.add_argument( + "--d_conv_dim", + type=int, + default=64, + help="number of conv filters in the first layer of D", + ) + parser.add_argument( + "--g_repeat_num", type=int, default=6, help="number of residual blocks in G" + ) + parser.add_argument( + "--d_repeat_num", type=int, default=6, help="number of strided conv layers in D" + ) + parser.add_argument( + "--lambda_cls", + type=float, + default=1, + help="weight for domain classification loss", + ) + parser.add_argument( + "--lambda_rec", type=float, default=10, help="weight for reconstruction loss" + ) + parser.add_argument( + "--lambda_gp", type=float, default=10, help="weight for gradient penalty" + ) + # Training configuration. 
- parser.add_argument('--dataset', type=str, default='synth', choices=['CelebA', 'RaFD', 'Both', 'synth']) - parser.add_argument('--batch_size', type=int, default=16, help='mini-batch size') - parser.add_argument('--num_iters', type=int, default=200000, help='number of total iterations for training D') - parser.add_argument('--num_iters_decay', type=int, default=100000, help='number of iterations for decaying lr') - parser.add_argument('--g_lr', type=float, default=0.0001, help='learning rate for G') - parser.add_argument('--d_lr', type=float, default=0.0001, help='learning rate for D') - parser.add_argument('--n_critic', type=int, default=5, help='number of D updates per each G update') - parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for Adam optimizer') - parser.add_argument('--beta2', type=float, default=0.999, help='beta2 for Adam optimizer') - parser.add_argument('--resume_iters', type=int, default=None, help='resume training from this step') - parser.add_argument('--selected_attrs', '--list', nargs='+', help='selected attributes for the CelebA dataset', - default=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young']) + parser.add_argument( + "--dataset", + type=str, + default="synth", + choices=["CelebA", "RaFD", "Both", "synth"], + ) + parser.add_argument("--batch_size", type=int, default=16, help="mini-batch size") + parser.add_argument( + "--num_iters", + type=int, + default=200000, + help="number of total iterations for training D", + ) + parser.add_argument( + "--num_iters_decay", + type=int, + default=100000, + help="number of iterations for decaying lr", + ) + parser.add_argument( + "--g_lr", type=float, default=0.0001, help="learning rate for G" + ) + parser.add_argument( + "--d_lr", type=float, default=0.0001, help="learning rate for D" + ) + parser.add_argument( + "--n_critic", type=int, default=5, help="number of D updates per each G update" + ) + parser.add_argument( + "--beta1", type=float, default=0.5, help="beta1 for 
Adam optimizer" + ) + parser.add_argument( + "--beta2", type=float, default=0.999, help="beta2 for Adam optimizer" + ) + parser.add_argument( + "--resume_iters", type=int, default=None, help="resume training from this step" + ) + parser.add_argument( + "--selected_attrs", + "--list", + nargs="+", + help="selected attributes for the CelebA dataset", + default=["Black_Hair", "Blond_Hair", "Brown_Hair", "Male", "Young"], + ) # Test configuration. - parser.add_argument('--test_iters', type=int, default=200000, help='test model from this step') + parser.add_argument( + "--test_iters", type=int, default=200000, help="test model from this step" + ) # Miscellaneous. - parser.add_argument('--num_workers', type=int, default=1) - parser.add_argument('--mode', type=str, default='train', choices=['train', 'test']) - parser.add_argument('--use_tensorboard', type=str2bool, default=False) + parser.add_argument("--num_workers", type=int, default=1) + parser.add_argument("--mode", type=str, default="train", choices=["train", "test"]) + parser.add_argument("--use_tensorboard", type=str2bool, default=False) mbconfig = json.loads(os.environ["MILABENCH_CONFIG"]) datadir = mbconfig["dirs"]["extra"] # Directories. 
- parser.add_argument('--celeba_image_dir', type=str, default='data/celeba/images') - parser.add_argument('--attr_path', type=str, default='data/celeba/list_attr_celeba.txt') - parser.add_argument('--rafd_image_dir', type=str, default='data/RaFD/train') - parser.add_argument('--log_dir', type=str, default=os.path.join(datadir, 'logs')) - parser.add_argument('--model_save_dir', type=str, default=os.path.join(datadir, 'models')) - parser.add_argument('--sample_dir', type=str, default=os.path.join(datadir, 'samples')) - parser.add_argument('--result_dir', type=str, default=os.path.join(datadir, 'results')) + parser.add_argument("--celeba_image_dir", type=str, default="data/celeba/images") + parser.add_argument( + "--attr_path", type=str, default="data/celeba/list_attr_celeba.txt" + ) + parser.add_argument("--rafd_image_dir", type=str, default="data/RaFD/train") + parser.add_argument("--log_dir", type=str, default=os.path.join(datadir, "logs")) + parser.add_argument( + "--model_save_dir", type=str, default=os.path.join(datadir, "models") + ) + parser.add_argument( + "--sample_dir", type=str, default=os.path.join(datadir, "samples") + ) + parser.add_argument( + "--result_dir", type=str, default=os.path.join(datadir, "results") + ) # Step size. 
- parser.add_argument('--log_step', type=int, default=10) - parser.add_argument('--sample_step', type=int, default=1000) - parser.add_argument('--model_save_step', type=int, default=10000) - parser.add_argument('--lr_update_step', type=int, default=1000) + parser.add_argument("--log_step", type=int, default=10) + parser.add_argument("--sample_step", type=int, default=1000) + parser.add_argument("--model_save_step", type=int, default=10000) + parser.add_argument("--lr_update_step", type=int, default=1000) config = parser.parse_args() print(config) - main(config) \ No newline at end of file + main(config) diff --git a/benchmarks/stargan/stargan/model.py b/benchmarks/stargan/stargan/model.py index 3d0e62755..a9ecb43e3 100644 --- a/benchmarks/stargan/stargan/model.py +++ b/benchmarks/stargan/stargan/model.py @@ -6,6 +6,7 @@ class ResidualBlock(nn.Module): """Residual Block with instance normalization.""" + def __init__(self, dim_in, dim_out): super(ResidualBlock, self).__init__() self.main = nn.Sequential( @@ -13,7 +14,8 @@ def __init__(self, dim_in, dim_out): nn.InstanceNorm2d(dim_out, affine=True, track_running_stats=True), nn.ReLU(inplace=True), nn.Conv2d(dim_out, dim_out, kernel_size=3, stride=1, padding=1, bias=False), - nn.InstanceNorm2d(dim_out, affine=True, track_running_stats=True)) + nn.InstanceNorm2d(dim_out, affine=True, track_running_stats=True), + ) def forward(self, x): return x + self.main(x) @@ -21,19 +23,37 @@ def forward(self, x): class Generator(nn.Module): """Generator network.""" + def __init__(self, conv_dim=64, c_dim=5, repeat_num=6): super(Generator, self).__init__() layers = [] - layers.append(nn.Conv2d(3+c_dim, conv_dim, kernel_size=7, stride=1, padding=3, bias=False)) - layers.append(nn.InstanceNorm2d(conv_dim, affine=True, track_running_stats=True)) + layers.append( + nn.Conv2d( + 3 + c_dim, conv_dim, kernel_size=7, stride=1, padding=3, bias=False + ) + ) + layers.append( + nn.InstanceNorm2d(conv_dim, affine=True, track_running_stats=True) 
+ ) layers.append(nn.ReLU(inplace=True)) # Down-sampling layers. curr_dim = conv_dim for i in range(2): - layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1, bias=False)) - layers.append(nn.InstanceNorm2d(curr_dim*2, affine=True, track_running_stats=True)) + layers.append( + nn.Conv2d( + curr_dim, + curr_dim * 2, + kernel_size=4, + stride=2, + padding=1, + bias=False, + ) + ) + layers.append( + nn.InstanceNorm2d(curr_dim * 2, affine=True, track_running_stats=True) + ) layers.append(nn.ReLU(inplace=True)) curr_dim = curr_dim * 2 @@ -43,12 +63,25 @@ def __init__(self, conv_dim=64, c_dim=5, repeat_num=6): # Up-sampling layers. for i in range(2): - layers.append(nn.ConvTranspose2d(curr_dim, curr_dim//2, kernel_size=4, stride=2, padding=1, bias=False)) - layers.append(nn.InstanceNorm2d(curr_dim//2, affine=True, track_running_stats=True)) + layers.append( + nn.ConvTranspose2d( + curr_dim, + curr_dim // 2, + kernel_size=4, + stride=2, + padding=1, + bias=False, + ) + ) + layers.append( + nn.InstanceNorm2d(curr_dim // 2, affine=True, track_running_stats=True) + ) layers.append(nn.ReLU(inplace=True)) curr_dim = curr_dim // 2 - layers.append(nn.Conv2d(curr_dim, 3, kernel_size=7, stride=1, padding=3, bias=False)) + layers.append( + nn.Conv2d(curr_dim, 3, kernel_size=7, stride=1, padding=3, bias=False) + ) layers.append(nn.Tanh()) self.main = nn.Sequential(*layers) @@ -64,6 +97,7 @@ def forward(self, x, c): class Discriminator(nn.Module): """Discriminator network with PatchGAN.""" + def __init__(self, image_size=128, conv_dim=64, c_dim=5, repeat_num=6): super(Discriminator, self).__init__() layers = [] @@ -72,15 +106,19 @@ def __init__(self, image_size=128, conv_dim=64, c_dim=5, repeat_num=6): curr_dim = conv_dim for i in range(1, repeat_num): - layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1)) + layers.append( + nn.Conv2d(curr_dim, curr_dim * 2, kernel_size=4, stride=2, padding=1) + ) 
layers.append(nn.LeakyReLU(0.01)) curr_dim = curr_dim * 2 kernel_size = int(image_size / np.power(2, repeat_num)) self.main = nn.Sequential(*layers) - self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False) + self.conv1 = nn.Conv2d( + curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False + ) self.conv2 = nn.Conv2d(curr_dim, c_dim, kernel_size=kernel_size, bias=False) - + def forward(self, x): h = self.main(x) out_src = self.conv1(h) diff --git a/benchmarks/stargan/stargan/solver.py b/benchmarks/stargan/stargan/solver.py index 00ee93cd9..d45bb6f9e 100644 --- a/benchmarks/stargan/stargan/solver.py +++ b/benchmarks/stargan/stargan/solver.py @@ -53,7 +53,7 @@ def __init__(self, celeba_loader, rafd_loader, synth_loader, config): # Miscellaneous. self.use_tensorboard = config.use_tensorboard - self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Directories. self.log_dir = config.log_dir @@ -74,18 +74,31 @@ def __init__(self, celeba_loader, rafd_loader, synth_loader, config): def build_model(self): """Create a generator and a discriminator.""" - if self.dataset in ['CelebA', 'RaFD', 'synth']: + if self.dataset in ["CelebA", "RaFD", "synth"]: self.G = Generator(self.g_conv_dim, self.c_dim, self.g_repeat_num) - self.D = Discriminator(self.image_size, self.d_conv_dim, self.c_dim, self.d_repeat_num) - elif self.dataset in ['Both']: - self.G = Generator(self.g_conv_dim, self.c_dim+self.c2_dim+2, self.g_repeat_num) # 2 for mask vector. 
- self.D = Discriminator(self.image_size, self.d_conv_dim, self.c_dim+self.c2_dim, self.d_repeat_num) - - self.g_optimizer = torch.optim.Adam(self.G.parameters(), self.g_lr, [self.beta1, self.beta2]) - self.d_optimizer = torch.optim.Adam(self.D.parameters(), self.d_lr, [self.beta1, self.beta2]) - self.print_network(self.G, 'G') - self.print_network(self.D, 'D') - + self.D = Discriminator( + self.image_size, self.d_conv_dim, self.c_dim, self.d_repeat_num + ) + elif self.dataset in ["Both"]: + self.G = Generator( + self.g_conv_dim, self.c_dim + self.c2_dim + 2, self.g_repeat_num + ) # 2 for mask vector. + self.D = Discriminator( + self.image_size, + self.d_conv_dim, + self.c_dim + self.c2_dim, + self.d_repeat_num, + ) + + self.g_optimizer = torch.optim.Adam( + self.G.parameters(), self.g_lr, [self.beta1, self.beta2] + ) + self.d_optimizer = torch.optim.Adam( + self.D.parameters(), self.d_lr, [self.beta1, self.beta2] + ) + self.print_network(self.G, "G") + self.print_network(self.D, "D") + self.G.to(self.device) self.D.to(self.device) @@ -100,23 +113,28 @@ def print_network(self, model, name): def restore_model(self, resume_iters): """Restore the trained generator and discriminator.""" - print('Loading the trained models from step {}...'.format(resume_iters)) - G_path = os.path.join(self.model_save_dir, '{}-G.ckpt'.format(resume_iters)) - D_path = os.path.join(self.model_save_dir, '{}-D.ckpt'.format(resume_iters)) - self.G.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage)) - self.D.load_state_dict(torch.load(D_path, map_location=lambda storage, loc: storage)) + print("Loading the trained models from step {}...".format(resume_iters)) + G_path = os.path.join(self.model_save_dir, "{}-G.ckpt".format(resume_iters)) + D_path = os.path.join(self.model_save_dir, "{}-D.ckpt".format(resume_iters)) + self.G.load_state_dict( + torch.load(G_path, map_location=lambda storage, loc: storage) + ) + self.D.load_state_dict( + torch.load(D_path, 
map_location=lambda storage, loc: storage) + ) def build_tensorboard(self): """Build a tensorboard logger.""" from logger import Logger + self.logger = Logger(self.log_dir) def update_lr(self, g_lr, d_lr): """Decay learning rates of the generator and discriminator.""" for param_group in self.g_optimizer.param_groups: - param_group['lr'] = g_lr + param_group["lr"] = g_lr for param_group in self.d_optimizer.param_groups: - param_group['lr'] = d_lr + param_group["lr"] = d_lr def reset_grad(self): """Reset the gradient buffers.""" @@ -131,16 +149,18 @@ def denorm(self, x): def gradient_penalty(self, y, x): """Compute gradient penalty: (L2_norm(dy/dx) - 1)**2.""" weight = torch.ones(y.size()).to(self.device) - dydx = torch.autograd.grad(outputs=y, - inputs=x, - grad_outputs=weight, - retain_graph=True, - create_graph=True, - only_inputs=True)[0] + dydx = torch.autograd.grad( + outputs=y, + inputs=x, + grad_outputs=weight, + retain_graph=True, + create_graph=True, + only_inputs=True, + )[0] dydx = dydx.view(dydx.size(0), -1) dydx_l2norm = torch.sqrt(torch.sum(dydx**2, dim=1)) - return torch.mean((dydx_l2norm-1)**2) + return torch.mean((dydx_l2norm - 1) ** 2) def label2onehot(self, labels, dim): """Convert label indices to one-hot vectors.""" @@ -149,54 +169,60 @@ def label2onehot(self, labels, dim): out[np.arange(batch_size), labels.long()] = 1 return out - def create_labels(self, c_org, c_dim=5, dataset='CelebA', selected_attrs=None): + def create_labels(self, c_org, c_dim=5, dataset="CelebA", selected_attrs=None): """Generate target domain labels for debugging and testing.""" # Get hair color indices. 
- if dataset == 'CelebA': + if dataset == "CelebA": hair_color_indices = [] for i, attr_name in enumerate(selected_attrs): - if attr_name in ['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Gray_Hair']: + if attr_name in ["Black_Hair", "Blond_Hair", "Brown_Hair", "Gray_Hair"]: hair_color_indices.append(i) c_trg_list = [] for i in range(c_dim): - if dataset == 'CelebA': + if dataset == "CelebA": c_trg = c_org.clone() - if i in hair_color_indices: # Set one hair color to 1 and the rest to 0. + if ( + i in hair_color_indices + ): # Set one hair color to 1 and the rest to 0. c_trg[:, i] = 1 for j in hair_color_indices: if j != i: c_trg[:, j] = 0 else: - c_trg[:, i] = (c_trg[:, i] == 0) # Reverse attribute value. - elif dataset == 'RaFD' or dataset == "synth": - c_trg = self.label2onehot(torch.ones(c_org.size(0))*i, c_dim) + c_trg[:, i] = c_trg[:, i] == 0 # Reverse attribute value. + elif dataset == "RaFD" or dataset == "synth": + c_trg = self.label2onehot(torch.ones(c_org.size(0)) * i, c_dim) c_trg_list.append(c_trg.to(self.device)) return c_trg_list - def classification_loss(self, logit, target, dataset='CelebA'): + def classification_loss(self, logit, target, dataset="CelebA"): """Compute binary or softmax cross entropy loss.""" - if dataset == 'CelebA' or dataset == "synth": - return F.binary_cross_entropy_with_logits(logit, target, size_average=False) / logit.size(0) - elif dataset == 'RaFD': + if dataset == "CelebA" or dataset == "synth": + return F.binary_cross_entropy_with_logits( + logit, target, size_average=False + ) / logit.size(0) + elif dataset == "RaFD": return F.cross_entropy(logit, target) def train(self): """Train StarGAN within a single dataset.""" # Set data loader. 
- if self.dataset == 'CelebA': + if self.dataset == "CelebA": data_loader = self.celeba_loader - elif self.dataset == 'RaFD': + elif self.dataset == "RaFD": data_loader = self.rafd_loader - elif self.dataset == 'synth': + elif self.dataset == "synth": data_loader = self.synth_loader # Fetch fixed inputs for debugging. data_iter = voir.iterate("train", data_loader, report_batch=True) x_fixed, c_org = next(data_iter) x_fixed = x_fixed.to(self.device) - c_fixed_list = self.create_labels(c_org, self.c_dim, self.dataset, self.selected_attrs) + c_fixed_list = self.create_labels( + c_org, self.c_dim, self.dataset, self.selected_attrs + ) # Learning rate cache for decaying. g_lr = self.g_lr @@ -209,10 +235,9 @@ def train(self): self.restore_model(self.resume_iters) # Start training. - print('Start training...') + print("Start training...") start_time = time.time() for i in range(start_iters, self.num_iters): - # =================================================================================== # # 1. Preprocess input data # # =================================================================================== # @@ -228,18 +253,22 @@ def train(self): rand_idx = torch.randperm(label_org.size(0)) label_trg = label_org[rand_idx] - if self.dataset == 'CelebA' or self.dataset == 'synth': + if self.dataset == "CelebA" or self.dataset == "synth": c_org = label_org.clone() c_trg = label_trg.clone() - elif self.dataset == 'RaFD': + elif self.dataset == "RaFD": c_org = self.label2onehot(label_org, self.c_dim) c_trg = self.label2onehot(label_trg, self.c_dim) - x_real = x_real.to(self.device) # Input images. - c_org = c_org.to(self.device) # Original domain labels. - c_trg = c_trg.to(self.device) # Target domain labels. - label_org = label_org.to(self.device) # Labels for computing classification loss. - label_trg = label_trg.to(self.device) # Labels for computing classification loss. + x_real = x_real.to(self.device) # Input images. 
+ c_org = c_org.to(self.device) # Original domain labels. + c_trg = c_trg.to(self.device) # Target domain labels. + label_org = label_org.to( + self.device + ) # Labels for computing classification loss. + label_trg = label_trg.to( + self.device + ) # Labels for computing classification loss. # =================================================================================== # # 2. Train the discriminator # @@ -247,7 +276,7 @@ def train(self): # Compute loss with real images. out_src, out_cls = self.D(x_real) - d_loss_real = - torch.mean(out_src) + d_loss_real = -torch.mean(out_src) d_loss_cls = self.classification_loss(out_cls, label_org, self.dataset) # Compute loss with fake images. @@ -257,12 +286,19 @@ def train(self): # Compute loss for gradient penalty. alpha = torch.rand(x_real.size(0), 1, 1, 1).to(self.device) - x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True) + x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_( + True + ) out_src, _ = self.D(x_hat) d_loss_gp = self.gradient_penalty(out_src, x_hat) # Backward and optimize. - d_loss = d_loss_real + d_loss_fake + self.lambda_cls * d_loss_cls + self.lambda_gp * d_loss_gp + d_loss = ( + d_loss_real + + d_loss_fake + + self.lambda_cls * d_loss_cls + + self.lambda_gp * d_loss_gp + ) give(task="train", loss=d_loss.item()) self.reset_grad() d_loss.backward() @@ -270,20 +306,20 @@ def train(self): # Logging. loss = {} - loss['D/loss_real'] = d_loss_real.item() - loss['D/loss_fake'] = d_loss_fake.item() - loss['D/loss_cls'] = d_loss_cls.item() - loss['D/loss_gp'] = d_loss_gp.item() - + loss["D/loss_real"] = d_loss_real.item() + loss["D/loss_fake"] = d_loss_fake.item() + loss["D/loss_cls"] = d_loss_cls.item() + loss["D/loss_gp"] = d_loss_gp.item() + # =================================================================================== # # 3. 
Train the generator # # =================================================================================== # - - if (i+1) % self.n_critic == 0: + + if (i + 1) % self.n_critic == 0: # Original-to-target domain. x_fake = self.G(x_real, c_trg) out_src, out_cls = self.D(x_fake) - g_loss_fake = - torch.mean(out_src) + g_loss_fake = -torch.mean(out_src) g_loss_cls = self.classification_loss(out_cls, label_trg, self.dataset) # Target-to-original domain. @@ -291,61 +327,73 @@ def train(self): g_loss_rec = torch.mean(torch.abs(x_real - x_reconst)) # Backward and optimize. - g_loss = g_loss_fake + self.lambda_rec * g_loss_rec + self.lambda_cls * g_loss_cls + g_loss = ( + g_loss_fake + + self.lambda_rec * g_loss_rec + + self.lambda_cls * g_loss_cls + ) self.reset_grad() g_loss.backward() self.g_optimizer.step() # Logging. - loss['G/loss_fake'] = g_loss_fake.item() - loss['G/loss_rec'] = g_loss_rec.item() - loss['G/loss_cls'] = g_loss_cls.item() + loss["G/loss_fake"] = g_loss_fake.item() + loss["G/loss_rec"] = g_loss_rec.item() + loss["G/loss_cls"] = g_loss_cls.item() # =================================================================================== # # 4. Miscellaneous # # =================================================================================== # # Print out training information. - if (i+1) % self.log_step == 0: + if (i + 1) % self.log_step == 0: et = time.time() - start_time et = str(datetime.timedelta(seconds=et))[:-7] - log = "Elapsed [{}], Iteration [{}/{}]".format(et, i+1, self.num_iters) + log = "Elapsed [{}], Iteration [{}/{}]".format( + et, i + 1, self.num_iters + ) for tag, value in loss.items(): log += ", {}: {:.4f}".format(tag, value) print(log) if self.use_tensorboard: for tag, value in loss.items(): - self.logger.scalar_summary(tag, value, i+1) + self.logger.scalar_summary(tag, value, i + 1) # Translate fixed images for debugging. 
- if (i+1) % self.sample_step == 0: + if (i + 1) % self.sample_step == 0: with torch.no_grad(): x_fake_list = [x_fixed] for c_fixed in c_fixed_list: x_fake_list.append(self.G(x_fixed, c_fixed)) x_concat = torch.cat(x_fake_list, dim=3) - sample_path = os.path.join(self.sample_dir, '{}-images.jpg'.format(i+1)) - save_image(self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0) - print('Saved real and fake images into {}...'.format(sample_path)) + sample_path = os.path.join( + self.sample_dir, "{}-images.jpg".format(i + 1) + ) + save_image( + self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0 + ) + print("Saved real and fake images into {}...".format(sample_path)) # Save model checkpoints. - if (i+1) % self.model_save_step == 0: - G_path = os.path.join(self.model_save_dir, '{}-G.ckpt'.format(i+1)) - D_path = os.path.join(self.model_save_dir, '{}-D.ckpt'.format(i+1)) + if (i + 1) % self.model_save_step == 0: + G_path = os.path.join(self.model_save_dir, "{}-G.ckpt".format(i + 1)) + D_path = os.path.join(self.model_save_dir, "{}-D.ckpt".format(i + 1)) torch.save(self.G.state_dict(), G_path) torch.save(self.D.state_dict(), D_path) - print('Saved model checkpoints into {}...'.format(self.model_save_dir)) + print("Saved model checkpoints into {}...".format(self.model_save_dir)) # Decay learning rates. 
- if (i+1) % self.lr_update_step == 0 and (i+1) > (self.num_iters - self.num_iters_decay): - g_lr -= (self.g_lr / float(self.num_iters_decay)) - d_lr -= (self.d_lr / float(self.num_iters_decay)) + if (i + 1) % self.lr_update_step == 0 and (i + 1) > ( + self.num_iters - self.num_iters_decay + ): + g_lr -= self.g_lr / float(self.num_iters_decay) + d_lr -= self.d_lr / float(self.num_iters_decay) self.update_lr(g_lr, d_lr) - print ('Decayed learning rates, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr)) + print("Decayed learning rates, g_lr: {}, d_lr: {}.".format(g_lr, d_lr)) def train_multi(self): - """Train StarGAN with multiple datasets.""" + """Train StarGAN with multiple datasets.""" # Data iterators. celeba_iter = iter(self.celeba_loader) rafd_iter = iter(self.rafd_loader) @@ -353,12 +401,22 @@ def train_multi(self): # Fetch fixed inputs for debugging. x_fixed, c_org = next(celeba_iter) x_fixed = x_fixed.to(self.device) - c_celeba_list = self.create_labels(c_org, self.c_dim, 'CelebA', self.selected_attrs) - c_rafd_list = self.create_labels(c_org, self.c2_dim, 'RaFD') - zero_celeba = torch.zeros(x_fixed.size(0), self.c_dim).to(self.device) # Zero vector for CelebA. - zero_rafd = torch.zeros(x_fixed.size(0), self.c2_dim).to(self.device) # Zero vector for RaFD. - mask_celeba = self.label2onehot(torch.zeros(x_fixed.size(0)), 2).to(self.device) # Mask vector: [1, 0]. - mask_rafd = self.label2onehot(torch.ones(x_fixed.size(0)), 2).to(self.device) # Mask vector: [0, 1]. + c_celeba_list = self.create_labels( + c_org, self.c_dim, "CelebA", self.selected_attrs + ) + c_rafd_list = self.create_labels(c_org, self.c2_dim, "RaFD") + zero_celeba = torch.zeros(x_fixed.size(0), self.c_dim).to( + self.device + ) # Zero vector for CelebA. + zero_rafd = torch.zeros(x_fixed.size(0), self.c2_dim).to( + self.device + ) # Zero vector for RaFD. + mask_celeba = self.label2onehot(torch.zeros(x_fixed.size(0)), 2).to( + self.device + ) # Mask vector: [1, 0]. 
+ mask_rafd = self.label2onehot(torch.ones(x_fixed.size(0)), 2).to( + self.device + ) # Mask vector: [0, 1]. # Learning rate cache for decaying. g_lr = self.g_lr @@ -371,25 +429,24 @@ def train_multi(self): self.restore_model(self.resume_iters) # Start training. - print('Start training...') + print("Start training...") start_time = time.time() for i in range(start_iters, self.num_iters): - for dataset in ['CelebA', 'RaFD']: - + for dataset in ["CelebA", "RaFD"]: # =================================================================================== # # 1. Preprocess input data # # =================================================================================== # - + # Fetch real images and labels. - data_iter = celeba_iter if dataset == 'CelebA' else rafd_iter - + data_iter = celeba_iter if dataset == "CelebA" else rafd_iter + try: x_real, label_org = next(data_iter) except: - if dataset == 'CelebA': + if dataset == "CelebA": celeba_iter = iter(self.celeba_loader) x_real, label_org = next(celeba_iter) - elif dataset == 'RaFD': + elif dataset == "RaFD": rafd_iter = iter(self.rafd_loader) x_real, label_org = next(rafd_iter) @@ -397,14 +454,14 @@ def train_multi(self): rand_idx = torch.randperm(label_org.size(0)) label_trg = label_org[rand_idx] - if dataset == 'CelebA': + if dataset == "CelebA": c_org = label_org.clone() c_trg = label_trg.clone() zero = torch.zeros(x_real.size(0), self.c2_dim) mask = self.label2onehot(torch.zeros(x_real.size(0)), 2) c_org = torch.cat([c_org, zero, mask], dim=1) c_trg = torch.cat([c_trg, zero, mask], dim=1) - elif dataset == 'RaFD': + elif dataset == "RaFD": c_org = self.label2onehot(label_org, self.c2_dim) c_trg = self.label2onehot(label_trg, self.c2_dim) zero = torch.zeros(x_real.size(0), self.c_dim) @@ -412,11 +469,15 @@ def train_multi(self): c_org = torch.cat([zero, c_org, mask], dim=1) c_trg = torch.cat([zero, c_trg, mask], dim=1) - x_real = x_real.to(self.device) # Input images. 
- c_org = c_org.to(self.device) # Original domain labels. - c_trg = c_trg.to(self.device) # Target domain labels. - label_org = label_org.to(self.device) # Labels for computing classification loss. - label_trg = label_trg.to(self.device) # Labels for computing classification loss. + x_real = x_real.to(self.device) # Input images. + c_org = c_org.to(self.device) # Original domain labels. + c_trg = c_trg.to(self.device) # Target domain labels. + label_org = label_org.to( + self.device + ) # Labels for computing classification loss. + label_trg = label_trg.to( + self.device + ) # Labels for computing classification loss. # =================================================================================== # # 2. Train the discriminator # @@ -424,8 +485,12 @@ def train_multi(self): # Compute loss with real images. out_src, out_cls = self.D(x_real) - out_cls = out_cls[:, :self.c_dim] if dataset == 'CelebA' else out_cls[:, self.c_dim:] - d_loss_real = - torch.mean(out_src) + out_cls = ( + out_cls[:, : self.c_dim] + if dataset == "CelebA" + else out_cls[:, self.c_dim :] + ) + d_loss_real = -torch.mean(out_src) d_loss_cls = self.classification_loss(out_cls, label_org, dataset) # Compute loss with fake images. @@ -435,33 +500,44 @@ def train_multi(self): # Compute loss for gradient penalty. alpha = torch.rand(x_real.size(0), 1, 1, 1).to(self.device) - x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True) + x_hat = ( + alpha * x_real.data + (1 - alpha) * x_fake.data + ).requires_grad_(True) out_src, _ = self.D(x_hat) d_loss_gp = self.gradient_penalty(out_src, x_hat) # Backward and optimize. - d_loss = d_loss_real + d_loss_fake + self.lambda_cls * d_loss_cls + self.lambda_gp * d_loss_gp + d_loss = ( + d_loss_real + + d_loss_fake + + self.lambda_cls * d_loss_cls + + self.lambda_gp * d_loss_gp + ) self.reset_grad() d_loss.backward() self.d_optimizer.step() # Logging. 
loss = {} - loss['D/loss_real'] = d_loss_real.item() - loss['D/loss_fake'] = d_loss_fake.item() - loss['D/loss_cls'] = d_loss_cls.item() - loss['D/loss_gp'] = d_loss_gp.item() - + loss["D/loss_real"] = d_loss_real.item() + loss["D/loss_fake"] = d_loss_fake.item() + loss["D/loss_cls"] = d_loss_cls.item() + loss["D/loss_gp"] = d_loss_gp.item() + # =================================================================================== # # 3. Train the generator # # =================================================================================== # - if (i+1) % self.n_critic == 0: + if (i + 1) % self.n_critic == 0: # Original-to-target domain. x_fake = self.G(x_real, c_trg) out_src, out_cls = self.D(x_fake) - out_cls = out_cls[:, :self.c_dim] if dataset == 'CelebA' else out_cls[:, self.c_dim:] - g_loss_fake = - torch.mean(out_src) + out_cls = ( + out_cls[:, : self.c_dim] + if dataset == "CelebA" + else out_cls[:, self.c_dim :] + ) + g_loss_fake = -torch.mean(out_src) g_loss_cls = self.classification_loss(out_cls, label_trg, dataset) # Target-to-original domain. @@ -469,35 +545,41 @@ def train_multi(self): g_loss_rec = torch.mean(torch.abs(x_real - x_reconst)) # Backward and optimize. - g_loss = g_loss_fake + self.lambda_rec * g_loss_rec + self.lambda_cls * g_loss_cls + g_loss = ( + g_loss_fake + + self.lambda_rec * g_loss_rec + + self.lambda_cls * g_loss_cls + ) self.reset_grad() g_loss.backward() self.g_optimizer.step() # Logging. - loss['G/loss_fake'] = g_loss_fake.item() - loss['G/loss_rec'] = g_loss_rec.item() - loss['G/loss_cls'] = g_loss_cls.item() + loss["G/loss_fake"] = g_loss_fake.item() + loss["G/loss_rec"] = g_loss_rec.item() + loss["G/loss_cls"] = g_loss_cls.item() # =================================================================================== # # 4. Miscellaneous # # =================================================================================== # # Print out training info. 
- if (i+1) % self.log_step == 0: + if (i + 1) % self.log_step == 0: et = time.time() - start_time et = str(datetime.timedelta(seconds=et))[:-7] - log = "Elapsed [{}], Iteration [{}/{}], Dataset [{}]".format(et, i+1, self.num_iters, dataset) + log = "Elapsed [{}], Iteration [{}/{}], Dataset [{}]".format( + et, i + 1, self.num_iters, dataset + ) for tag, value in loss.items(): log += ", {}: {:.4f}".format(tag, value) print(log) if self.use_tensorboard: for tag, value in loss.items(): - self.logger.scalar_summary(tag, value, i+1) + self.logger.scalar_summary(tag, value, i + 1) # Translate fixed images for debugging. - if (i+1) % self.sample_step == 0: + if (i + 1) % self.sample_step == 0: with torch.no_grad(): x_fake_list = [x_fixed] for c_fixed in c_celeba_list: @@ -507,42 +589,49 @@ def train_multi(self): c_trg = torch.cat([zero_celeba, c_fixed, mask_rafd], dim=1) x_fake_list.append(self.G(x_fixed, c_trg)) x_concat = torch.cat(x_fake_list, dim=3) - sample_path = os.path.join(self.sample_dir, '{}-images.jpg'.format(i+1)) - save_image(self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0) - print('Saved real and fake images into {}...'.format(sample_path)) + sample_path = os.path.join( + self.sample_dir, "{}-images.jpg".format(i + 1) + ) + save_image( + self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0 + ) + print("Saved real and fake images into {}...".format(sample_path)) # Save model checkpoints. 
- if (i+1) % self.model_save_step == 0: - G_path = os.path.join(self.model_save_dir, '{}-G.ckpt'.format(i+1)) - D_path = os.path.join(self.model_save_dir, '{}-D.ckpt'.format(i+1)) + if (i + 1) % self.model_save_step == 0: + G_path = os.path.join(self.model_save_dir, "{}-G.ckpt".format(i + 1)) + D_path = os.path.join(self.model_save_dir, "{}-D.ckpt".format(i + 1)) torch.save(self.G.state_dict(), G_path) torch.save(self.D.state_dict(), D_path) - print('Saved model checkpoints into {}...'.format(self.model_save_dir)) + print("Saved model checkpoints into {}...".format(self.model_save_dir)) # Decay learning rates. - if (i+1) % self.lr_update_step == 0 and (i+1) > (self.num_iters - self.num_iters_decay): - g_lr -= (self.g_lr / float(self.num_iters_decay)) - d_lr -= (self.d_lr / float(self.num_iters_decay)) + if (i + 1) % self.lr_update_step == 0 and (i + 1) > ( + self.num_iters - self.num_iters_decay + ): + g_lr -= self.g_lr / float(self.num_iters_decay) + d_lr -= self.d_lr / float(self.num_iters_decay) self.update_lr(g_lr, d_lr) - print ('Decayed learning rates, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr)) + print("Decayed learning rates, g_lr: {}, d_lr: {}.".format(g_lr, d_lr)) def test(self): """Translate images using StarGAN trained on a single dataset.""" # Load the trained generator. self.restore_model(self.test_iters) - + # Set data loader. - if self.dataset == 'CelebA': + if self.dataset == "CelebA": data_loader = self.celeba_loader - elif self.dataset == 'RaFD': + elif self.dataset == "RaFD": data_loader = self.rafd_loader - + with torch.no_grad(): for i, (x_real, c_org) in enumerate(data_loader): - # Prepare input images and target domain labels. x_real = x_real.to(self.device) - c_trg_list = self.create_labels(c_org, self.c_dim, self.dataset, self.selected_attrs) + c_trg_list = self.create_labels( + c_org, self.c_dim, self.dataset, self.selected_attrs + ) # Translate images. 
x_fake_list = [x_real] @@ -551,26 +640,39 @@ def test(self): # Save the translated images. x_concat = torch.cat(x_fake_list, dim=3) - result_path = os.path.join(self.result_dir, '{}-images.jpg'.format(i+1)) - save_image(self.denorm(x_concat.data.cpu()), result_path, nrow=1, padding=0) - print('Saved real and fake images into {}...'.format(result_path)) + result_path = os.path.join( + self.result_dir, "{}-images.jpg".format(i + 1) + ) + save_image( + self.denorm(x_concat.data.cpu()), result_path, nrow=1, padding=0 + ) + print("Saved real and fake images into {}...".format(result_path)) def test_multi(self): """Translate images using StarGAN trained on multiple datasets.""" # Load the trained generator. self.restore_model(self.test_iters) - + with torch.no_grad(): for i, (x_real, c_org) in enumerate(self.celeba_loader): - # Prepare input images and target domain labels. x_real = x_real.to(self.device) - c_celeba_list = self.create_labels(c_org, self.c_dim, 'CelebA', self.selected_attrs) - c_rafd_list = self.create_labels(c_org, self.c2_dim, 'RaFD') - zero_celeba = torch.zeros(x_real.size(0), self.c_dim).to(self.device) # Zero vector for CelebA. - zero_rafd = torch.zeros(x_real.size(0), self.c2_dim).to(self.device) # Zero vector for RaFD. - mask_celeba = self.label2onehot(torch.zeros(x_real.size(0)), 2).to(self.device) # Mask vector: [1, 0]. - mask_rafd = self.label2onehot(torch.ones(x_real.size(0)), 2).to(self.device) # Mask vector: [0, 1]. + c_celeba_list = self.create_labels( + c_org, self.c_dim, "CelebA", self.selected_attrs + ) + c_rafd_list = self.create_labels(c_org, self.c2_dim, "RaFD") + zero_celeba = torch.zeros(x_real.size(0), self.c_dim).to( + self.device + ) # Zero vector for CelebA. + zero_rafd = torch.zeros(x_real.size(0), self.c2_dim).to( + self.device + ) # Zero vector for RaFD. + mask_celeba = self.label2onehot(torch.zeros(x_real.size(0)), 2).to( + self.device + ) # Mask vector: [1, 0]. 
+ mask_rafd = self.label2onehot(torch.ones(x_real.size(0)), 2).to( + self.device + ) # Mask vector: [0, 1]. # Translate images. x_fake_list = [x_real] @@ -583,6 +685,10 @@ def test_multi(self): # Save the translated images. x_concat = torch.cat(x_fake_list, dim=3) - result_path = os.path.join(self.result_dir, '{}-images.jpg'.format(i+1)) - save_image(self.denorm(x_concat.data.cpu()), result_path, nrow=1, padding=0) - print('Saved real and fake images into {}...'.format(result_path)) \ No newline at end of file + result_path = os.path.join( + self.result_dir, "{}-images.jpg".format(i + 1) + ) + save_image( + self.denorm(x_concat.data.cpu()), result_path, nrow=1, padding=0 + ) + print("Saved real and fake images into {}...".format(result_path)) diff --git a/benchmarks/super-slomo/requirements.cuda.txt b/benchmarks/super-slomo/requirements.cuda.txt index 40caddc4d..9613eeb92 100644 --- a/benchmarks/super-slomo/requirements.cuda.txt +++ b/benchmarks/super-slomo/requirements.cuda.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/super-slomo/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-super-slomo.txt benchmarks/super-slomo/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/super-slomo/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-super-slomo.txt benchmarks/super-slomo/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -10,22 +10,18 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -34,11 +30,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -52,15 +52,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -72,11 +68,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/super-slomo/requirements.in # opencv-python @@ -85,13 +81,13 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -opencv-python==4.7.0.72 +opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -99,7 +95,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -107,7 +103,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf @@ -119,7 +115,7 @@ requests==2.31.0 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -131,25 +127,24 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/super-slomo/requirements.in # torchvision - # triton -torchvision==0.15.2+cu118 +torchvision==0.16.0+cu118 # via -r benchmarks/super-slomo/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via -r benchmarks/super-slomo/requirements.in -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -157,5 +152,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/super-slomo/requirements.in b/benchmarks/super-slomo/requirements.in index 25e36ecdf..7fe6ea467 100644 --- a/benchmarks/super-slomo/requirements.in +++ b/benchmarks/super-slomo/requirements.in @@ -3,5 +3,4 @@ torchvision numpy tqdm opencv-python - -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/super-slomo/requirements.rocm.txt b/benchmarks/super-slomo/requirements.rocm.txt index a3557abc5..02ff9f070 100644 --- a/benchmarks/super-slomo/requirements.rocm.txt +++ b/benchmarks/super-slomo/requirements.rocm.txt @@ -1,28 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/super-slomo/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-super-slomo.txt benchmarks/super-slomo/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/super-slomo/requirements.rocm.txt 
--resolver=backtracking .pin/tmp-constraints-rocm-super-slomo.txt benchmarks/super-slomo/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -34,11 +34,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm # torch +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -52,15 +56,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -72,11 +76,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -r benchmarks/super-slomo/requirements.in # opencv-python @@ -85,13 +89,13 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -opencv-python==4.7.0.72 +opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in ovld==0.3.2 # via # -c 
.pin/../.pin/constraints-rocm-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -99,7 +103,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -107,11 +111,11 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf @@ -123,7 +127,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir @@ -135,21 +139,21 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/super-slomo/requirements.in # pytorch-triton-rocm # torchvision -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via -r benchmarks/super-slomo/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via -r benchmarks/super-slomo/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -157,5 +161,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/super-slomo/slomo/data/create_dataset.py b/benchmarks/super-slomo/slomo/data/create_dataset.py index 29e7eee17..dfde2169c 100644 --- a/benchmarks/super-slomo/slomo/data/create_dataset.py +++ b/benchmarks/super-slomo/slomo/data/create_dataset.py @@ -6,13 +6,33 @@ # For parsing commandline arguments parser = argparse.ArgumentParser() 
-parser.add_argument("--ffmpeg_dir", type=str, required=True, help='path to ffmpeg.exe') -parser.add_argument("--dataset", type=str, default="custom", help='specify if using "adobe240fps" or custom video dataset') -parser.add_argument("--videos_folder", type=str, required=True, help='path to the folder containing videos') -parser.add_argument("--dataset_folder", type=str, required=True, help='path to the output dataset folder') +parser.add_argument("--ffmpeg_dir", type=str, required=True, help="path to ffmpeg.exe") +parser.add_argument( + "--dataset", + type=str, + default="custom", + help='specify if using "adobe240fps" or custom video dataset', +) +parser.add_argument( + "--videos_folder", + type=str, + required=True, + help="path to the folder containing videos", +) +parser.add_argument( + "--dataset_folder", + type=str, + required=True, + help="path to the output dataset folder", +) parser.add_argument("--img_width", type=int, default=640, help="output image width") parser.add_argument("--img_height", type=int, default=360, help="output image height") -parser.add_argument("--train_test_split", type=tuple, default=(90, 10), help="train test split for custom dataset") +parser.add_argument( + "--train_test_split", + type=tuple, + default=(90, 10), + help="train test split for custom dataset", +) args = parser.parse_args() @@ -34,10 +54,17 @@ def extract_frames(videos, inDir, outDir): None """ - for video in videos: os.mkdir(os.path.join(outDir, os.path.splitext(video)[0])) - retn = os.system('{} -i {} -vf scale={}:{} -vsync 0 -qscale:v 2 {}/%04d.jpg'.format(os.path.join(args.ffmpeg_dir, "ffmpeg"), os.path.join(inDir, video), args.img_width, args.img_height, os.path.join(outDir, os.path.splitext(video)[0]))) + retn = os.system( + "{} -i {} -vf scale={}:{} -vsync 0 -qscale:v 2 {}/%04d.jpg".format( + os.path.join(args.ffmpeg_dir, "ffmpeg"), + os.path.join(inDir, video), + args.img_width, + args.img_height, + os.path.join(outDir, os.path.splitext(video)[0]), + ) + ) 
if retn: print("Error converting file:{}. Exiting.".format(video)) @@ -59,7 +86,6 @@ def create_clips(root, destination): None """ - folderCounter = -1 files = os.listdir(root) @@ -70,36 +96,40 @@ def create_clips(root, destination): for imageCounter, image in enumerate(images): # Bunch images in groups of 12 frames - if (imageCounter % 12 == 0): - if (imageCounter + 11 >= len(images)): + if imageCounter % 12 == 0: + if imageCounter + 11 >= len(images): break folderCounter += 1 os.mkdir("{}/{}".format(destination, folderCounter)) - move("{}/{}/{}".format(root, file, image), "{}/{}/{}".format(destination, folderCounter, image)) + move( + "{}/{}/{}".format(root, file, image), + "{}/{}/{}".format(destination, folderCounter, image), + ) rmtree(os.path.join(root, file)) + def main(): # Create dataset folder if it doesn't exist already. if not os.path.isdir(args.dataset_folder): os.mkdir(args.dataset_folder) - extractPath = os.path.join(args.dataset_folder, "extracted") - trainPath = os.path.join(args.dataset_folder, "train") - testPath = os.path.join(args.dataset_folder, "test") - validationPath = os.path.join(args.dataset_folder, "validation") + extractPath = os.path.join(args.dataset_folder, "extracted") + trainPath = os.path.join(args.dataset_folder, "train") + testPath = os.path.join(args.dataset_folder, "test") + validationPath = os.path.join(args.dataset_folder, "validation") os.mkdir(extractPath) os.mkdir(trainPath) os.mkdir(testPath) os.mkdir(validationPath) - if(args.dataset == "adobe240fps"): + if args.dataset == "adobe240fps": f = open("adobe240fps/test_list.txt", "r") - videos = f.read().split('\n') + videos = f.read().split("\n") extract_frames(videos, args.videos_folder, extractPath) create_clips(extractPath, testPath) f = open("adobe240fps/train_list.txt", "r") - videos = f.read().split('\n') + videos = f.read().split("\n") extract_frames(videos, args.videos_folder, extractPath) create_clips(extractPath, trainPath) @@ -109,17 +139,18 @@ def main(): for 
index in indices: move("{}/{}".format(testPath, index), "{}/{}".format(validationPath, index)) - else: # custom dataset - + else: # custom dataset # Extract video names videos = os.listdir(args.videos_folder) # Create random train-test split. - testIndices = random.sample(range(len(videos)), int((args.train_test_split[1] * len(videos)) / 100)) + testIndices = random.sample( + range(len(videos)), int((args.train_test_split[1] * len(videos)) / 100) + ) trainIndices = [x for x in range((len(videos))) if x not in testIndices] # Create list of video names - testVideoNames = [videos[index] for index in testIndices] + testVideoNames = [videos[index] for index in testIndices] trainVideoNames = [videos[index] for index in trainIndices] # Create train-test dataset @@ -130,10 +161,13 @@ def main(): # Select clips at random from test set for validation set. testClips = os.listdir(testPath) - indices = random.sample(range(len(testClips)), min(100, int(len(testClips) / 5))) + indices = random.sample( + range(len(testClips)), min(100, int(len(testClips) / 5)) + ) for index in indices: move("{}/{}".format(testPath, index), "{}/{}".format(validationPath, index)) rmtree(extractPath) + main() diff --git a/benchmarks/super-slomo/slomo/dataloader.py b/benchmarks/super-slomo/slomo/dataloader.py index a008c6f1d..2704e5cca 100644 --- a/benchmarks/super-slomo/slomo/dataloader.py +++ b/benchmarks/super-slomo/slomo/dataloader.py @@ -27,7 +27,6 @@ def _make_dataset(dir): 2D list described above. """ - framesPath = [] # Find and loop over all the clips in root `dir`. for index, folder in enumerate(os.listdir(dir)): @@ -42,6 +41,7 @@ def _make_dataset(dir): framesPath[index].append(os.path.join(clipsFolderPath, image)) return framesPath + def _make_video_dataset(dir): """ Creates a 1D list of all the frames. @@ -60,7 +60,6 @@ def _make_video_dataset(dir): 1D list described above. """ - framesPath = [] # Find and loop over all the frames in root `dir`. 
for image in sorted(os.listdir(dir)): @@ -68,6 +67,7 @@ def _make_video_dataset(dir): framesPath.append(os.path.join(dir, image)) return framesPath + def _pil_loader(path, cropArea=None, resizeDim=None, frameFlip=0): """ Opens image at `path` using pil and applies data augmentation. @@ -89,19 +89,22 @@ def _pil_loader(path, cropArea=None, resizeDim=None, frameFlip=0): 2D list described above. """ - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: + with open(path, "rb") as f: img = Image.open(f) # Resize image if specified. - resized_img = img.resize(resizeDim, Image.ANTIALIAS) if (resizeDim != None) else img + resized_img = ( + img.resize(resizeDim, Image.ANTIALIAS) if (resizeDim != None) else img + ) # Crop image if crop area specified. cropped_img = img.crop(cropArea) if (cropArea != None) else resized_img # Flip image horizontally if specified. - flipped_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT) if frameFlip else cropped_img - return flipped_img.convert('RGB') - - + flipped_img = ( + cropped_img.transpose(Image.FLIP_LEFT_RIGHT) if frameFlip else cropped_img + ) + return flipped_img.convert("RGB") + + class SuperSloMo(data.Dataset): """ A dataloader for loading N samples arranged in this way: @@ -144,8 +147,14 @@ class SuperSloMo(data.Dataset): Returns printable representation of the dataset object. """ - - def __init__(self, root, transform=None, dim=(640, 360), randomCropSize=(352, 352), train=True): + def __init__( + self, + root, + transform=None, + dim=(640, 360), + randomCropSize=(352, 352), + train=True, + ): """ Parameters ---------- @@ -161,27 +170,26 @@ def __init__(self, root, transform=None, dim=(640, 360), randomCropSize=(352, 35 Dimensions of random crop to be applied. Default: (352, 352) train : boolean, optional Specifies if the dataset is for training or testing/validation. 
- `True` returns samples with data augmentation like random + `True` returns samples with data augmentation like random flipping, random cropping, etc. while `False` returns the samples without randomization. Default: True """ - # Populate the list with image paths for all the # frame in `root`. framesPath = _make_dataset(root) # Raise error if no images found in root. if len(framesPath) == 0: - raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n")) - + raise (RuntimeError("Found 0 files in subfolders of: " + root + "\n")) + self.randomCropSize = randomCropSize - self.cropX0 = dim[0] - randomCropSize[0] - self.cropY0 = dim[1] - randomCropSize[1] - self.root = root - self.transform = transform - self.train = train + self.cropX0 = dim[0] - randomCropSize[0] + self.cropY0 = dim[1] - randomCropSize[1] + self.root = root + self.transform = transform + self.train = train - self.framesPath = framesPath + self.framesPath = framesPath def __getitem__(self, index): """ @@ -199,28 +207,32 @@ def __getitem__(self, index): Returns ------- tuple - (sample, returnIndex) where sample is - [I0, intermediate_frame, I1] and returnIndex is - the position of `random_intermediate_frame`. + (sample, returnIndex) where sample is + [I0, intermediate_frame, I1] and returnIndex is + the position of `random_intermediate_frame`. e.g.- `returnIndex` of frame next to I0 would be 0 and frame before I1 would be 6. 
""" - sample = [] - - if (self.train): + + if self.train: ### Data Augmentation ### # To select random 9 frames from 12 frames in a clip firstFrame = random.randint(0, 3) # Apply random crop on the 9 input frames cropX = random.randint(0, self.cropX0) cropY = random.randint(0, self.cropY0) - cropArea = (cropX, cropY, cropX + self.randomCropSize[0], cropY + self.randomCropSize[1]) + cropArea = ( + cropX, + cropY, + cropX + self.randomCropSize[0], + cropY + self.randomCropSize[1], + ) # Random reverse frame - #frameRange = range(firstFrame, firstFrame + 9) if (random.randint(0, 1)) else range(firstFrame + 8, firstFrame - 1, -1) + # frameRange = range(firstFrame, firstFrame + 9) if (random.randint(0, 1)) else range(firstFrame + 8, firstFrame - 1, -1) IFrameIndex = random.randint(firstFrame + 1, firstFrame + 7) - if (random.randint(0, 1)): + if random.randint(0, 1): frameRange = [firstFrame, IFrameIndex, firstFrame + 8] returnIndex = IFrameIndex - firstFrame - 1 else: @@ -233,22 +245,25 @@ def __getitem__(self, index): # For validation/test sets. firstFrame = 0 cropArea = (0, 0, self.randomCropSize[0], self.randomCropSize[1]) - IFrameIndex = ((index) % 7 + 1) + IFrameIndex = (index) % 7 + 1 returnIndex = IFrameIndex - 1 frameRange = [0, IFrameIndex, 8] randomFrameFlip = 0 - + # Loop over for all frames corresponding to the `index`. for frameIndex in frameRange: # Open image using pil and augment the image. - image = _pil_loader(self.framesPath[index][frameIndex], cropArea=cropArea, frameFlip=randomFrameFlip) + image = _pil_loader( + self.framesPath[index][frameIndex], + cropArea=cropArea, + frameFlip=randomFrameFlip, + ) # Apply transformation if specified. if self.transform is not None: image = self.transform(image) sample.append(image) - - return sample, returnIndex + return sample, returnIndex def __len__(self): """ @@ -260,7 +275,6 @@ def __len__(self): number of samples. 
""" - return len(self.framesPath) def __repr__(self): @@ -273,14 +287,16 @@ def __repr__(self): info. """ - - fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' - fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) - fmt_str += ' Root Location: {}\n'.format(self.root) - tmp = ' Transforms (if any): ' - fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + fmt_str = "Dataset " + self.__class__.__name__ + "\n" + fmt_str += " Number of datapoints: {}\n".format(self.__len__()) + fmt_str += " Root Location: {}\n".format(self.root) + tmp = " Transforms (if any): " + fmt_str += "{0}{1}\n".format( + tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp)) + ) return fmt_str - + + class UCI101Test(data.Dataset): """ A dataloader for loading N samples arranged in this way: @@ -317,7 +333,6 @@ class UCI101Test(data.Dataset): Returns printable representation of the dataset object. """ - def __init__(self, root, transform=None): """ Parameters @@ -330,17 +345,16 @@ def __init__(self, root, transform=None): E.g, ``transforms.RandomCrop`` for images. """ - # Populate the list with image paths for all the # frame in `root`. framesPath = _make_dataset(root) # Raise error if no images found in root. if len(framesPath) == 0: - raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n")) + raise (RuntimeError("Found 0 files in subfolders of: " + root + "\n")) - self.root = root - self.framesPath = framesPath - self.transform = transform + self.root = root + self.framesPath = framesPath + self.transform = transform def __getitem__(self, index): """ @@ -357,15 +371,14 @@ def __getitem__(self, index): Returns ------- tuple - (sample, returnIndex) where sample is - [I0, intermediate_frame, I1] and returnIndex is + (sample, returnIndex) where sample is + [I0, intermediate_frame, I1] and returnIndex is the position of `intermediate_frame`. 
The returnIndex is always 3 and is being returned to maintain compatibility with the `SuperSloMo` dataloader where 3 corresponds to the middle frame. """ - sample = [] # Loop over for all frames corresponding to the `index`. for framePath in self.framesPath[index]: @@ -377,7 +390,6 @@ def __getitem__(self, index): sample.append(image) return sample, 3 - def __len__(self): """ Returns the size of dataset. Invoked as len(datasetObj). @@ -388,7 +400,6 @@ def __len__(self): number of samples. """ - return len(self.framesPath) def __repr__(self): @@ -401,14 +412,16 @@ def __repr__(self): info. """ - - fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' - fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) - fmt_str += ' Root Location: {}\n'.format(self.root) - tmp = ' Transforms (if any): ' - fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + fmt_str = "Dataset " + self.__class__.__name__ + "\n" + fmt_str += " Number of datapoints: {}\n".format(self.__len__()) + fmt_str += " Root Location: {}\n".format(self.root) + tmp = " Transforms (if any): " + fmt_str += "{0}{1}\n".format( + tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp)) + ) return fmt_str + class Video(data.Dataset): """ A dataloader for loading all video frames in a folder: @@ -440,7 +453,6 @@ class Video(data.Dataset): Returns printable representation of the dataset object. """ - def __init__(self, root, transform=None): """ Parameters @@ -453,23 +465,22 @@ def __init__(self, root, transform=None): E.g, ``transforms.RandomCrop`` for images. """ - # Populate the list with image paths for all the # frame in `root`. 
framesPath = _make_video_dataset(root) # Get dimensions of frames - frame = _pil_loader(framesPath[0]) + frame = _pil_loader(framesPath[0]) self.origDim = frame.size - self.dim = int(self.origDim[0] / 32) * 32, int(self.origDim[1] / 32) * 32 + self.dim = int(self.origDim[0] / 32) * 32, int(self.origDim[1] / 32) * 32 # Raise error if no images found in root. if len(framesPath) == 0: - raise(RuntimeError("Found 0 files in: " + root + "\n")) + raise (RuntimeError("Found 0 files in: " + root + "\n")) - self.root = root - self.framesPath = framesPath - self.transform = transform + self.root = root + self.framesPath = framesPath + self.transform = transform def __getitem__(self, index): """ @@ -489,7 +500,6 @@ def __getitem__(self, index): `index` and I1 is the next frame. """ - sample = [] # Loop over for all frames corresponding to the `index`. for framePath in [self.framesPath[index], self.framesPath[index + 1]]: @@ -501,7 +511,6 @@ def __getitem__(self, index): sample.append(image) return sample - def __len__(self): """ Returns the size of dataset. Invoked as len(datasetObj). @@ -512,11 +521,10 @@ def __len__(self): number of samples. """ - # Using `-1` so that dataloader accesses only upto # frames [N-1, N] and not [N, N+1] which because frame # N+1 doesn't exist. - return len(self.framesPath) - 1 + return len(self.framesPath) - 1 def __repr__(self): """ @@ -528,10 +536,11 @@ def __repr__(self): info. 
""" - - fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' - fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) - fmt_str += ' Root Location: {}\n'.format(self.root) - tmp = ' Transforms (if any): ' - fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) - return fmt_str \ No newline at end of file + fmt_str = "Dataset " + self.__class__.__name__ + "\n" + fmt_str += " Number of datapoints: {}\n".format(self.__len__()) + fmt_str += " Root Location: {}\n".format(self.root) + tmp = " Transforms (if any): " + fmt_str += "{0}{1}\n".format( + tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp)) + ) + return fmt_str diff --git a/benchmarks/super-slomo/slomo/eval.py b/benchmarks/super-slomo/slomo/eval.py index 1c3cb9801..fd3273021 100644 --- a/benchmarks/super-slomo/slomo/eval.py +++ b/benchmarks/super-slomo/slomo/eval.py @@ -21,8 +21,12 @@ mean = [0.429, 0.431, 0.397] mea0 = [-m for m in mean] std = [1] * 3 - trans_forward = transforms.Compose([trans_forward, transforms.Normalize(mean=mean, std=std)]) - trans_backward = transforms.Compose([transforms.Normalize(mean=mea0, std=std), trans_backward]) + trans_forward = transforms.Compose( + [trans_forward, transforms.Normalize(mean=mean, std=std)] + ) + trans_backward = transforms.Compose( + [transforms.Normalize(mean=mea0, std=std), trans_backward] + ) flow = model.UNet(6, 4).to(device) interp = model.UNet(20, 5).to(device) @@ -36,9 +40,9 @@ def setup_back_warp(w, h): def load_models(checkpoint): - states = torch.load(checkpoint, map_location='cpu') - interp.load_state_dict(states['state_dictAT']) - flow.load_state_dict(states['state_dictFC']) + states = torch.load(checkpoint, map_location="cpu") + interp.load_state_dict(states["state_dictAT"]) + flow.load_state_dict(states["state_dictFC"]) def interpolate_batch(frames, factor): @@ -78,8 +82,9 @@ def interpolate_batch(frames, factor): co_eff = [1 - t, t] - ft_p = (co_eff[0] * vt0 * gi0ft0f + co_eff[1] 
* vt1 * gi1ft1f) / \ - (co_eff[0] * vt0 + co_eff[1] * vt1) + ft_p = (co_eff[0] * vt0 * gi0ft0f + co_eff[1] * vt1 * gi1ft1f) / ( + co_eff[0] * vt0 + co_eff[1] * vt1 + ) frame_buffer.append(ft_p) @@ -97,7 +102,7 @@ def load_batch(video_in, batch_size, batch, w, h): frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame = Image.fromarray(frame) frame = frame.resize((w, h), Image.ANTIALIAS) - frame = frame.convert('RGB') + frame = frame.convert("RGB") frame = trans_forward(frame) batch.append(frame) @@ -108,14 +113,18 @@ def denorm_frame(frame, w0, h0): frame = frame.cpu() frame = trans_backward(frame) frame = frame.resize((w0, h0), Image.BILINEAR) - frame = frame.convert('RGB') + frame = frame.convert("RGB") return np.array(frame)[:, :, ::-1].copy() -def convert_video(source, dest, factor, batch_size=10, output_format='mp4v', output_fps=30): +def convert_video( + source, dest, factor, batch_size=10, output_format="mp4v", output_fps=30 +): vin = cv2.VideoCapture(source) count = vin.get(cv2.CAP_PROP_FRAME_COUNT) - w0, h0 = int(vin.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vin.get(cv2.CAP_PROP_FRAME_HEIGHT)) + w0, h0 = int(vin.get(cv2.CAP_PROP_FRAME_WIDTH)), int( + vin.get(cv2.CAP_PROP_FRAME_HEIGHT) + ) codec = cv2.VideoWriter_fourcc(*output_format) vout = cv2.VideoWriter(dest, codec, float(output_fps), (w0, h0)) @@ -150,28 +159,34 @@ def convert_video(source, dest, factor, batch_size=10, output_format='mp4v', out vout.release() -@click.command('Evaluate Model by converting a low-FPS video to high-fps') -@click.argument('input') -@click.option('--checkpoint', help='Path to model checkpoint') -@click.option('--output', help='Path to output file to save') -@click.option('--batch', default=2, help='Number of frames to process in single forward pass') -@click.option('--scale', default=4, help='Scale Factor of FPS') -@click.option('--fps', default=30, help='FPS of output video') +@click.command("Evaluate Model by converting a low-FPS video to high-fps") +@click.argument("input") 
+@click.option("--checkpoint", help="Path to model checkpoint") +@click.option("--output", help="Path to output file to save") +@click.option( + "--batch", default=2, help="Number of frames to process in single forward pass" +) +@click.option("--scale", default=4, help="Scale Factor of FPS") +@click.option("--fps", default=30, help="FPS of output video") def main(input, checkpoint, output, batch, scale, fps): - avg = lambda x, n, x0: (x * n/(n+1) + x0 / (n+1), n+1) + avg = lambda x, n, x0: (x * n / (n + 1) + x0 / (n + 1), n + 1) load_models(checkpoint) t0 = time() n0 = 0 fpx = 0 - for dl, fd, fc in convert_video(input, output, int(scale), int(batch), output_fps=int(fps)): + for dl, fd, fc in convert_video( + input, output, int(scale), int(batch), output_fps=int(fps) + ): fpx, n0 = avg(fpx, n0, dl / (time() - t0)) - prg = int(100*fd/fc) + prg = int(100 * fd / fc) eta = (fc - fd) / fpx - print('\rDone: {:03d}% FPS: {:05.2f} ETA: {:.2f}s'.format(prg, fpx, eta) + ' '*5, end='') + print( + "\rDone: {:03d}% FPS: {:05.2f} ETA: {:.2f}s".format(prg, fpx, eta) + + " " * 5, + end="", + ) t0 = time() -if __name__ == '__main__': +if __name__ == "__main__": main() - - diff --git a/benchmarks/super-slomo/slomo/model.py b/benchmarks/super-slomo/slomo/model.py index bc3e3509a..ef706a2a2 100644 --- a/benchmarks/super-slomo/slomo/model.py +++ b/benchmarks/super-slomo/slomo/model.py @@ -10,9 +10,9 @@ class down(nn.Module): """ A class for creating neural network blocks containing layers: - + Average Pooling --> Convlution + Leaky ReLU --> Convolution + Leaky ReLU - + This is used in the UNet Class to create a UNet like NN architecture. ... @@ -24,7 +24,6 @@ class down(nn.Module): block. """ - def __init__(self, inChannels, outChannels, filterSize): """ Parameters @@ -40,12 +39,23 @@ def __init__(self, inChannels, outChannels, filterSize): a N x N filter. """ - super(down, self).__init__() # Initialize convolutional layers. 
- self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2)) - self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2)) - + self.conv1 = nn.Conv2d( + inChannels, + outChannels, + filterSize, + stride=1, + padding=int((filterSize - 1) / 2), + ) + self.conv2 = nn.Conv2d( + outChannels, + outChannels, + filterSize, + stride=1, + padding=int((filterSize - 1) / 2), + ) + def forward(self, x): """ Returns output tensor after passing input `x` to the neural network @@ -62,21 +72,21 @@ def forward(self, x): output of the NN block. """ - # Average pooling with kernel size 2 (2 x 2). x = F.avg_pool2d(x, 2) # Convolution + Leaky ReLU - x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) + x = F.leaky_relu(self.conv1(x), negative_slope=0.1) # Convolution + Leaky ReLU - x = F.leaky_relu(self.conv2(x), negative_slope = 0.1) + x = F.leaky_relu(self.conv2(x), negative_slope=0.1) return x - + + class up(nn.Module): """ A class for creating neural network blocks containing layers: - + Bilinear interpolation --> Convlution + Leaky ReLU --> Convolution + Leaky ReLU - + This is used in the UNet Class to create a UNet like NN architecture. ... @@ -88,7 +98,6 @@ class up(nn.Module): block. """ - def __init__(self, inChannels, outChannels): """ Parameters @@ -101,13 +110,12 @@ def __init__(self, inChannels, outChannels): the second convolutional layer. """ - super(up, self).__init__() # Initialize convolutional layers. - self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1) + self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1) # (2 * outChannels) is used for accommodating skip connection. 
self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1) - + def forward(self, x, skpCn): """ Returns output tensor after passing input `x` to the neural network @@ -127,20 +135,19 @@ def forward(self, x, skpCn): """ # Bilinear interpolation with scaling 2. - x = F.interpolate(x, scale_factor=2, mode='bilinear') + x = F.interpolate(x, scale_factor=2, mode="bilinear") # Convolution + Leaky ReLU - x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) + x = F.leaky_relu(self.conv1(x), negative_slope=0.1) # Convolution + Leaky ReLU on (`x`, `skpCn`) - x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope = 0.1) + x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1) return x - class UNet(nn.Module): """ A class for creating UNet like architecture as specified by the Super SloMo paper. - + ... Methods @@ -150,7 +157,6 @@ class UNet(nn.Module): block. """ - def __init__(self, inChannels, outChannels): """ Parameters @@ -161,7 +167,6 @@ def __init__(self, inChannels, outChannels): number of output channels for the UNet. """ - super(UNet, self).__init__() # Initialize neural network blocks. self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3) @@ -171,13 +176,13 @@ def __init__(self, inChannels, outChannels): self.down3 = down(128, 256, 3) self.down4 = down(256, 512, 3) self.down5 = down(512, 512, 3) - self.up1 = up(512, 512) - self.up2 = up(512, 256) - self.up3 = up(256, 128) - self.up4 = up(128, 64) - self.up5 = up(64, 32) + self.up1 = up(512, 512) + self.up2 = up(512, 256) + self.up3 = up(256, 128) + self.up4 = up(128, 64) + self.up5 = up(64, 32) self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1) - + def forward(self, x): """ Returns output tensor after passing input `x` to the neural network. @@ -193,20 +198,19 @@ def forward(self, x): output of the UNet. 
""" - - x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) - s1 = F.leaky_relu(self.conv2(x), negative_slope = 0.1) + x = F.leaky_relu(self.conv1(x), negative_slope=0.1) + s1 = F.leaky_relu(self.conv2(x), negative_slope=0.1) s2 = self.down1(s1) s3 = self.down2(s2) s4 = self.down3(s3) s5 = self.down4(s4) - x = self.down5(s5) - x = self.up1(x, s5) - x = self.up2(x, s4) - x = self.up3(x, s3) - x = self.up4(x, s2) - x = self.up5(x, s1) - x = F.leaky_relu(self.conv3(x), negative_slope = 0.1) + x = self.down5(s5) + x = self.up1(x, s5) + x = self.up2(x, s4) + x = self.up3(x, s3) + x = self.up4(x, s2) + x = self.up5(x, s1) + x = F.leaky_relu(self.conv3(x), negative_slope=0.1) return x @@ -216,7 +220,7 @@ class backWarp(nn.Module): This is used for backwarping to an image: - Given optical flow from frame I0 to I1 --> F_0_1 and frame I1, + Given optical flow from frame I0 to I1 --> F_0_1 and frame I1, it generates I0 <-- backwarp(F_0_1, I1). ... @@ -228,7 +232,6 @@ class backWarp(nn.Module): block. """ - def __init__(self, W, H, device): """ Parameters @@ -238,10 +241,9 @@ def __init__(self, W, H, device): H : int height of the image. device : device - computation device (cpu/cuda). + computation device (cpu/cuda). """ - super(backWarp, self).__init__() # create a grid gridX, gridY = np.meshgrid(np.arange(W), np.arange(H)) @@ -249,7 +251,7 @@ def __init__(self, W, H, device): self.H = H self.gridX = torch.tensor(gridX, requires_grad=False, device=device) self.gridY = torch.tensor(gridY, requires_grad=False, device=device) - + def forward(self, img, flow): """ Returns output tensor after passing input `img` and `flow` to the backwarping @@ -269,27 +271,27 @@ def forward(self, img, flow): frame I0. """ - # Extract horizontal and vertical flows. 
u = flow[:, 0, :, :] v = flow[:, 1, :, :] x = self.gridX.unsqueeze(0).expand_as(u).float() + u y = self.gridY.unsqueeze(0).expand_as(v).float() + v # range -1 to 1 - x = 2*(x/self.W - 0.5) - y = 2*(y/self.H - 0.5) + x = 2 * (x / self.W - 0.5) + y = 2 * (y / self.H - 0.5) # stacking X and Y - grid = torch.stack((x,y), dim=3) + grid = torch.stack((x, y), dim=3) # Sample pixels using bilinear interpolation. imgOut = torch.nn.functional.grid_sample(img, grid) return imgOut # Creating an array of `t` values for the 7 intermediate frames between -# reference frames I0 and I1. +# reference frames I0 and I1. t = np.linspace(0.125, 0.875, 7) -def getFlowCoeff (indices, device): + +def getFlowCoeff(indices, device): """ Gets flow coefficients used for calculating intermediate optical flows from optical flows between I0 and I1: F_0_1 and F_1_0. @@ -309,7 +311,7 @@ def getFlowCoeff (indices, device): indices corresponding to the intermediate frame positions of all samples in the batch. device : device - computation device (cpu/cuda). + computation device (cpu/cuda). Returns ------- @@ -317,17 +319,22 @@ def getFlowCoeff (indices, device): coefficients C00, C01, C10, C11. 
""" - # Convert indices tensor to numpy array ind = indices.detach().numpy() - C11 = C00 = - (1 - (t[ind])) * (t[ind]) + C11 = C00 = -(1 - (t[ind])) * (t[ind]) C01 = (t[ind]) * (t[ind]) C10 = (1 - (t[ind])) * (1 - (t[ind])) - return torch.Tensor(C00)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C01)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C10)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C11)[None, None, None, :].permute(3, 0, 1, 2).to(device) + return ( + torch.Tensor(C00)[None, None, None, :].permute(3, 0, 1, 2).to(device), + torch.Tensor(C01)[None, None, None, :].permute(3, 0, 1, 2).to(device), + torch.Tensor(C10)[None, None, None, :].permute(3, 0, 1, 2).to(device), + torch.Tensor(C11)[None, None, None, :].permute(3, 0, 1, 2).to(device), + ) + -def getWarpCoeff (indices, device): +def getWarpCoeff(indices, device): """ - Gets coefficients used for calculating final intermediate + Gets coefficients used for calculating final intermediate frame `It_gen` from backwarped images using flows F_t_0 and F_t_1. It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1) @@ -345,7 +352,7 @@ def getWarpCoeff (indices, device): indices corresponding to the intermediate frame positions of all samples in the batch. device : device - computation device (cpu/cuda). + computation device (cpu/cuda). Returns ------- @@ -353,9 +360,10 @@ def getWarpCoeff (indices, device): coefficients C0 and C1. 
""" - # Convert indices tensor to numpy array ind = indices.detach().numpy() C0 = 1 - t[ind] C1 = t[ind] - return torch.Tensor(C0)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C1)[None, None, None, :].permute(3, 0, 1, 2).to(device) \ No newline at end of file + return torch.Tensor(C0)[None, None, None, :].permute(3, 0, 1, 2).to( + device + ), torch.Tensor(C1)[None, None, None, :].permute(3, 0, 1, 2).to(device) diff --git a/benchmarks/super-slomo/slomo/synth.py b/benchmarks/super-slomo/slomo/synth.py index 1b69407a4..57835e360 100644 --- a/benchmarks/super-slomo/slomo/synth.py +++ b/benchmarks/super-slomo/slomo/synth.py @@ -1,4 +1,3 @@ - class SyntheticData: def __init__(self, generators, n, repeat): self.n = n diff --git a/benchmarks/super-slomo/slomo/train.py b/benchmarks/super-slomo/slomo/train.py index 0c680cae7..7fea1a045 100644 --- a/benchmarks/super-slomo/slomo/train.py +++ b/benchmarks/super-slomo/slomo/train.py @@ -1,5 +1,4 @@ - -#[Super SloMo] +# [Super SloMo] ##High Quality Estimation of Multiple Intermediate Frames for Video Interpolation import argparse @@ -17,18 +16,59 @@ def main(): - # For parsing commandline arguments parser = argparse.ArgumentParser() - parser.add_argument("--dataset_root", type=str, required=False, help='path to dataset folder containing train-test-validation folders') - parser.add_argument("--checkpoint", type=str, help='path of checkpoint for pretrained model') - parser.add_argument("--train_continue", type=bool, default=False, help='If resuming from checkpoint, set to True and set `checkpoint` path. Default: False.') - parser.add_argument("--epochs", type=int, default=200, help='number of epochs to train. Default: 200.') - parser.add_argument("--train_batch_size", type=int, default=6, help='batch size for training. Default: 6.') - parser.add_argument("--validation_batch_size", type=int, default=10, help='batch size for validation. 
Default: 10.') - parser.add_argument("--init_learning_rate", type=float, default=0.0001, help='set initial learning rate. Default: 0.0001.') - parser.add_argument("--milestones", type=list, default=[100, 150], help='Set to epoch values where you want to decrease learning rate by a factor of 0.1. Default: [100, 150]') - parser.add_argument("--progress_iter", type=int, default=100, help='frequency of reporting progress and validation. N: after every N iterations. Default: 100.') + parser.add_argument( + "--dataset_root", + type=str, + required=False, + help="path to dataset folder containing train-test-validation folders", + ) + parser.add_argument( + "--checkpoint", type=str, help="path of checkpoint for pretrained model" + ) + parser.add_argument( + "--train_continue", + type=bool, + default=False, + help="If resuming from checkpoint, set to True and set `checkpoint` path. Default: False.", + ) + parser.add_argument( + "--epochs", + type=int, + default=200, + help="number of epochs to train. Default: 200.", + ) + parser.add_argument( + "--train_batch_size", + type=int, + default=6, + help="batch size for training. Default: 6.", + ) + parser.add_argument( + "--validation_batch_size", + type=int, + default=10, + help="batch size for validation. Default: 10.", + ) + parser.add_argument( + "--init_learning_rate", + type=float, + default=0.0001, + help="set initial learning rate. Default: 0.0001.", + ) + parser.add_argument( + "--milestones", + type=list, + default=[100, 150], + help="Set to epoch values where you want to decrease learning rate by a factor of 0.1. Default: [100, 150]", + ) + parser.add_argument( + "--progress_iter", + type=int, + default=100, + help="frequency of reporting progress and validation. N: after every N iterations. 
Default: 100.", + ) parser.add_argument( "--no-tf32", dest="allow_tf32", @@ -42,29 +82,23 @@ def main(): torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True - ###Initialize flow computation and arbitrary-time flow interpolation CNNs. - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") flowComp = model.UNet(6, 4) flowComp.to(device) ArbTimeFlowIntrp = model.UNet(20, 5) ArbTimeFlowIntrp.to(device) - ###Initialze backward warpers for train and validation datasets - - trainFlowBackWarp = model.backWarp(352, 352, device) - trainFlowBackWarp = trainFlowBackWarp.to(device) + trainFlowBackWarp = model.backWarp(352, 352, device) + trainFlowBackWarp = trainFlowBackWarp.to(device) validationFlowBackWarp = model.backWarp(640, 352, device) validationFlowBackWarp = validationFlowBackWarp.to(device) - ###Load Datasets - # # Channel wise mean calculated on adobe240-fps training dataset # mean = [0.429, 0.431, 0.397] # std = [1, 1, 1] @@ -86,27 +120,20 @@ def ogen(): return torch.randint(0, 7, ()) trainset = SyntheticData( - n=args.train_batch_size, - repeat=10000, - generators=[igen, ogen] + n=args.train_batch_size, repeat=10000, generators=[igen, ogen] ) trainloader = torch.utils.data.DataLoader( - trainset, - batch_size=args.train_batch_size, - num_workers=2 + trainset, batch_size=args.train_batch_size, num_workers=2 ) - ###Utils - + def get_lr(optimizer): for param_group in optimizer.param_groups: - return param_group['lr'] - + return param_group["lr"] ###Loss and Optimizer - L1_lossFn = nn.L1Loss() MSE_LossFn = nn.MSELoss() @@ -114,105 +141,126 @@ def get_lr(optimizer): optimizer = optim.Adam(params, lr=args.init_learning_rate) # scheduler to decrease learning rate by a factor of 10 at milestones. 
- scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=0.1) + scheduler = optim.lr_scheduler.MultiStepLR( + optimizer, milestones=args.milestones, gamma=0.1 + ) ###Initializing VGG16 model for perceptual loss - vgg16 = torchvision.models.vgg16(pretrained=True) vgg16_conv_4_3 = nn.Sequential(*list(vgg16.children())[0][:22]) vgg16_conv_4_3.to(device) for param in vgg16_conv_4_3.parameters(): - param.requires_grad = False - + param.requires_grad = False ### Initialization - if args.train_continue: dict1 = torch.load(args.checkpoint) - ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT']) - flowComp.load_state_dict(dict1['state_dictFC']) + ArbTimeFlowIntrp.load_state_dict(dict1["state_dictAT"]) + flowComp.load_state_dict(dict1["state_dictFC"]) else: - dict1 = {'loss': [], 'valLoss': [], 'valPSNR': [], 'epoch': -1} - + dict1 = {"loss": [], "valLoss": [], "valPSNR": [], "epoch": -1} ### Training - cLoss = dict1['loss'] - valLoss = dict1['valLoss'] - valPSNR = dict1['valPSNR'] + cLoss = dict1["loss"] + valLoss = dict1["valLoss"] + valPSNR = dict1["valPSNR"] ### Main training loop - for epoch in range(dict1['epoch'] + 1, args.epochs): + for epoch in range(dict1["epoch"] + 1, args.epochs): print("Epoch: ", epoch) - + # Append and reset cLoss.append([]) valLoss.append([]) valPSNR.append([]) iLoss = 0 - - # Increment scheduler count + + # Increment scheduler count scheduler.step() - + # for trainIndex, (trainData, trainFrameIndex) in enumerate(trainloader, 0): - for trainIndex, (trainData, trainFrameIndex) in enumerate(voir.iterate("train", trainloader, report_batch=True, batch_size=lambda batch: batch[1].shape[0]), 0): + for trainIndex, (trainData, trainFrameIndex) in enumerate( + voir.iterate( + "train", + trainloader, + report_batch=True, + batch_size=lambda batch: batch[1].shape[0], + ), + 0, + ): ## Getting the input and the target from the training set frame0, frameT, frame1 = trainData - + I0 = frame0.to(device) I1 = 
frame1.to(device) IFrame = frameT.to(device) - + optimizer.zero_grad() - + # Calculate flow between reference frames I0 and I1 flowOut = flowComp(torch.cat((I0, I1), dim=1)) - + # Extracting flows between I0 and I1 - F_0_1 and F_1_0 - F_0_1 = flowOut[:,:2,:,:] - F_1_0 = flowOut[:,2:,:,:] - + F_0_1 = flowOut[:, :2, :, :] + F_1_0 = flowOut[:, 2:, :, :] + fCoeff = model.getFlowCoeff(trainFrameIndex, device) - + # Calculate intermediate flows F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0 F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0 - + # Get intermediate frames from the intermediate flows g_I0_F_t_0 = trainFlowBackWarp(I0, F_t_0) g_I1_F_t_1 = trainFlowBackWarp(I1, F_t_1) - + # Calculate optical flow residuals and visibility maps - intrpOut = ArbTimeFlowIntrp(torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1)) - + intrpOut = ArbTimeFlowIntrp( + torch.cat( + (I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1 + ) + ) + # Extract optical flow residuals and visibility maps F_t_0_f = intrpOut[:, :2, :, :] + F_t_0 F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1 - V_t_0 = F.sigmoid(intrpOut[:, 4:5, :, :]) - V_t_1 = 1 - V_t_0 - + V_t_0 = F.sigmoid(intrpOut[:, 4:5, :, :]) + V_t_1 = 1 - V_t_0 + # Get intermediate frames from the intermediate flows g_I0_F_t_0_f = trainFlowBackWarp(I0, F_t_0_f) g_I1_F_t_1_f = trainFlowBackWarp(I1, F_t_1_f) - + wCoeff = model.getWarpCoeff(trainFrameIndex, device) - - # Calculate final intermediate frame - Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1) - + + # Calculate final intermediate frame + Ft_p = ( + wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f + ) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1) + # Loss recnLoss = L1_lossFn(Ft_p, IFrame) - + prcpLoss = MSE_LossFn(vgg16_conv_4_3(Ft_p), vgg16_conv_4_3(IFrame)) - - warpLoss = L1_lossFn(g_I0_F_t_0, IFrame) + L1_lossFn(g_I1_F_t_1, IFrame) + L1_lossFn(trainFlowBackWarp(I0, 
F_1_0), I1) + L1_lossFn(trainFlowBackWarp(I1, F_0_1), I0) - - loss_smooth_1_0 = torch.mean(torch.abs(F_1_0[:, :, :, :-1] - F_1_0[:, :, :, 1:])) + torch.mean(torch.abs(F_1_0[:, :, :-1, :] - F_1_0[:, :, 1:, :])) - loss_smooth_0_1 = torch.mean(torch.abs(F_0_1[:, :, :, :-1] - F_0_1[:, :, :, 1:])) + torch.mean(torch.abs(F_0_1[:, :, :-1, :] - F_0_1[:, :, 1:, :])) + + warpLoss = ( + L1_lossFn(g_I0_F_t_0, IFrame) + + L1_lossFn(g_I1_F_t_1, IFrame) + + L1_lossFn(trainFlowBackWarp(I0, F_1_0), I1) + + L1_lossFn(trainFlowBackWarp(I1, F_0_1), I0) + ) + + loss_smooth_1_0 = torch.mean( + torch.abs(F_1_0[:, :, :, :-1] - F_1_0[:, :, :, 1:]) + ) + torch.mean(torch.abs(F_1_0[:, :, :-1, :] - F_1_0[:, :, 1:, :])) + loss_smooth_0_1 = torch.mean( + torch.abs(F_0_1[:, :, :, :-1] - F_0_1[:, :, :, 1:]) + ) + torch.mean(torch.abs(F_0_1[:, :, :-1, :] - F_0_1[:, :, 1:, :])) loss_smooth = loss_smooth_1_0 + loss_smooth_0_1 - + # Total Loss - Coefficients 204 and 102 are used instead of 0.8 and 0.4 # since the loss in paper is calculated for input pixels in range 0-255 # and the input to our network is in range 0-1 diff --git a/benchmarks/timm/benchfile.py b/benchmarks/timm/benchfile.py index 50f7e69dc..f8d0652e0 100644 --- a/benchmarks/timm/benchfile.py +++ b/benchmarks/timm/benchfile.py @@ -14,17 +14,21 @@ class TimmBenchmarkPack(Package): def make_env(self): return { **super().make_env(), - "OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8)) + "OMP_NUM_THREADS": str(self.config.get("cpus_per_gpu", 8)), } @property def argv(self): return [ *super().argv, - "--data-dir", self.dirs.data, - "--dataset", "FakeImageNet", - "--output", self.dirs.extra / self.logdir.name / self.tag, - "--checkpoint-hist", 1, + "--data-dir", + self.dirs.data, + "--dataset", + "FakeImageNet", + "--output", + self.dirs.extra / self.logdir.name / self.tag, + "--checkpoint-hist", + 1, ] async def install(self): @@ -32,7 +36,9 @@ async def install(self): timm = self.dirs.code / "pytorch-image-models" if not 
timm.exists(): - timm.clone_subtree("https://github.com/huggingface/pytorch-image-models", BRANCH) + timm.clone_subtree( + "https://github.com/huggingface/pytorch-image-models", BRANCH + ) def build_run_plan(self): # self.config is not the right config for this diff --git a/benchmarks/timm/requirements.cuda.txt b/benchmarks/timm/requirements.cuda.txt index e23485257..5ff47f552 100644 --- a/benchmarks/timm/requirements.cuda.txt +++ b/benchmarks/timm/requirements.cuda.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/timm/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-timm.txt benchmarks/timm/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/timm/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-timm.txt benchmarks/timm/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -10,22 +10,18 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -34,22 +30,23 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torch # triton -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub + # torch giving==0.4.2 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # ptera # voir -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via -r benchmarks/timm/requirements.in idna==3.4 # via @@ -59,15 +56,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -79,11 +72,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -95,11 +88,11 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -107,7 +100,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -115,7 +108,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyyaml==6.0 +pyyaml==6.0.1 # via # -r benchmarks/timm/requirements.in # huggingface-hub @@ -129,11 +122,11 @@ requests==2.31.0 # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -safetensors==0.3.1 +safetensors==0.4.0 # via -r benchmarks/timm/requirements.in six==1.16.0 # via @@ -143,28 +136,27 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/timm/requirements.in # torchvision - # triton -torchvision==0.15.2+cu118 
+torchvision==0.16.0+cu118 # via -r benchmarks/timm/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -172,5 +164,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/timm/requirements.in b/benchmarks/timm/requirements.in index 391761f1a..1ccb50e60 100644 --- a/benchmarks/timm/requirements.in +++ b/benchmarks/timm/requirements.in @@ -3,4 +3,4 @@ torchvision pyyaml huggingface_hub safetensors>=0.2 -voir>=0.2.9,<0.3 +voir diff --git a/benchmarks/timm/requirements.rocm.txt b/benchmarks/timm/requirements.rocm.txt index 12864565b..6b15125f5 100644 --- a/benchmarks/timm/requirements.rocm.txt +++ b/benchmarks/timm/requirements.rocm.txt @@ -1,28 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/timm/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-timm.txt benchmarks/timm/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/timm/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-timm.txt benchmarks/timm/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -certifi==2023.5.7 
+certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -34,22 +34,23 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # pytorch-triton-rocm # torch -fsspec==2023.5.0 +fsspec==2023.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # ptera # voir -huggingface-hub==0.14.1 +huggingface-hub==0.17.3 # via -r benchmarks/timm/requirements.in idna==3.4 # via @@ -59,15 +60,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -79,11 +80,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -95,11 +96,11 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -packaging==23.1 +packaging==23.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -107,7 +108,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -115,11 
+116,11 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -r benchmarks/timm/requirements.in # huggingface-hub @@ -133,11 +134,11 @@ requests==2.31.0 # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -safetensors==0.3.1 +safetensors==0.4.0 # via -r benchmarks/timm/requirements.in six==1.16.0 # via @@ -147,24 +148,24 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/timm/requirements.in # pytorch-triton-rocm # torchvision -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via -r benchmarks/timm/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -172,5 +173,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/timm/voirfile.py b/benchmarks/timm/voirfile.py index 19ac71fa5..5f17d8408 100644 --- a/benchmarks/timm/voirfile.py +++ b/benchmarks/timm/voirfile.py @@ -33,12 +33,14 @@ def setup(args): ov.require(dash) ov.require( - log("value", "progress", "rate", "units", "loss", "gpudata", context="task"), + log( + "value", "progress", "rate", "units", "loss", "gpudata", context="task" + ), rate( interval=options.interval, skip=options.skip, sync=torch.cuda.synchronize if torch.cuda.is_available() else None, - batch_size_calc=lambda b: len(b) * args.world_size + 
batch_size_calc=lambda b: len(b) * args.world_size, ), early_stop(n=options.stop, key="rate", task="train", signal="stop"), gpu_monitor(poll_interval=options.gpu_poll), @@ -46,8 +48,7 @@ def setup(args): # Loss ( - loss_probe - .throttle(1)["loss"] + loss_probe.throttle(1)["loss"] .map(lambda loss: {"task": "train", "loss": float(loss)}) .give() ) diff --git a/benchmarks/torchvision/main.py b/benchmarks/torchvision/main.py index a52cbbec0..843f2246a 100644 --- a/benchmarks/torchvision/main.py +++ b/benchmarks/torchvision/main.py @@ -165,7 +165,10 @@ def main(): if data_directory: args.data = os.path.join(data_directory, "FakeImageNet") - use_cuda = not args.no_cuda and torch.cuda.is_available() + if not args.no_cuda: + assert torch.cuda.is_available(), "Why is CUDA not available" + + use_cuda = not args.no_cuda torch.manual_seed(args.seed) if use_cuda: diff --git a/benchmarks/torchvision/requirements.cuda.txt b/benchmarks/torchvision/requirements.cuda.txt index aa60bdcfb..6bacdaea2 100644 --- a/benchmarks/torchvision/requirements.cuda.txt +++ b/benchmarks/torchvision/requirements.cuda.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/torchvision/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-torchvision.txt benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/torchvision/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-torchvision.txt benchmarks/torchvision/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -10,22 +10,18 @@ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -cmake==3.26.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton codefind==0.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -34,11 +30,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -52,15 +52,11 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -lit==16.0.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # triton -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 @@ -72,11 +68,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -88,7 +84,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision @@ -96,7 +92,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # rich @@ -104,7 +100,7 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # omegaconf @@ -116,7 +112,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 
# via # -c .pin/../.pin/constraints-cuda-torch.txt # voir @@ -128,25 +124,24 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -torch==2.0.1+cu118 +torch==2.1.0+cu118 # via # -r benchmarks/torchvision/requirements.in # torchvision - # triton -torchvision==0.15.2+cu118 +torchvision==0.16.0+cu118 # via -r benchmarks/torchvision/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via -r benchmarks/torchvision/requirements.in -triton==2.0.0 +triton==2.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -154,5 +149,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/benchmarks/torchvision/requirements.in b/benchmarks/torchvision/requirements.in index bfda01636..4e537c03c 100644 --- a/benchmarks/torchvision/requirements.in +++ b/benchmarks/torchvision/requirements.in @@ -1,4 +1,4 @@ torch torchvision tqdm -voir>=0.2.9,<0.3 +voir \ No newline at end of file diff --git a/benchmarks/torchvision/requirements.rocm.txt b/benchmarks/torchvision/requirements.rocm.txt index 09ebf1695..618dff8f8 100644 --- a/benchmarks/torchvision/requirements.rocm.txt +++ b/benchmarks/torchvision/requirements.rocm.txt @@ -1,28 +1,28 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --output-file=benchmarks/torchvision/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-torchvision.txt benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/torchvision/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-torchvision.txt 
benchmarks/torchvision/requirements.in # ---extra-index-url https://download.pytorch.org/whl/rocm5.4.2/ +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf -asttokens==2.2.1 +asttokens==2.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -certifi==2023.5.7 +certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.1.0 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -cmake==3.26.3 +cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -34,11 +34,15 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.0 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm # torch +fsspec==2023.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch giving==0.4.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -52,15 +56,15 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==16.0.5 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich -markupsafe==2.1.2 +markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 @@ -72,11 +76,11 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.1 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -numpy==1.24.3 +numpy==1.26.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -88,7 +92,7 @@ ovld==0.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pillow==9.5.0 +pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision @@ -96,7 +100,7 @@ ptera==1.4.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir 
-pygments==2.15.1 +pygments==2.16.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # rich @@ -104,11 +108,11 @@ pynvml==11.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -pytorch-triton-rocm==2.0.2 +pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -pyyaml==6.0 +pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # omegaconf @@ -120,7 +124,7 @@ requests==2.31.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision -rich==13.3.5 +rich==13.6.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir @@ -132,21 +136,21 @@ sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -torch==2.0.1+rocm5.4.2 +torch==2.1.0+rocm5.6 # via # -r benchmarks/torchvision/requirements.in # pytorch-triton-rocm # torchvision -torchvision==0.15.2+rocm5.4.2 +torchvision==0.16.0+rocm5.6 # via -r benchmarks/torchvision/requirements.in -tqdm==4.65.0 +tqdm==4.66.1 # via -r benchmarks/torchvision/requirements.in -typing-extensions==4.6.2 +typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # reactivex # torch -urllib3==1.26.16 +urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -154,5 +158,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir==0.2.10 +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/config/base.yaml b/config/base.yaml index 109d3ee10..e5043e8e4 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -23,6 +23,42 @@ _torchvision: --no-stdout: true --epochs: 50 +_flops: + inherits: _defaults + definition: ../benchmarks/flops + group: flops + install_group: torch + plan: + method: per_gpu + + tags: + - diagnostic + - flops + + argv: + --number: 10 + --repeat: 90 + +llama: + inherits: _defaults + definition: ../benchmarks/llama + group: llm + install_group: torch + max_duration: 800 + + voir: + options: + stop: 30 + interval: "1s" + + plan: + method: per_gpu + + tags: + - nlp 
+ - llm + + _hf: inherits: _defaults definition: ../benchmarks/huggingface @@ -86,6 +122,44 @@ _accelerate_opt: use_deepspeed: true num_machines: 1 + +fp16: + inherits: _flops + + argv: + --number: 30 + --repeat: 90 + --m: 8192 + --n: 8192 + --dtype: fp16 + + +bf16: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: bf16 + +tf32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + --tf32: true + +fp32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + + resnet50: inherits: _torchvision tags: @@ -100,7 +174,7 @@ resnet50: efficientnet_b4: inherits: _torchvision - tags: + tags: - vision - classification diff --git a/config/scaling.yaml b/config/scaling.yaml new file mode 100644 index 000000000..f4947c213 --- /dev/null +++ b/config/scaling.yaml @@ -0,0 +1,239 @@ +bert-fp16: + arg: --batch-size + model: + 1: 4108.75 MiB + 8: 8614.75 MiB + 16: 14254.75 MiB + 32: 24604.75 MiB + 64: 47216.75 MiB + 112: 81140.75 MiB + optimized: 128 +bert-fp32: + arg: --batch-size + model: + 1: 4206.75 MiB + 8: 10240.75 MiB + 16: 17646.75 MiB + 32: 31568.75 MiB + 64: 61200.75 MiB + 80: 76034.75 MiB + optimized: 128 +bert-tf32: + arg: --batch-size + model: + 1: 4204.75 MiB + 8: 10242.75 MiB + 16: 17648.75 MiB + 32: 31570.75 MiB + 64: 61202.75 MiB + 80: 76036.75 MiB + optimized: 128 +bert-tf32-fp16: + arg: --batch-size + model: + 1: 4108.75 MiB + 8: 8614.75 MiB + 16: 14254.75 MiB + 32: 24604.75 MiB + 64: 47216.75 MiB + 112: 81140.75 MiB + optimized: 128 +convnext_large-fp16: + arg: --batch-size + model: + 1: 3228.75 MiB + 8: 4726.75 MiB + 16: 6254.75 MiB + 32: 9418.75 MiB + 40: 10940.75 MiB + 64: 15238.75 MiB + 128: 27466.75 MiB + 416: 80628.75 MiB + optimized: 128 +convnext_large-fp32: + arg: --batch-size + model: + 1: 3268.75 MiB + 8: 5824.75 MiB + 16: 8774.75 MiB + 32: 14548.75 MiB + 64: 26274.75 MiB + 128: 49586.75 MiB + 216: 80694.75 MiB + optimized: 128 +convnext_large-tf32: + arg: --batch-size + model: + 1: 3268.75 MiB + 8: 5824.75 
MiB + 16: 8774.75 MiB + 32: 14548.75 MiB + 64: 26274.75 MiB + 128: 49586.75 MiB + 216: 80694.75 MiB + optimized: 128 +convnext_large-tf32-fp16: + arg: --batch-size + model: + 1: 3228.75 MiB + 8: 4726.75 MiB + 16: 6254.75 MiB + 32: 9418.75 MiB + 40: 10940.75 MiB + 64: 15238.75 MiB + 128: 27466.75 MiB + 416: 80628.75 MiB + optimized: 128 +davit_large: + arg: --batch-size + model: + 1: 4882.75 MiB + 8: 6330.75 MiB + 16: 8216.75 MiB + 24: 10182.75 MiB + 32: 12240.75 MiB + 64: 19422.75 MiB + 128: 34492.75 MiB + 328: 81502.75 MiB + optimized: 128 +davit_large-multi: + arg: --batch-size + model: + 1: 4862.75 MiB + 8: 6330.75 MiB + 16: 8216.75 MiB + 24: 10730.75 MiB + 32: 12240.75 MiB + 64: 19422.75 MiB + 128: 34248.75 MiB + 328: 81742.75 MiB + optimized: 128 +focalnet: + arg: --batch-size + model: + 1: 3128.75 MiB + 8: 4368.75 MiB + 16: 5608.75 MiB + 32: 8566.75 MiB + 40: 9850.75 MiB + 64: 14750.75 MiB + 128: 26398.75 MiB + 424: 81368.75 MiB + optimized: 128 +opt-1_3b: + arg: --per_gpu_batch_size + optimized: 1 +opt-1_3b-multinode: + arg: --per_gpu_batch_size + optimized: 1 +opt-6_7b-multinode: + arg: --per_gpu_batch_size + optimized: 1 +reformer: + arg: --batch-size + model: + 1: 1916.75 MiB + 8: 4512.75 MiB + 16: 7486.75 MiB + 24: 10470.75 MiB + 32: 13454.75 MiB + 64: 25408.75 MiB + 128: 49280.75 MiB + 208: 79120.75 MiB + optimized: 128 +regnet_y_128gf: + arg: --batch-size + model: + 1: 6876.75 MiB + 8: 8524.75 MiB + 16: 11426.75 MiB + 32: 18324.75 MiB + 64: 31558.75 MiB + 128: 56484.75 MiB + 184: 78714.75 MiB + optimized: 128 +resnet152: + arg: --batch-size + model: + 1: 2710.75 MiB + 8: 3298.75 MiB + 16: 4164.75 MiB + 32: 6202.75 MiB + 64: 10120.75 MiB + 72: 10860.75 MiB + 128: 18076.75 MiB + 640: 81354.75 MiB + optimized: 128 +resnet152-multi: + arg: --batch-size + model: + 1: 2600.75 MiB + 8: 3374.75 MiB + 16: 4148.75 MiB + 32: 6374.75 MiB + 64: 10338.75 MiB + 72: 10582.75 MiB + 128: 18104.75 MiB + 640: 81820.75 MiB + optimized: 128 +resnet50: + arg: --batch-size + 
model: + 1: 1962.75 MiB + 8: 2134.75 MiB + 16: 2460.75 MiB + 32: 3206.75 MiB + 64: 4734.75 MiB + 128: 8242.75 MiB + 184: 11072.75 MiB + 256: 14854.75 MiB + 512: 27900.75 MiB + 1552: 81146.75 MiB + 1560: 81590.75 MiB + optimized: 64 +rwkv: + arg: --micro_bsz + model: + 1: 3602.75 MiB + 8: 4530.75 MiB + 16: 5594.75 MiB + 64: 11452.75 MiB + 128: 19448.75 MiB + 632: 81880.75 MiB + optimized: 16 +stargan: + arg: --batch_size + model: + 1: 37896.75 MiB + 8: 19165.75 MiB + 16: 37430.75 MiB + 32: 73824.75 MiB + optimized: 16 +super-slomo: + arg: --train_batch_size + model: + 1: 3016.75 MiB + 8: 10288.75 MiB + 16: 18718.75 MiB + 64: 66308.75 MiB + 80: 81180.75 MiB + optimized: 32 +t5: + arg: --batch-size + model: + 1: 4396.75 MiB + 8: 18684.75 MiB + 16: 35448.75 MiB + 32: 68876.75 MiB + optimized: 128 +whisper: + arg: --batch-size + model: + 1: 2070.75 MiB + 8: 6108.75 MiB + 16: 10540.75 MiB + 32: 19282.75 MiB + 64: 36728.75 MiB + 128: 71638.75 MiB + 144: 80412.75 MiB + optimized: 128 diff --git a/config/standard.yaml b/config/standard.yaml index 809f0a134..c6b3065c2 100644 --- a/config/standard.yaml +++ b/config/standard.yaml @@ -5,6 +5,10 @@ include: # Enabled tests # ################# +llama: + enabled: true + weight: 1.0 + resnet50: enabled: true weight: 1.0 @@ -121,6 +125,22 @@ rwkv: enabled: true weight: 1.0 +fp16: + enabled: true + weight: 0.0 + +bf16: + enabled: true + weight: 0.0 + +tf32: + enabled: true + weight: 0.0 + +fp32: + enabled: true + weight: 0.0 + ################## # Disabled tests # ################## diff --git a/constraints/cuda.txt b/constraints/cuda.txt index 68242da32..cb2bbd770 100644 --- a/constraints/cuda.txt +++ b/constraints/cuda.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -deepspeed==0.8.3 +voir > 0.2.10 + diff --git a/constraints/rocm.txt b/constraints/rocm.txt index 8d434fa36..9b46f6813 100644 --- a/constraints/rocm.txt +++ b/constraints/rocm.txt @@ -1,2 +1,2 @@ ---extra-index-url 
https://download.pytorch.org/whl/rocm5.4.2/ -deepspeed==0.8.3 +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ +voir > 0.2.10 \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 05621224f..07b723a60 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,9 +17,9 @@ # -- Project information ----------------------------------------------------- -project = 'milabench' -copyright = '2022, Mila IDT' -author = 'Mila IDT' +project = "milabench" +copyright = "2022, Mila IDT" +author = "Mila IDT" # -- General configuration --------------------------------------------------- @@ -34,12 +34,12 @@ ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -52,4 +52,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ["_static"] diff --git a/docs/index.rst b/docs/index.rst index 31c2ceb8e..3ac990fcf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,7 +11,7 @@ Welcome to milabench's documentation! 
dev-usage.rst new_benchmarks.rst reference.rst - + sizer.rst Indices and tables ================== diff --git a/docs/sizer.rst b/docs/sizer.rst new file mode 100644 index 000000000..14066a07a --- /dev/null +++ b/docs/sizer.rst @@ -0,0 +1,78 @@ +Scaling +======= + +Milabench is able to select a batch size depending on the +underlying GPU capacity. + +The feature is drivent by the ``config/scaling.yaml`` file, +which holds information about the memory usage of a given bench +given the batch size. + + +.. code-block:: yaml + + convnext_large-fp32: + arg: --batch-size + default: 128 + model: + 8: 5824.75 MiB + 16: 8774.75 MiB + 32: 14548.75 MiB + 64: 26274.75 MiB + 128: 49586.75 MiB + + +Auto Batch size +--------------- + +To enable batch resizing an environment variable can be specified. +It will use the capacity inside the `system.yaml` configurattion file. + +.. code-block:: yaml + + system: + arch: cuda + gpu: + capacity: 81920 MiB + nodes: [] + + +.. code-block:: bash + + MILABENCH_SIZER_AUTO=1 milabench run --system system.yaml + + +For better performance, a multiple constraint can be added. +This will force batch size to be a multiple of 8. + +.. code-block:: bash + + MILABENCH_SIZER_MULTIPLE=8 milabench run + + +Batch size override +------------------- + +The batch size can be globally overriden + +.. code-block:: bash + + MILABENCH_SIZER_BATCH_SIZE=64 milabench run + + +Memory Usage Extractor +---------------------- + +To automate batch size ``<=>`` memory usage data gathering +a validation layer that retrieve the batch size and the memory usage +can be enabled. + +In the example below, once milabench has finished running it will +generate a new scaling configuration with the data extracted from the run. + + +.. 
code-block:: bash + + export MILABENCH_SIZER_SAVE="newscaling.yaml" + MILABENCH_SIZER_BATCH_SIZE=64 milabench run + diff --git a/milabench/_version.py b/milabench/_version.py index 5a191cf45..4f989718f 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "cce4c99" -__commit__ = "cce4c994559481334f9c15b01caa2d4e7b0cf82a" -__date__ = "2023-07-04 17:33:49 -0400" +__tag__ = "v0.0.6-30-g62b44f0" +__commit__ = "62b44f0e6190dd686c9c2f9ac89c2a383b8ebb9f" +__date__ = "2023-11-07 10:45:02 -0500" diff --git a/milabench/cli.py b/milabench/cli.py index d163114ff..5b6a7599e 100644 --- a/milabench/cli.py +++ b/milabench/cli.py @@ -1,6 +1,7 @@ import json import os import re +import io import runpy import shutil import subprocess @@ -12,7 +13,7 @@ from coleo import Option, config as configuration, default, run_cli, tooled from omegaconf import OmegaConf -from voir.instruments.gpu import deduce_backend, select_backend +from voir.instruments.gpu import deduce_backend, select_backend, get_gpu_info from milabench.alt_async import proceed from milabench.utils import blabla, validation_layers, multilogger, available_layers @@ -33,6 +34,8 @@ from .report import make_report from .slurm import expand_node_list from .summary import aggregate, make_summary +from .schedule import launch_milabench, post_comment_on_pr +from .sizer import MemoryUsageExtractor def main(argv=None): @@ -202,7 +205,7 @@ def _get_multipack( if base is None: base = os.environ.get("MILABENCH_BASE", None) - if not base: + if not return_config and not base: sys.exit("Error: Neither --base nor $MILABENCH_BASE are set.") base = base and os.path.abspath(os.path.expanduser(base)) @@ -253,6 +256,30 @@ def is_selected(defn): ) +def _parse_report(pth): + with pth.open() as f: + lines = f.readlines() + data = [] + good_lines = 0 + bad_lines = 0 + + for line in lines: + try: + data.append(json.loads(line)) + good_lines += 1 + except Exception: + 
import traceback + + print(f"Could not parse line inside {pth}\n\t- {line}") + traceback.print_exc() + bad_lines += 1 + + if good_lines == 0: + print(f"Unknow format for file {pth}") + + return data + + def _read_reports(*runs): all_data = {} for folder in runs: @@ -261,17 +288,8 @@ def _read_reports(*runs): if not file.endswith(".data"): continue pth = XPath(parent) / file - with pth.open() as f: - lines = f.readlines() - try: - data = [json.loads(line) for line in lines] - except Exception: - import traceback - - print(f"Could not parse line inside {pth}\n\t- {line}") - traceback.print_exc() - else: - all_data[str(pth)] = data + all_data[str(pth)] = _parse_report(pth) + return all_data @@ -353,7 +371,7 @@ def run(): report: Option & bool = True # Which type of dashboard to show (short, long, or no) - dash: Option & str = os.environ.get("MILABENCH_DASH", "long") + dash: Option & str = os.getenv("MILABENCH_DASH", "long") noterm: Option & bool = os.getenv("MILABENCH_NOTERM", "0") == "1" @@ -379,11 +397,13 @@ def run(): # Terminal Formatter slows down the dashboard, # if lots of info needs to be printed # in particular rwkv - TerminalFormatter() if not noterm else None, - dash_class and dash_class(), + # TerminalFormatter() if not noterm else None, + # dash_class and dash_class(), + TerminalFormatter(), TextReporter("stdout"), TextReporter("stderr"), DataReporter(), + MemoryUsageExtractor(), *validation_layers(*layers, short=not fulltrace), ], mp=mp, @@ -399,7 +419,10 @@ def run(): reports = None if runs: reports = _read_reports(*runs) + assert len(reports) != 0, "No reports found" + summary = make_summary(reports.values()) + assert len(summary) != 0, "No summaries" make_report( summary, @@ -667,6 +690,7 @@ def report(): title=None, sources=runs, errdata=reports and _error_report(reports), + stream=sys.stdout, ) def pip(): @@ -686,16 +710,29 @@ def slurm_system(): node_list = expand_node_list(os.getenv("SLURM_JOB_NODELIST", "")) def make_node(i, ip): - node = 
{"name": ip, "ip": ip, "user": getpass.getuser(), "main": i == 0} + node = { + "name": ip, + "ip": ip, + "user": getpass.getuser(), + "main": i == 0, + } if i == 0: node["port"] = 8123 return node - system = dict( - arch="cuda", nodes=[make_node(i, ip) for i, ip in enumerate(node_list)] - ) + capacity = float("+inf") + + for k, v in get_gpu_info("cuda")["gpus"].items(): + capacity = min(v["memory"]["total"], capacity) + + # nvidia-smi --query-gpu=memory.total --format=csv + system = { + "arch": "cuda", + "gpu": {"capacity": f"{int(capacity)} MiB"}, + "nodes": [make_node(i, ip) for i, ip in enumerate(node_list)], + } import yaml @@ -734,201 +771,71 @@ def publish(): backend = SQLAlchemy(uri, meta_override=meta) publish_archived_run(backend, folder) - def container(): - """Build a container image (might not work properly at the moment).""" + def schedule(): + """Launch a slurm job to run milabench""" + # milabench schedule --sync -- --select resnet50 - # Configuration file - # [positional] - config_file: Option & str = None + # tail -f on the slurm job + sync: Option & bool = False - config = _get_multipack(config, return_config=True) - config_file = XPath(config["defaults"]["config_file"]) - config_base = XPath(config["defaults"]["config_base"]) - benchmarks = config["benchmarks"] + # Print the command and return without running it + dry: Option & bool = False - # The container type to create - type: Option & str = None - - # Include the dataset in the image - include_data: Option & bool = False - - # Optional path to copy build dir to, instead of building the image. - # This directory must not exist and will be created. - output_dir: Option & str = None - - # File in which to generate the SIF image (Singularity). - # Defaults to milabench.sif. - # [alias: -o] - output_file: Option & str = None + # pip arguments + # [remainder] + args: Option = [] - # Optional python version to use for the image, ignored for - # conda-based benchmarks. 
Can be specified as any of - # ('3', '3.9', '3.9.2') - python_version: Option & str = "3.9" + launch_milabench(args, sbatch_args=None, dry=dry, sync=sync) - # Milabench source to clone from - milabench: Option & str = "v2" + def write_report_to_pr(): + remote: str & Option - # The tag for the generated container - tag: Option & str = None + branch: str & Option - if type not in ["docker", "singularity"]: - sys.exit(f"Unsupported type {type}") + base: Option & str = os.getenv("MILABENCH_BASE", None) - with tempfile.TemporaryDirectory() as base: - root = XPath(base) + config: Option & str = os.getenv("MILABENCH_CONFIG", None) - common_base = config_base + token: str & Option = os.getenv("MILABENCH_GITHUB_PAT") - # Figure out common base between the benchmark config and all - # the benchmarks. - for defn in benchmarks.values(): - pack = XPath(defn["definition"]).expanduser() - while not pack.is_relative_to(common_base): - common_base = common_base.parent + assert base is not None - def _transfer(pth): - dest = root / pth.relative_to(common_base) - shutil.copytree(pth, dest, dirs_exist_ok=True) + runfolder = os.path.join(base, "runs") - for defn in benchmarks.values(): - _transfer(XPath(defn["definition"])) + def filter(folder): + for f in ("install", "prepare"): + if f in folder: + return False + return True - _transfer(config_base) + runs = [] + for folder in os.listdir(runfolder): + if filter(folder): + runs.append(os.path.join(runfolder, folder)) - # We check all configs since they may not have all the same setting - use_conda = any( - defn["venv"]["type"] == "conda" for defn in benchmarks.values() - ) + report = _short_make_report(runs, config) - if "//" not in milabench: - milabench = ( - f"git+https://github.com/mila-iqia/milabench.git@{milabench}" - ) + post_comment_on_pr(remote, branch, "```\n" + report + "\n```", token) - if type == "docker": - if output_file is not None: - sys.exit("Error: --output-file only valid with Singularity") - tag = tag or 
"milabench" - with (root / "Dockerfile").open("w") as f: - f.write( - dockerfile_template( - milabench_req=milabench, - include_data=include_data, - use_conda=use_conda, - python_version=python_version, - config_file=config_file.relative_to(common_base), - ) - ) - if output_dir: - root.copy(output_dir) - else: - subprocess.check_call(["docker", "build", ".", "-t", tag], cwd=root) - - elif type == "singularity": - if tag is not None: - sys.exit("Error: --tag only valid with Docker") - output_file = output_file or "milabench.sif" - - with (root / "milabench.def").open("w") as f: - f.write( - singularitydef_template( - milabench_req=milabench, - include_data=include_data, - use_conda=use_conda, - python_version=python_version, - config_file=config_file.relative_to(common_base), - ) - ) - if output_dir: - root.copy(output_dir) - else: - user = os.environ["USER"] - filename = str(XPath(output_file).absolute()) - singularity = subprocess.check_output( - ["which", "singularity"] - ).strip() - subprocess.check_call( - ["sudo", singularity, "build", filename, "milabench.def"], - cwd=root, - ) - subprocess.check_call(["sudo", "chown", f"{user}:{user}", filename]) +def _short_make_report(runs, config): + reports = None -def dockerfile_template( - milabench_req, include_data, use_conda, python_version, config_file -): - conda_clean = "conda clean -a" if use_conda else "echo" - return f""" -FROM { 'continuumio/miniconda3' if use_conda else f'python:{python_version}-slim' } + if runs: + reports = _read_reports(*runs) + summary = make_summary(reports.values()) -RUN apt-get update && apt-get install --no-install-suggests --no-install-recommends -y \ - git \ - wget \ - patch \ - && apt-get clean - -RUN mkdir /bench && mkdir /base -ENV MILABENCH_BASE /base -# This is to signal to milabench to use that as fallback -ENV VIRTUAL_ENV /base/venv/_ -ENV MILABENCH_CONFIG /bench/{ config_file } -ENV HEADLESS 1 -WORKDIR /base - -RUN echo '{ milabench_req }' > /version.txt - -COPY / /bench - 
-RUN pip install -U pip && \ - pip install -r /version.txt && \ - milabench install && \ - { conda_clean } && \ - pip cache purge - -{ 'RUN milabench prepare' if include_data else '' } + if config: + config = _get_multipack(config, return_config=True) -CMD ["milabench", "run"] -""" + stream = io.StringIO() + make_report( + summary, + weights=config, + stream=stream, + sources=runs, + errdata=reports and _error_report(reports), + ) -def singularitydef_template( - milabench_req, include_data, use_conda, python_version, config_file -): - conda_clean = "conda clean -a" if use_conda else "echo" - return f"""\ -BootStrap: docker -From: { 'continuumio/miniconda3' if use_conda else f'python:{python_version}-slim' } - -%files - . /bench - -%environment - export MILABENCH_BASE=/base - export MILABENCH_CONFIG=/bench/{ config_file } - export HEADLESS=1 - -%post - export MILABENCH_BASE=/base - export MILABENCH_CONFIG=/bench/{ config_file } - export HEADLESS=1 - - apt-get update && apt-get install --no-install-suggests --no-install-recommends -y git wget patch - apt-get clean - - mkdir /base - cd /bench - - echo '{ milabench_req }' > /version.txt - pip install -U pip && \ - pip install -r /version.txt && \ - milabench install && \ - { conda_clean } && \ - pip cache purge -{ ' milabench prepare' if include_data else '' } - - chmod -R o+rwx /base /bench - -%runscript - milabench run -""" + return stream.getvalue() diff --git a/milabench/config.py b/milabench/config.py index da29da294..13fad3161 100644 --- a/milabench/config.py +++ b/milabench/config.py @@ -1,4 +1,5 @@ import socket +import contextvars import yaml from omegaconf import OmegaConf @@ -6,6 +7,11 @@ from .fs import XPath from .merge import merge +from voir.instruments.gpu import get_gpu_info + + +system_global = contextvars.ContextVar("system") +config_global = contextvars.ContextVar("Config") def relative_to(pth, cwd): @@ -69,10 +75,14 @@ def build_config(*config_files): all_configs = {} for layer in 
_config_layers(config_files): all_configs = merge(all_configs, layer) + for name, bench_config in all_configs.items(): all_configs[name] = resolve_inheritance(bench_config, all_configs) + for name, bench_config in all_configs.items(): all_configs[name] = finalize_config(name, bench_config) + + config_global.set(all_configs) return all_configs @@ -101,6 +111,26 @@ def get_remote_ip(): return set(result) +def _resolve_ip(ip): + # Resolve the IP + try: + hostname, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) + lazy_raise = None + except socket.gaierror as err: + # Get Addr Info (GAI) Error + # + # When we are connecting to a node through a ssh proxy jump + # the node IPs/Hostnames are not available until we reach + # the first node inside the cluster + # + hostname = ip + aliaslist = [] + ipaddrlist = [] + lazy_raise = err + + return hostname, aliaslist, ipaddrlist, lazy_raise + + def resolve_addresses(nodes): # Note: it is possible for self to be none # if we are running milabench on a node that is not part of the system @@ -111,27 +141,21 @@ def resolve_addresses(nodes): ip_list = get_remote_ip() for node in nodes: - # Resolve the IP - try: - hostname, aliaslist, ipaddrlist = socket.gethostbyaddr(node["ip"]) - - except socket.gaierror as err: - # Get Addr Info (GAI) Error - # - # When we are connecting to a node through a ssh proxy jump - # the node IPs/Hostnames are not available until we reach - # the first node inside the cluster - # - hostname = node["ip"] - aliaslist = [] - ipaddrlist = [] - - lazy_raise = err + hostname, aliaslist, ipaddrlist, lazy_raise = _resolve_ip(node["ip"]) node["hostname"] = hostname node["aliaslist"] = aliaslist node["ipaddrlist"] = ipaddrlist + if hostname.endswith(".server.mila.quebec.server.mila.quebec"): + print() + print("Hostname was extra long for no reason") + print(hostname, socket.gethostname()) + print() + + # why is this happening + hostname = hostname[: -len(".server.mila.quebec")] + is_local = ( ("127.0.0.1" in 
ipaddrlist) or (hostname in ("localhost", socket.gethostname())) @@ -151,6 +175,15 @@ def resolve_addresses(nodes): return self +def get_gpu_capacity(): + capacity = float("+inf") + + for k, v in get_gpu_info("cuda")["gpus"].items(): + capacity = min(v["memory"]["total"], capacity) + + return capacity + + def build_system_config(config_file, defaults=None): """Load the system configuration, verify its validity and resolve ip addresses @@ -172,6 +205,9 @@ def build_system_config(config_file, defaults=None): system = config.get("system", {}) + if "gpu" not in system: + system["gpu"] = {"capacity": f"{int(get_gpu_capacity())} MiB"} + if system.get("sshkey") is not None: system["sshkey"] = str(XPath(system["sshkey"]).resolve()) @@ -180,4 +216,5 @@ def build_system_config(config_file, defaults=None): self = resolve_addresses(system["nodes"]) system["self"] = self + system_global.set(system) return config diff --git a/milabench/dashboard/__init__.py b/milabench/dashboard/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/live_report.py b/milabench/dashboard/live_report.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/rawoutput.py b/milabench/dashboard/rawoutput.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/executors.py b/milabench/executors.py index 5e5f9a3d8..3307aea37 100644 --- a/milabench/executors.py +++ b/milabench/executors.py @@ -123,7 +123,6 @@ async def execute(self, phase="run", timeout=False, timeout_delay=600, **kwargs) for pack in self.packs(): pack.phase = phase - timeout_tasks = [] for pack, argv, _kwargs in self.commands(): await pack.send(event="config", data=pack.config) @@ -630,6 +629,19 @@ def __init__(self, executor: Executor, gpus: list = None, **kwargs) -> None: super().__init__(*executors, **kwargs) +# +# Check if we need this +# I think if we use python script.py it will load +# the right env and we do not need the activator +# +class 
ActivatorExecutor(SingleCmdExecutor): + def __init__(self, pack: pack.BasePackage, **kwargs): + super().__init__(pack, **kwargs) + + def _argv(self, **_) -> List: + return [f"{self.pack.dirs.code / 'activator'}", f"{self.pack.dirs.venv}"] + + # Accelerate class AccelerateLaunchExecutor(SingleCmdExecutor): """Execute a `BasePackage` with Accelerate @@ -660,7 +672,7 @@ def _argv(self, **_) -> List: ngpu = len(get_gpu_info()["gpus"].values()) nproc = ngpu * num_machines - assert nproc > 0 + assert nproc > 0, f"nproc: {nproc} num_machines: {num_machines} ngpu: {ngpu}" deepspeed_argv = ( [ diff --git a/milabench/pack.py b/milabench/pack.py index 2656fec55..ba8d48723 100644 --- a/milabench/pack.py +++ b/milabench/pack.py @@ -95,7 +95,10 @@ def copy(self, config): @property def argv(self): - return assemble_options(self.config.get("argv", [])) + # Circular import + from .sizer import scale_argv + + return scale_argv(self, assemble_options(self.config.get("argv", []))) @property def tag(self): @@ -203,11 +206,14 @@ async def execute(self, *args, cwd=None, env={}, external=False, **kwargs): args = [str(x) for x in args] if cwd is None: cwd = self.dirs.code + + exec_env = self.full_env(env) if not external else {**os.environ, **env} + return await run( args, **kwargs, info={"pack": self}, - env=self.full_env(env) if not external else {**os.environ, **env}, + env=exec_env, constructor=BenchLogEntry, cwd=cwd, process_accumulator=self.processes, @@ -376,7 +382,7 @@ async def pin( ivar = self.config.get("install_variant", None) if ivar == "unpinned": raise Exception("Cannot pin the 'unpinned' variant.") - assert self.phase == "pin" + # assert self.phase == "pin" for base_reqs, reqs in self.requirements_map().items(): if not base_reqs.exists(): raise FileNotFoundError( diff --git a/milabench/report.py b/milabench/report.py index 1c0c63f56..51a59d162 100644 --- a/milabench/report.py +++ b/milabench/report.py @@ -41,15 +41,19 @@ def _make_row(summary, compare, weights): # Sum of 
all the GPU performance # to get the overall perf of the whole machine - acc = 0 - for _, metrics in summary["per_gpu"].items(): - acc += metrics[metric] + + if "per_gpu" in summary: + acc = 0 + for _, metrics in summary["per_gpu"].items(): + acc += metrics[metric] + else: + acc = row["perf"] success_ratio = 1 - row["fail"] / row["n"] score = (acc if acc > 0 else row["perf"]) * success_ratio row["score"] = score - row["weight"] = weights.get("weight", summary["weight"]) + row["weight"] = weights.get("weight", summary.get("weight", 0)) # ---- return row @@ -219,6 +223,7 @@ def make_report( sources=None, errdata=None, weights=None, + stream=sys.stdout, ): if weights is None: weights = dict() @@ -228,7 +233,7 @@ def make_report( # Reorder columns df = df[sorted(df.columns, key=lambda k: columns_order.get(k, 0))] - out = Outputter(stdout=sys.stdout, html=html) + out = Outputter(stdout=stream, html=html) if sources: if isinstance(sources, str): diff --git a/milabench/schedule.py b/milabench/schedule.py new file mode 100644 index 000000000..a8ddefa42 --- /dev/null +++ b/milabench/schedule.py @@ -0,0 +1,219 @@ +from dataclasses import dataclass +import re +import importlib_resources +import subprocess +import requests +import os + + +def popen(cmd, callback=None): + def println(line): + print(line, end="") + + if callback is None: + callback = println + + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + shell=False, + ) as process: + + def readoutput(): + process.stdout.flush() + for line in process.stdout.readlines(): + if callback: + callback(line) + + try: + while process.poll() is None: + readoutput() + + readoutput() + return 0 + + except KeyboardInterrupt: + print("Stopping due to user interrupt") + process.kill() + return -1 + + +def sbatch(args, sync=False, tags=None, **kwargs): + jobid_regex = re.compile(r"Submitted batch job (?P<jobid>[0-9]*)") + jobid = None + + def readline(line): + nonlocal jobid + + if match := 
jobid_regex.match(line): + data = match.groupdict() + jobid = data["jobid"] + + print(line, end="") + + code = popen(["sbatch"] + args, readline) + + if jobid is not None and sync: + try: + subprocess.run(["touch", f"slurm-{jobid}.out"]) + subprocess.run(["tail", "-f", f"slurm-{jobid}.out"]) + except KeyboardInterrupt: + pass + + return code, jobid + + +def shell(cmd): + return subprocess.check_output( + cmd.split(" "), stderr=subprocess.STDOUT, text=True + ).strip() + + +class SlurmBatchOptions: + pass + + +@dataclass +class SetupOptions: + branch: str = "master" + origin: str = "https://github.com/mila-iqia/milabench.git" + config: str = "milabench/config/standard.yaml" + env: str = "./env" + python: str = "3.9" + + def deduce_remote(self, current_branch): + prefix = "refs/heads/" + + # Fetch all remotes + remotes = shell("git remote").splitlines() + possible_remotes = [] + + # Find remotes that have our branch + for remote in remotes: + branches = shell(f"git ls-remote --heads {remote}").splitlines() + + for branch in branches: + _, name = branch.split("\t") + name = name[len(prefix) :] + + if current_branch == name: + possible_remotes.append(remote) + + if len(possible_remotes) == 1: + return possible_remotes[0] + + raise RuntimeError(f"Multiple suitable remotes found {possible_remotes}") + + def deduce_from_repository(self, remote=None): + self.branch = shell("git rev-parse --abbrev-ref HEAD") + + if remote is None: + remote = self.deduce_remote(self.branch) + + self.origin = shell(f"git remote get-url {remote}") + + def arguments(self): + return [ + "-b", + self.branch, + "-o", + self.origin, + "-c", + self.config, + "-e", + self.env, + "-p", + self.python, + ] + + +def launch_milabench(args, sbatch_args=None, dry: bool = False, sync: bool = False): + sbatch_script = ( + importlib_resources.files(__name__) / "scripts" / "milabench_run.bash" + ) + sbatch_script = str(sbatch_script) + + # salloc --gres=gpu:rtx8000:1 --mem=64G --cpus-per-gpu=4 + + if sbatch_args 
is None: + sbatch_args = [ + "--ntasks=1", + "--gpus-per-task=rtx8000:1", + "--cpus-per-task=4", + "--time=01:30:00", + "--ntasks-per-node=1", + "--mem=64G", + ] + + script_args = SetupOptions() + script_args.deduce_from_repository() + script_args = script_args.arguments() + + cmd = sbatch_args + [sbatch_script] + script_args + args + + if dry: + print("sbatch " + " ".join(cmd)) + code = 0 + else: + code, _ = sbatch(cmd, sync=sync, tags=None) + + return code + + +def get_remote_owner(remote): + sshremote = re.compile( + r"git@[A-Za-z]*\.[A-Za-z]*:(?P<owner>[A-Za-z\-.0-9]*)\/([A-Za-z]*).([A-Za-z]*)" + ) + httpsremote = re.compile( + r"https:\/\/[A-Za-z]*\.[A-Za-z]*\/(?P<owner>[A-Za-z\-.0-9]*)\/([A-Za-z]*).([A-Za-z]*)" + ) + + patterns = [sshremote, httpsremote] + + for pat in patterns: + if match := sshremote.match(remote): + results = match.groupdict() + return results["owner"] + + return None + + +def post_comment_on_pr(remote, branch, comment, access_token=None): + owner = get_remote_owner(remote) + assert owner is not None, "Remote owner not found" + + if access_token is None: + access_token = os.getenv("MILABENCH_GITHUB_PAT") + + url = "https://api.github.com/repos/mila-iqia/milabench/pulls" + + response = requests.get(url, params={"head": f"{owner}:{branch}"}) + + if response.status_code != 200: + raise RuntimeError(response) + + pull_requests = response.json() + + if not pull_requests: + raise RuntimeError("No matching pull requests found.") + + assert len(pull_requests) == 1, "Multiple PR found" + + pr = pull_requests[0] + post_url = pr["_links"]["comments"]["href"] + + data = { + "body": comment, + } + + headers = { + "Authorization": f"Bearer {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.post(post_url, json=data, headers=headers) + + if response.status_code != 201: + raise RuntimeError(response, response.json()) diff --git a/milabench/scripts/milabench_docker.bash b/milabench/scripts/milabench_docker.bash new file mode 100644 
index 000000000..7a9bfcc19 --- /dev/null +++ b/milabench/scripts/milabench_docker.bash @@ -0,0 +1,5 @@ +#!/bin/bash + + +# CPU only + diff --git a/milabench/scripts/milabench_pin.bash b/milabench/scripts/milabench_pin.bash new file mode 100644 index 000000000..c2f7ad399 --- /dev/null +++ b/milabench/scripts/milabench_pin.bash @@ -0,0 +1,15 @@ +#!/bin/bash + +# CPU only +# 16Gb + + + +MILABENCH_GPU_ARCH=cuda milabench pin --config config/standard.yaml --from-scratch --base /tmp +MILABENCH_GPU_ARCH=rocm milabench pin --config config/standard.yaml --from-scratch --base /tmp + + +cd $SLURM_TMPDIR/milabench +git add --all +git commit -m "milabench pin" +git push $ORIGIN $BRANCH \ No newline at end of file diff --git a/milabench/scripts/milabench_run.bash b/milabench/scripts/milabench_run.bash new file mode 100755 index 000000000..693a80139 --- /dev/null +++ b/milabench/scripts/milabench_run.bash @@ -0,0 +1,144 @@ +#!/bin/bash + +function usage() { + echo "Usage: $0 [-m] [-p]" + echo " -h Display this help message." + echo " -b arch GPU arch (default: cuda)" + echo " -b BRANCH Branch to checkout (default: master)" + echo " -o ORIGIN Origin to use (default: github/mila/milabench)" + echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)" + echo " -e ENV Environment (default: ./env)" + echo " -p PYTHON Python version (default: 3.9)" + echo " ARGUMENT Any additional argument you want to process." 
+ exit 1 +} + +ARCH="cuda" +PYTHON="3.9" +BRANCH="master" +ORIGIN="https://github.com/mila-iqia/milabench.git" +LOC="$SLURM_TMPDIR" +CONFIG="$LOC/milabench/config/standard.yaml" +BASE="$LOC/base" +ENV="./env" +REMAINING_ARGS="" + + +while getopts ":hm:p:e:b:o:c:" opt; do + case $opt in + h) + usage + ;; + p) + PYTHON="$OPTARG" + ;; + b) + BRANCH="$OPTARG" + ;; + o) + ORIGIN="$OPTARG" + ;; + c) + CONFIG="$OPTARG" + ;; + e) + ENV="$OPTARG" + ;; + a) + ARCH="$OPTARG" + ;; + l) + # FIX ME + LOC="$OPTARG" + CONFIG="$LOC/milabench/config/standard.yaml" + BASE="$LOC/base" + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + usage + ;; + esac +done + +shift "$((OPTIND-1))" +REMAINING_ARGS="$@" + +echo " PYTHON: $PYTHON" +echo " branch: $BRANCH" +echo " origin: $ORIGIN" +echo " config: $CONFIG" +echo " env: $ENV" +echo " args: $REMAINING_ARGS" +# +# Fix problem with conda saying it is not "init properly" +# +CONDA_EXEC="$(which conda)" +CONDA_BASE=$(dirname $CONDA_EXEC) +source $CONDA_BASE/../etc/profile.d/conda.sh + +if [ -e $HOME/.credentials.env ]; then + source $HOME/.credentials.env +fi + +cd $LOC +# +# Create a new environment +# +if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! 
-d "$CONDA_ENVS/$ENV" ]; then + conda create --prefix $ENV python=$PYTHON -y +fi +conda activate $ENV + +export HF_HOME=$BASE/cache +export HF_DATASETS_CACHE=$BASE/cache +export TORCH_HOME=$BASE/cache +export XDG_CACHE_HOME=$BASE/cache +export MILABENCH_GPU_ARCH=$ARCH + +export MILABENCH_DASH=no +export PYTHONUNBUFFERED=1 +export MILABENCH_BASE=$BASE +export MILABENCH_CONFIG=$CONFIG +# +# Fetch the repo +# +git clone --single-branch --depth 1 -b $BRANCH $ORIGIN +python -m pip install -e ./milabench + +SYSTEM="$LOC/system.yaml" + +echo "" +echo "System" +echo "------" + +milabench slurm_system +milabench slurm_system > $SYSTEM + +module load gcc/9.3.0 +module load cuda/11.8 + +echo "" +echo "Install" +echo "-------" +milabench install --config $CONFIG --system $SYSTEM --base $BASE $REMAINING_ARGS + + +echo "" +echo "Prepare" +echo "-------" +milabench prepare --config $CONFIG --system $SYSTEM --base $BASE $REMAINING_ARGS + +echo "" +echo "Run" +echo "---" +milabench run --config $CONFIG --system $SYSTEM --base $BASE $REMAINING_ARGS + +echo "" +echo "Report" +echo "------" + +milabench write_report_to_pr --remote $ORIGIN --branch $BRANCH --config $CONFIG + +echo "----" +echo "Done after $SECONDS" +echo "" diff --git a/milabench/scripts/setup.bash b/milabench/scripts/setup.bash new file mode 100644 index 000000000..dd3e3f496 --- /dev/null +++ b/milabench/scripts/setup.bash @@ -0,0 +1,111 @@ +#!/bin/bash + +function usage() { + echo "Usage: $0 [-m] [-p]" + echo " -h Display this help message." + echo " -b arch GPU arch (default: cuda)" + echo " -b BRANCH Branch to checkout (default: master)" + echo " -o ORIGIN Origin to use (default: github/mila/milabench)" + echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)" + echo " -e ENV Environment (default: ./env)" + echo " -p PYTHON Python version (default: 3.9)" + echo " ARGUMENT Any additional argument you want to process." 
+ exit 1 +} + +ARCH="cuda" +PYTHON="3.9" +BRANCH="master" +ORIGIN="https://github.com/mila-iqia/milabench.git" +CONFIG="$SLURM_TMPDIR/milabench/config/standard.yaml" +BASE="$SLURM_TMPDIR/base" +ENV="./env" +REMAINING_ARGS="" + +while getopts ":hm:p:e:b:o:c:" opt; do + case $opt in + h) + usage + ;; + p) + PYTHON="$OPTARG" + ;; + b) + BRANCH="$OPTARG" + ;; + o) + ORIGIN="$OPTARG" + ;; + c) + CONFIG="$OPTARG" + ;; + e) + ENV="$OPTARG" + ;; + a) + ARCH="$OPTARG" + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + usage + ;; + esac +done + +shift "$((OPTIND-1))" +REMAINING_ARGS="$@" + +echo " PYTHON: $PYTHON" +echo " branch: $BRANCH" +echo " origin: $ORIGIN" +echo " config: $CONFIG" +echo " env: $ENV" +echo " args: $REMAINING_ARGS" +# +# Fix problem with conda saying it is not "init properly" +# +CONDA_EXEC="$(which conda)" +CONDA_BASE=$(dirname $CONDA_EXEC) +source $CONDA_BASE/../etc/profile.d/conda.sh + +if [ -e $HOME/.credentials.env ]; then + source $HOME/.credentials.env +fi + +cd $SLURM_TMPDIR +# +# Create a new environment +# +if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! 
-d "$CONDA_ENVS/$ENV" ]; then + conda create --prefix $ENV python=$PYTHON -y +fi +conda activate $ENV + +export HF_HOME=$BASE/cache +export HF_DATASETS_CACHE=$BASE/cache +export TORCH_HOME=$BASE/cache +export XDG_CACHE_HOME=$BASE/cache +export MILABENCH_GPU_ARCH=$ARCH + +export MILABENCH_DASH=no +export PYTHONUNBUFFERED=1 +export MILABENCH_BASE=$BASE +export MILABENCH_CONFIG=$CONFIG + +# +# Fetch the repo +# +git clone --single-branch --depth 1 -b $BRANCH $ORIGIN +python -m pip install -e ./milabench + +SYSTEM="$SLURM_TMPDIR/system.yaml" + +echo "" +echo "System" +echo "------" + +milabench slurm_system +milabench slurm_system > $SYSTEM + +module load gcc/9.3.0 +module load cuda/11.8 diff --git a/milabench/sizer.py b/milabench/sizer.py new file mode 100644 index 000000000..240296bf6 --- /dev/null +++ b/milabench/sizer.py @@ -0,0 +1,274 @@ +from dataclasses import dataclass +import os +from copy import deepcopy +import yaml +import contextvars + +import numpy as np + +from .validation.validation import ValidationLayer +from .config import system_global + + +ROOT = os.path.dirname(__file__) + +default_scaling_config = os.path.join(ROOT, "..", "config", "scaling.yaml") + + +def is_autoscale_enabled(): + return ( + os.getenv("MILABENCH_SIZER_AUTO", False) + or os.getenv("MILABENCH_SIZER_MULTIPLE") is not None + ) + + +def getenv(name, type): + value = os.getenv(name) + + if value is not None: + return type(value) + + return value + + +@dataclass +class SizerOptions: + size: int = getenv("MILABENCH_SIZER_BATCH_SIZE", int) + autoscale: bool = is_autoscale_enabled() + multiple: int = getenv("MILABENCH_SIZER_MULTIPLE", int) + optimized: bool = getenv("MILABENCH_SIZER_OPTIMIZED", int) + capacity: str = getenv("MILABENCH_SIZER_CAPACITY", str) + + +metric_prefixes = { + "T": (12, 4), + "G": (9, 3), + "M": (6, 2), + "k": (3, 1), + "h": (2, None), + "da": (1, None), + "d": (-1, None), + "c": (-2, None), + "m": (-3, None), + "u": (-6, None), + "n": (-9, None), + "p": (-12, 
None), +}
+
+
+def to_octet(value: str) -> float:
+    """Convert a size string with a unit suffix into a plain float.
+
+    Every prefix ``p`` of ``metric_prefixes`` is tried in the mapping's
+    iteration order:
+
+    * a value containing ``{p}iB`` or ``{p}io`` is scaled by ``1024**vb``;
+    * a value containing ``{p}B`` or ``{p}o`` is scaled by ``10**vm``.
+
+    When no prefix matches, a remaining ``io`` or ``o`` marker is removed and
+    the rest is parsed as-is.
+
+    Args:
+        value: textual size, number first and unit last.
+
+    Returns:
+        The size as a float.
+
+    Raises:
+        ValueError: if the numeric part cannot be parsed by ``float``.
+    """
+    for p, (vm, vb) in metric_prefixes.items():
+        if f"{p}iB" in value or f"{p}io" in value:
+            # The unit is assumed to be a suffix: drop the prefix plus the
+            # two characters of "iB" / "io".
+            # NOTE(review): the mapping closed just above ends with an entry
+            # whose last element is None; if that is a `vb`, this power
+            # raises TypeError for that prefix -- confirm.
+            return float(value[: -(len(p) + 2)]) * 1024**vb
+
+        if f"{p}B" in value or f"{p}o" in value:
+            # Same idea with the one-character "B" / "o" unit.
+            return float(value[: -(len(p) + 1)]) * 10**vm
+
+    if "io" in value:
+        return float(value.replace("io", ""))
+
+    if "o" in value:
+        return float(value.replace("o", ""))
+
+    return float(value)
+
+
+class Sizer:
+    """Automatically scale the batch size to match GPU spec"""
+
+    # NOTE(review): the default `options` instance is built once, when the
+    # class body is evaluated, and is shared by every Sizer created without
+    # an explicit `options` -- confirm that nothing mutates it.
+    def __init__(self, options=SizerOptions(), scaling_config=None):
+        """Load the scaling configuration.
+
+        Args:
+            options: sizing options (size, optimized, autoscale, multiple,
+                capacity are read by the methods below).
+            scaling_config: path of the YAML scaling file; falls back to
+                ``default_scaling_config`` when None.
+        """
+        self.options = options
+        # Keeps the path exactly as given, i.e. None when the default is used.
+        self.path = scaling_config
+
+        if scaling_config is None:
+            scaling_config = default_scaling_config
+
+        with open(scaling_config, "r") as sconf:
+            self.scaling_config = yaml.safe_load(sconf)
+
+    def benchscaling(self, benchmark):
+        """Return the scaling entry for `benchmark`.
+
+        `benchmark` may be a benchmark name, an entry (a dict holding a
+        "name" key, returned unchanged) or an object exposing
+        ``config["name"]``. Name lookups return None when the name is not in
+        the scaling configuration.
+        """
+        # key
+        if isinstance(benchmark, str):
+            return self.scaling_config.get(benchmark)
+
+        # benchmark config
+        if isinstance(benchmark, dict) and "name" in benchmark:
+            return benchmark
+
+        # pack
+        return self.scaling_config.get(benchmark.config["name"])
+
+    def get_capacity(self, capacity):
+        """Resolve the capacity to use.
+
+        ``options.capacity`` takes precedence over the argument when it is
+        set; string capacities are converted with `to_octet`.
+        """
+        if self.options.capacity is not None:
+            capacity = self.options.capacity
+
+        if isinstance(capacity, str):
+            capacity = to_octet(capacity)
+
+        return capacity
+
+    def auto_size(self, benchmark, capacity):
+        """Estimate the batch size matching `capacity`.
+
+        The ``model`` mapping of the benchmark entry (batch size -> memory
+        string) is fitted with a straight line, memory being the abscissa,
+        and the line is evaluated at `capacity`.
+
+        Returns:
+            The estimate rounded to an int, floored to a multiple of
+            ``options.multiple`` when that option is set, and never below 1.
+        """
+        capacity = self.get_capacity(capacity)
+
+        config = self.benchscaling(benchmark)
+
+        data = list(sorted(config["model"].items(), key=lambda x: x[0]))
+        mem = [to_octet(v[1]) for v in data]
+        size = [float(v[0]) for v in data]
+
+        # This does not extrapolate
+        # int(np.interp(capacity, mem, size))
+
+        # Use polynomial of degree 1 so it is essentially linear interpolation
+        model = np.poly1d(np.polyfit(mem, size, deg=1))
+
+        newsize_f = model(capacity)
+        newsize_i = int(newsize_f)
+
+        # int() truncates; bump by one when the dropped fraction exceeds 0.5
+        if (newsize_f - newsize_i) > 0.5:
+            newsize_i += 1
+
+        if self.options.multiple is not None:
+            newsize_i = (newsize_i // self.options.multiple) * self.options.multiple
+
+        return max(newsize_i, 1)
+
+    def size(self, benchmark, capacity):
+        """Pick the batch size for `benchmark`, or None to leave it alone.
+
+        Checked in order: an explicit ``options.size``, the entry's
+        "optimized" value when ``options.optimized`` is set, then
+        `auto_size` when ``options.autoscale`` is set.
+        """
+        config = self.benchscaling(benchmark)
+
+        if self.options.size is not None:
+            return self.options.size
+
+        if self.options.optimized:
+            return config["optimized"]
+
+        if self.options.autoscale:
+            return self.auto_size(benchmark, capacity)
+
+        return None
+
+    def argv(self, benchmark, capacity, argv):
+        """Find the batch size and override it with a new value
+
+        `argv` is returned untouched when the benchmark has no scaling entry
+        or no size was selected. Otherwise a copy is returned: the token
+        following the first argument ending with the entry's "arg" name is
+        replaced, or the flag and its value are appended when the flag is
+        absent.
+        """
+
+        config = self.benchscaling(benchmark)
+        if config is None:
+            return argv
+
+        newsize = self.size(benchmark, capacity)
+
+        if newsize is None:
+            return argv
+
+        # work on a copy so the caller's sequence is never modified
+        argv = list(argv)
+        argname = config.get("arg")
+        if argname is None:
+            return argv
+
+        for i, arg in enumerate(argv):
+            if arg.endswith(argname):
+                break
+        else:
+            # add the new argument
+            return argv + [argname, str(newsize)]
+
+        if i + 1 < len(argv):
+            argv[i + 1] = str(newsize)
+        else:
+            # The flag is the last token and has no value to replace;
+            # indexing past the end used to raise IndexError.
+            argv.append(str(newsize))
+        return argv
+
+
+# The default Sizer is instantiated when this module is imported, which reads
+# the default scaling file (see Sizer.__init__).
+sizer_global = contextvars.ContextVar("sizer_global", default=Sizer())
+
+
+def scale_argv(pack, argv):
+    """Rewrite `argv` for `pack` using the current sizer and system.
+
+    The capacity is read from ``system["gpu"]["capacity"]`` of the current
+    ``system_global`` value.
+    """
+    sizer = sizer_global.get()
+    system = system_global.get()
+
+    capacity = system["gpu"]["capacity"]
+
+    return sizer.argv(pack, capacity, argv)
+
+
+class MemoryUsageExtractor(ValidationLayer):
+    """Extract max memory usage per benchmark to populate the memory model
+
+    Every hook is a no-op when ``self.filepath`` is None.
+    """
+
+    def __init__(self):
+        # Output path, taken from MILABENCH_SIZER_SAVE.
+        # NOTE(review): presumably None when the variable is unset -- confirm
+        # against `getenv`.
+        self.filepath = getenv("MILABENCH_SIZER_SAVE", str)
+
+        # Private copy of the scaling config; measurements are merged into it.
+        self.memory = deepcopy(sizer_global.get().scaling_config)
+        self.scaling = None
+        self.benchname = None
+        self.batch_size = 0
+        self.max_usage = float("-inf")
+        self.early_stopped = False
+
+    def on_start(self, entry):
+        """Record the benchmark name and the batch size found in its command."""
+        if self.filepath is None:
+            return
+
+        argv = entry.data["command"]
+        self.benchname = entry.pack.config["name"]
+        self.batch_size = None
+        self.max_usage = float("-inf")
+
+        config = self.memory.get(self.benchname, dict())
+        scalingarg = config.get("arg", None)
+
+        if scalingarg is None:
+            # No batch-size argument declared: nothing to track for this run.
+            self.benchname = None
+            return
+
+        found = None
+        for i, arg in enumerate(argv):
+            if arg.endswith(scalingarg):
+                found = i
+                break
+
+        # Index 0 is a valid match, so test against None rather than
+        # truthiness, and make sure a value actually follows the flag.
+        if found is not None and found + 1 < len(argv):
+            self.batch_size = int(argv[found + 1])
+
+    def on_data(self, entry):
+        """Track the highest "memory" usage reported in `gpudata`."""
+        if self.filepath is None:
+            return
+
+        if entry.data is None:
+            return
+
+        gpudata = entry.data.get("gpudata")
+        if gpudata is not None:
+            # First element of each device's "memory" pair is the usage.
+            current_usage = [
+                data.get("memory", [0, 1])[0] for data in gpudata.values()
+            ]
+
+            # A single iterable keeps max() valid when `gpudata` is empty;
+            # the unpacked form raised TypeError in that case.
+            self.max_usage = max([*current_usage, self.max_usage])
+
+    def on_stop(self, entry):
+        """Remember that the current run was stopped early."""
+        self.early_stopped = True
+
+    def on_end(self, entry):
+        """Store the measured usage for the run's batch size, then reset."""
+        if self.filepath is None:
+            return
+
+        # Consume the flag so that an early stop only applies to the run it
+        # happened in; it used to stay set for every later run.
+        early_stopped = self.early_stopped
+        self.early_stopped = False
+
+        if (
+            self.benchname is None
+            or self.batch_size is None
+            or self.max_usage == float("-inf")
+        ):
+            return
+
+        # Only update if successful (or stopped early on purpose)
+        rc = entry.data["return_code"]
+        if rc == 0 or early_stopped:
+            config = self.memory.setdefault(self.benchname, dict())
+            model = config.setdefault("model", dict())
+            model[self.batch_size] = f"{self.max_usage} MiB"
+            config["model"] = dict(sorted(model.items(), key=lambda x: x[0]))
+
+        self.benchname = None
+        self.batch_size = None
+        self.max_usage = float("-inf")
+
+    def report(self, *args):
+        """Write the updated memory model to ``self.filepath`` as YAML."""
+        if self.filepath is not None:
+            with open(self.filepath, "w") as file:
+                yaml.dump(self.memory, file)
diff --git a/poetry.lock b/poetry.lock index 6c0c15f1d..942bff025 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]] name = "alabaster" version = "0.7.13" description = "A configurable sidebar-enabled Sphinx theme" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -16,7 +15,6 @@ files = [ name = "ansicon" version = "1.89.0" description = "Python wrapper for loading Jason Hood's ANSICON" -category = "main" optional = false python-versions = "*" files = [ @@ -28,7 +26,6 @@ files = [ name = "antlr4-python3-runtime" version = "4.9.3" description = "ANTLR 4.9.3 runtime for Python 3.7" -category = "main" optional = false python-versions = "*" files = [ @@ -39,7 +36,6 @@ files = [ name = "argcomplete" version = "1.12.3" description = "Bash tab completion for argparse" -category = "main" optional = false python-versions = "*" files = [ @@ -54,7 +50,6 @@ test = ["coverage", "flake8", "pexpect", "wheel"] name = "asttokens" version = "2.2.1" description = "Annotate AST trees with source code positions" -category = "main" optional = false python-versions = "*" files = [ @@ -72,7 +67,6 @@ test = ["astroid", "pytest"] name = "atomicwrites" version = "1.4.1" description = "Atomic file writes." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -83,7 +77,6 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -102,7 +95,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "babel" version = "2.12.1" description = "Internationalization utilities" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -117,7 +109,6 @@ pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} name = "black" version = "23.3.0" description = "The uncompromising code formatter." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -167,7 +158,6 @@ uvloop = ["uvloop (>=0.15.2)"] name = "blessed" version = "1.20.0" description = "Easy, practical library for making terminal apps, by providing an elegant, well-documented interface to Colors, Keyboard input, and screen Positioning capabilities." -category = "main" optional = false python-versions = ">=2.7" files = [ @@ -184,7 +174,6 @@ wcwidth = ">=0.1.4" name = "build" version = "0.10.0" description = "A simple, correct Python build frontend" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -208,7 +197,6 @@ virtualenv = ["virtualenv (>=20.0.35)"] name = "certifi" version = "2023.5.7" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -220,7 +208,6 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -305,7 +292,6 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -320,7 +306,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "codefind" version = "0.1.3" description = "Find code objects and their referents" -category = "main" optional = false python-versions = ">=3.8,<4.0" files = [ @@ -332,7 +317,6 @@ files = [ name = "coleo" version = "0.3.2" description = "The nicest way to develop a command-line interface" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -351,7 +335,6 @@ yaml = ["pyyaml (>=5.3,<6.0)"] name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -363,7 +346,6 @@ files = [ name = "colorlog" version = "6.7.0" description = "Add colours to the output of Python's logging module." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -381,7 +363,6 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "coverage" version = "7.2.7" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -457,7 +438,6 @@ toml = ["tomli"] name = "cp-template" version = "0.3.0" description = "A tool to copy templated directories" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -473,7 +453,6 @@ pystache = ">=0.6.0,<0.7.0" name = "distlib" version = "0.3.6" description = "Distribution utilities" -category = "main" optional = false python-versions = "*" files = [ @@ -485,7 +464,6 @@ files = [ name = "dnspython" version = "2.3.0" description = "DNS toolkit" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -506,7 +484,6 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"] name = "docutils" version = "0.17.1" description = "Docutils -- Python Documentation Utilities" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -518,7 +495,6 @@ files = [ name = "executing" version = "1.2.0" description = "Get the currently executing AST node of a frame, and other information" -category = "main" optional = false python-versions = "*" files = [ @@ -533,7 +509,6 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "filelock" version = "3.12.1" description = "A platform independent file lock." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -549,7 +524,6 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "p name = "flake8" version = "4.0.1" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -566,7 +540,6 @@ pyflakes = ">=2.4.0,<2.5.0" name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -581,7 +554,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.31" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -596,7 +568,6 @@ gitdb = ">=4.0.1,<5" name = "giving" version = "0.4.2" description = "Reactive logging" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -613,7 +584,6 @@ varname = ">=0.10.0,<0.11.0" name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" -category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -622,6 +592,7 @@ files = [ {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"}, {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, {file = 
"greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, @@ -630,6 +601,7 @@ files = [ {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -659,6 +631,7 @@ files = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -667,6 +640,7 @@ files = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ -687,7 +661,6 @@ test = ["objgraph", "psutil"] name = "hrepr" version = "0.4.1" description = "Extensible HTML representation for Python objects." 
-category = "main" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -702,7 +675,6 @@ ovld = ">=0.3.2,<0.4.0" name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -714,7 +686,6 @@ files = [ name = "imagesize" version = "1.4.1" description = "Getting image size from png/jpeg/jpeg2000/gif file" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -726,7 +697,6 @@ files = [ name = "importlib-metadata" version = "6.6.0" description = "Read metadata from Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -742,11 +712,28 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker perf = ["ipython"] testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] +[[package]] +name = "importlib-resources" +version = "6.1.0" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.1.0-py3-none-any.whl", hash = "sha256:aa50258bbfa56d4e33fbd8aa3ef48ded10d1735f11532b8df95388cc6bdb7e83"}, + {file = "importlib_resources-6.1.0.tar.gz", hash = "sha256:9d48dcccc213325e810fd723e7fbb45ccb39f6cf5c31f00cf2b965f5f10f3cb9"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp 
(>=3.17)"] + [[package]] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -758,7 +745,6 @@ files = [ name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." -category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -776,7 +762,6 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -794,7 +779,6 @@ i18n = ["Babel (>=2.7)"] name = "jinxed" version = "1.2.0" description = "Jinxed Terminal Library" -category = "main" optional = false python-versions = "*" files = [ @@ -809,7 +793,6 @@ ansicon = {version = "*", markers = "platform_system == \"Windows\""} name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -834,7 +817,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -858,6 +840,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -894,7 +886,6 @@ files = [ name = "mccabe" version = "0.6.1" description = "McCabe checker, plugin for flake8" -category = "dev" optional = false python-versions = "*" files = [ @@ -906,7 +897,6 @@ files = [ name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -918,7 +908,6 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -930,7 +919,6 @@ files = [ name = "nox" version = "2021.10.1" description = "Flexible test automation." 
-category = "main" optional = false python-versions = ">=3.6" files = [ @@ -952,7 +940,6 @@ tox-to-nox = ["jinja2", "tox"] name = "numpy" version = "1.24.3" description = "Fundamental package for array computing in Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -990,7 +977,6 @@ files = [ name = "omegaconf" version = "2.3.0" description = "A flexible configuration library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,14 +985,13 @@ files = [ ] [package.dependencies] -antlr4-python3-runtime = ">=4.9.0,<4.10.0" +antlr4-python3-runtime = "==4.9.*" PyYAML = ">=5.1.0" [[package]] name = "ovld" version = "0.3.2" description = "Overloading Python functions" -category = "main" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1018,7 +1003,6 @@ files = [ name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1030,7 +1014,6 @@ files = [ name = "pandas" version = "1.5.3" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1066,7 +1049,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -1078,7 +1062,6 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -1090,7 +1073,6 @@ files = [ name = "pip" version = "23.1.2" description = "The PyPA recommended tool for installing Python packages." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1102,7 +1084,6 @@ files = [ name = "pip-tools" version = "6.13.0" description = "pip-tools keeps your pinned dependencies fresh." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1125,7 +1106,6 @@ testing = ["flit-core (>=2,<4)", "poetry-core (>=1.0.0)", "pytest (>=7.2.0)", "p name = "platformdirs" version = "3.5.3" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1141,7 +1121,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1157,7 +1136,6 @@ testing = ["pytest", "pytest-benchmark"] name = "psutil" version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1184,7 +1162,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "psycopg2-binary" version = "2.9.6" description = "psycopg2 - Python-PostgreSQL Database Adapter" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1256,7 +1233,6 @@ files = [ name = "ptera" version = "1.4.1" description = "Call graph addressing library." 
-category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1272,7 +1248,6 @@ giving = ">=0.4.1,<0.5.0" name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1284,7 +1259,6 @@ files = [ name = "py-cpuinfo" version = "9.0.0" description = "Get CPU info with pure Python" -category = "main" optional = false python-versions = "*" files = [ @@ -1296,7 +1270,6 @@ files = [ name = "pycodestyle" version = "2.8.0" description = "Python style guide checker" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1308,7 +1281,6 @@ files = [ name = "pyflakes" version = "2.4.0" description = "passive checker of Python programs" -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1320,7 +1292,6 @@ files = [ name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1335,7 +1306,6 @@ plugins = ["importlib-metadata"] name = "pymongo" version = "4.3.3" description = "Python driver for MongoDB " -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1430,7 +1400,6 @@ zstd = ["zstandard"] name = "pynvml" version = "11.5.0" description = "Python Bindings for the NVIDIA Management Library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1442,7 +1411,6 @@ files = [ name = "pyproject-hooks" version = "1.0.0" description = "Wrappers to call pyproject.toml-based build backend hooks." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1457,7 +1425,6 @@ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} name = "pystache" version = "0.6.0" description = "Mustache for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1472,7 +1439,6 @@ test = ["nose"] name = "pytest" version = "6.2.5" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1497,7 +1463,6 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm name = "pytest-cov" version = "3.0.0" description = "Pytest plugin for measuring coverage." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1516,7 +1481,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-datadir" version = "1.4.1" description = "pytest plugin for test data directories and files" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1531,7 +1495,6 @@ pytest = ">=5.0" name = "pytest-regressions" version = "2.4.2" description = "Easy to use fixtures to write regression tests." 
-category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1554,7 +1517,6 @@ num = ["numpy", "pandas"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1569,7 +1531,6 @@ six = ">=1.5" name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -1581,7 +1542,6 @@ files = [ name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1631,7 +1591,6 @@ files = [ name = "reactivex" version = "4.0.4" description = "ReactiveX (Rx) for Python" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1646,7 +1605,6 @@ typing-extensions = ">=4.1.1,<5.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1668,7 +1626,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rich" version = "13.4.2" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -1688,7 +1645,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "setuptools" version = "67.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1705,7 +1661,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1717,7 +1672,6 @@ files = [ name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1729,7 +1683,6 @@ files = [ name = "snowballstemmer" version = "2.2.0" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
-category = "dev" optional = false python-versions = "*" files = [ @@ -1741,7 +1694,6 @@ files = [ name = "sphinx" version = "4.5.0" description = "Python documentation generator" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1777,7 +1729,6 @@ test = ["cython", "html5lib", "pytest", "pytest-cov", "typed-ast"] name = "sphinx-rtd-theme" version = "1.2.2" description = "Read the Docs theme for Sphinx" -category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -1797,7 +1748,6 @@ dev = ["bump2version", "sphinxcontrib-httpdomain", "transifex-client", "wheel"] name = "sphinxcontrib-applehelp" version = "1.0.4" description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1813,7 +1763,6 @@ test = ["pytest"] name = "sphinxcontrib-devhelp" version = "1.0.2" description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
-category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1829,7 +1778,6 @@ test = ["pytest"] name = "sphinxcontrib-htmlhelp" version = "2.0.1" description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1845,7 +1793,6 @@ test = ["html5lib", "pytest"] name = "sphinxcontrib-jquery" version = "4.1" description = "Extension to include jQuery on newer Sphinx releases" -category = "dev" optional = false python-versions = ">=2.7" files = [ @@ -1860,7 +1807,6 @@ Sphinx = ">=1.8" name = "sphinxcontrib-jsmath" version = "1.0.1" description = "A sphinx extension which renders display math in HTML via JavaScript" -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1875,7 +1821,6 @@ test = ["flake8", "mypy", "pytest"] name = "sphinxcontrib-qthelp" version = "1.0.3" description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." -category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1891,7 +1836,6 @@ test = ["pytest"] name = "sphinxcontrib-serializinghtml" version = "1.1.5" description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." 
-category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1907,7 +1851,6 @@ test = ["pytest"] name = "sqlalchemy" version = "2.0.16" description = "Database Abstraction Library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1986,7 +1929,6 @@ sqlcipher = ["sqlcipher3-binary"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1998,7 +1940,6 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2010,7 +1951,6 @@ files = [ name = "tqdm" version = "4.65.0" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2031,7 +1971,6 @@ telegram = ["requests"] name = "typing-extensions" version = "4.6.3" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2043,7 +1982,6 @@ files = [ name = "urllib3" version = "2.0.3" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2061,7 +1999,6 @@ zstd = ["zstandard (>=0.18.0)"] name = "varname" version = "0.10.0" description = "Dark magics about variable names in python." 
-category = "main" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -2079,7 +2016,6 @@ all = ["asttokens (>=2.0.0,<3.0.0)", "pure_eval (<1.0.0)"] name = "virtualenv" version = "20.23.0" description = "Virtual Python Environment builder" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2100,7 +2036,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess name = "voir" version = "0.2.10" description = "Instrument, extend and visualize your programs" -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -2120,7 +2055,6 @@ rich = ">=13.3.2,<14.0.0" name = "wcwidth" version = "0.2.6" description = "Measures the displayed width of unicode strings in a terminal" -category = "main" optional = false python-versions = "*" files = [ @@ -2132,7 +2066,6 @@ files = [ name = "wheel" version = "0.40.0" description = "A built-package format for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2147,7 +2080,6 @@ test = ["pytest (>=6.0.0)"] name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2161,5 +2093,5 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" -python-versions = ">=3.8,<3.11" -content-hash = "2852dd9dc4b604714a06f452ee27c295e946ad86b9955ac133da9ca3b92ad1f7" +python-versions = ">=3.8,<4.0" +content-hash = "0407b1f9e231b83ca25d848e4c21033a7016d5825c31a86ce075479b4b419fa8" diff --git a/pyproject.toml b/pyproject.toml index 8dca0260f..f349e2dea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,8 @@ authors = ["Olivier Breuleux "] license = "MIT" [tool.poetry.dependencies] -python = ">=3.8,<3.11" +voir = {git = "https://github.com/breuleux/voir", branch = "master"} +python = ">=3.8,<4.0" giving = "^0.4.0" ptera = "^1.2.0" coleo = "^0.3.0" @@ -19,7 
+20,6 @@ hrepr = "^0.4.0" blessed = "^1.19.1" pathspec = "^0.9.0" cp-template = "^0.3.0" -voir = "^0.2.10" pandas = "^1.4.2" numpy = ">=1.23.0" pynvml = "^11.4.1" @@ -32,6 +32,7 @@ pymongo = "^4.3.3" psycopg2-binary = {version = "^2.9.6", optional = true} py-cpuinfo = "^9.0.0" psutil = "^5.9.5" +importlib-resources = "^6.1.0" [tool.poetry.dev-dependencies] diff --git a/tests/config/scaling.yaml b/tests/config/scaling.yaml new file mode 100644 index 000000000..664996f79 --- /dev/null +++ b/tests/config/scaling.yaml @@ -0,0 +1,7 @@ +benchio: + arg: --batch_size + optimized: 138 + model: + 64: 12Go + 128: 24Go + 256: 48Go diff --git a/tests/conftest.py b/tests/conftest.py index b4df14967..c9d414486 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,3 +26,37 @@ def get_config(name): @pytest.fixture def replayfolder(): return here / "replays" + + +@pytest.fixture(scope="session", autouse=True) +def set_env(): + os.environ["MILABENCH_CONFIG"] = "config/ci.yaml" + os.environ["MILABENCH_BASE"] = "output" + os.environ["MILABENCH_GPU_ARCH"] = "cuda" + os.environ["MILABENCH_DASH"] = "no" + + +@pytest.fixture +def multipack(config, tmp_path): + from milabench.cli import _get_multipack + + bench_config = config("benchio") + system_path = config("system") + base = tmp_path + + use_current_env = True + select = None + exclude = None + run_name = "test" + overrides = {} + + return _get_multipack( + bench_config, + system_path, + base, + use_current_env, + select, + exclude, + run_name=run_name, + overrides=overrides, + ) diff --git a/tests/test_scaler.py b/tests/test_scaler.py new file mode 100644 index 000000000..283048c8b --- /dev/null +++ b/tests/test_scaler.py @@ -0,0 +1,94 @@ +import pytest + +from milabench.sizer import Sizer, SizerOptions, sizer_global + + +def test_scaler_use_override(multipack, config): + sizer = Sizer(SizerOptions(size=64, autoscale=False), config("scaling")) + for k, pack in multipack.packs.items(): + assert sizer.size(pack, "48Go") == 64 + + 
+def test_scaler_use_optimized(multipack, config): + sizer = Sizer( + SizerOptions( + size=None, + autoscale=False, + optimized=True, + ), + config("scaling"), + ) + for k, pack in multipack.packs.items(): + assert sizer.size(pack, "48Go") == 138 + + +_values = [ + ("5Go", 27), # Not a multiple of 8 + ("6Go", 32), + ("12Go", 64), + ("18Go", 96), + ("24Go", 128), + ("30Go", 160), + ("48Go", 256), + ("72Go", 384), +] + + +@pytest.mark.parametrize("capacity,expected", _values) +def test_scaler_autoscaler_lerp(multipack, config, capacity, expected): + sizer = Sizer(SizerOptions(size=None, autoscale=True), config("scaling")) + for k, pack in multipack.packs.items(): + assert sizer.size(pack, capacity) == expected + + +_values_2 = [ + ("5Go", 24), # a multiple of 8 + ("6Go", 32), +] + + +@pytest.mark.parametrize("capacity,expected", _values_2) +def test_scaler_autoscaler_lerp_multiple(multipack, config, capacity, expected): + sizer = Sizer( + SizerOptions( + size=None, + autoscale=True, + multiple=8, + ), + config("scaling"), + ) + for k, pack in multipack.packs.items(): + assert sizer.size(pack, capacity) == expected + + +def test_scaler_disabled(multipack): + for k, pack in multipack.packs.items(): + assert pack.argv == [] + + +def test_scaler_enabled(multipack, config): + from milabench.config import system_global + import contextvars + + ctx = contextvars.copy_context() + + def update_ctx(): + sizer = Sizer( + SizerOptions( + size=None, + autoscale=True, + multiple=8, + ), + config("scaling"), + ) + sizer_global.set(sizer) + system = system_global.get() + system["gpu"]["capacity"] = "41920 MiB" + + ctx.run(update_ctx) + + for k, pack in multipack.packs.items(): + assert ctx.run(lambda: pack.argv) == ["--batch_size", "232"] + + # Sizer is only enabled inside the context + assert pack.argv == [] diff --git a/tests/test_summary.py b/tests/test_summary.py index ee55708a2..702cf82d8 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -5,13 +5,17 @@ def 
test_report(runs_folder, capsys, file_regression, config): folder = runs_folder / "rijubigo.2023-03-24_13:45:27.512446" try: - main(["report", "--runs", folder, "--config", config("benchio")]) + main(["report", "--runs", str(folder), "--config", config("benchio")]) except SystemExit as exc: assert not exc.code + assert exc.code is None - output = capsys.readouterr().out - output = output.replace(str(folder), "XXX") - file_regression.check(output) + all = capsys.readouterr() + stdout = all.out + assert stdout != "" + + stdout = stdout.replace(str(folder), "XXX") + file_regression.check(stdout) def test_summary(file_regression): @@ -63,9 +67,9 @@ def get_output(data): assert output.strip() == "" -def test_report_folder_does_average(runs_folder, capsys, file_regression): +def test_report_folder_does_average(runs_folder, capsys, config, file_regression): try: - main(["report", "--runs", runs_folder]) + main(["report", "--runs", runs_folder, "--config", config("benchio")]) except SystemExit as exc: assert not exc.code diff --git a/tests/test_summary/test_report_folder_does_average.txt b/tests/test_summary/test_report_folder_does_average.txt index 1b9b00f4c..3cc299dbf 100644 --- a/tests/test_summary/test_report_folder_does_average.txt +++ b/tests/test_summary/test_report_folder_does_average.txt @@ -3,7 +3,7 @@ Source: XXX Benchmark results ================= fail n perf sem% std% peak_memory score weight -benchio 0 6 7878.45 2.5% 18.0% 24456 7878.451302 1.00 +benchio 0 6 7878.45 2.5% 18.0% 24456 7878.451302 2.00 Scores ------ diff --git a/tests/test_utils.py b/tests/test_utils.py index e3fc9955f..0cb05e8a2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,10 +7,9 @@ def test_enumerate_rank(): {"main": False}, {"main": True}, {"main": False}, - ] ranks = [r for r, _ in enumerate_rank(nodes)] - + assert ranks == [1, 2, 0, 3] @@ -20,8 +19,7 @@ def test_select_nodes(): {"main": False}, {"main": True}, {"main": False}, - ] - + selected = select_nodes(nodes, 3) - 
assert selected == [{"main": True}, {"main": False}, {"main": False}] \ No newline at end of file + assert selected == [{"main": True}, {"main": False}, {"main": False}] diff --git a/tests/test_validation.py b/tests/test_validation.py index 1b6542847..eb765508c 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -6,11 +6,11 @@ from milabench.testing import interleave, replay -def replay_scenario(folder, name, filename=None): +def replay_validation_scenario(folder, *validation, filename=None): """Replay events from a data file or folder""" gen = None - path = folder / f"{filename or name}" + path = folder / filename file = str(path) + ".txt" if os.path.isdir(path): @@ -20,13 +20,20 @@ def replay_scenario(folder, name, filename=None): if os.path.isfile(file): gen = replay(file) - with multilogger(*validation_layers(name)) as log: + with multilogger(*validation) as log: for entry in gen: log(entry) return log +def replay_scenario(folder, name, filename=None): + """Replay events from a data file or folder""" + return replay_validation_scenario( + folder, *validation_layers(name), filename=filename or name + ) + + def test_error_layer(replayfolder): log = replay_scenario(replayfolder, "error") assert log.result() != 0 @@ -95,3 +102,33 @@ def test_planning_layer_per_gpu_bad(replayfolder, monkeypatch): log = replay_scenario(replayfolder, "planning", "planning_per_gpu_bad") assert log.result() != 0 + + +def test_memory_tracking(replayfolder, config): + import contextvars + from milabench.sizer import ( + MemoryUsageExtractor, + Sizer, + SizerOptions, + sizer_global, + system_global, + ) + + ctx = contextvars.copy_context() + + def update_ctx(): + sizer = Sizer( + SizerOptions( + size=None, + autoscale=True, + multiple=8, + ), + config("scaling"), + ) + sizer_global.set(sizer) + system = system_global.set({"gpu": {"capacity": "41920 MiB"}}) + + ctx.run(update_ctx) + layer = MemoryUsageExtractor() + + ctx.run(lambda: 
replay_validation_scenario(replayfolder, layer, filename="usage"))