diff --git a/.pin/constraints-hpu-torch.txt b/.pin/constraints-hpu-torch.txt index 6481e8c67..92a55858c 100644 --- a/.pin/constraints-hpu-torch.txt +++ b/.pin/constraints-hpu-torch.txt @@ -2,204 +2,359 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=.pin/constraints-hpu-torch.txt .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/brax/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/llama/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in +# pip-compile --output-file=.pin/constraints-hpu-torch.txt .pin/tmp-constraints.txt benchmarks/brax/requirements.in benchmarks/diffusion/requirements.in benchmarks/dinov2/requirements.in benchmarks/flops/requirements.in benchmarks/geo_gnn/requirements-pre.in benchmarks/geo_gnn/requirements.in benchmarks/huggingface/requirements.in benchmarks/lightning/requirements.in benchmarks/llama/requirements.in benchmarks/llava/requirements.in benchmarks/llm/requirements.in benchmarks/purejaxrl/requirements.in benchmarks/recursiongfn/requirements.in benchmarks/rlhf/requirements.in benchmarks/timm/requirements.in benchmarks/torchatari/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in benchmarks/vjepa/requirements.in constraints/extra/torch.hpu.txt # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - absl-py==2.1.0 # via # brax # chex + # distrax # dm-env # ml-collections # mujoco # mujoco-mjx # optax # orbax-checkpoint + # rlax # tensorboard -accelerate==0.32.1 - # via -r benchmarks/accelerate_opt/requirements.in -aiohttp==3.9.5 + # tensorflow-probability +accelerate==0.34.2 + # via + # -r benchmarks/diffusion/requirements.in + # -r benchmarks/llava/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/rlhf/requirements.in + # diffusers + # trl +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.10.8 # via # datasets # fsspec + # torch-geometric aiosignal==1.3.1 # via aiohttp -annotated-types==0.7.0 - # via pydantic antlr4-python3-runtime==4.9.3 # via omegaconf +appdirs==1.4.4 + # via cantilever +argklass==1.4.4 + # via + # -r benchmarks/diffusion/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/purejaxrl/requirements.in +astroid==3.3.4 + # via pylint asttokens==2.4.1 # via giving async-timeout==4.0.3 # via aiohttp -attrs==23.2.0 +attrs==24.2.0 # via aiohttp -beautifulsoup4==4.12.3 - # via gdown +beartype==0.19.0 + # via -r benchmarks/vjepa/requirements.in +black==24.8.0 + # via navix blinker==1.8.2 # via flask +blobfile==3.0.0 + # via + # -r benchmarks/llm/requirements.txt + # torchtune +blosc2==2.7.1 + # via tables +botorch==0.12.0 + # via -r benchmarks/recursiongfn/requirements.in +braceexpand==0.1.7 + # via + # -r benchmarks/vjepa/requirements.in + # webdataset brax==0.10.5 - # via -r benchmarks/brax/requirements.in -certifi==2024.6.2 - # via requests + # via + # -r benchmarks/brax/requirements.in + # -r benchmarks/purejaxrl/requirements.in +cantilever==0.1.0 + # via -r benchmarks/torchatari/requirements.in +certifi==2024.8.30 + # via + # requests + # sentry-sdk charset-normalizer==3.3.2 # via requests -chex==0.1.86 - # via optax 
+chex==0.1.87 + # via + # distrax + # evosax + # flashbax + # gymnax + # optax + # rlax click==8.1.7 - # via flask + # via + # black + # flask + # wandb cloudpickle==3.0.0 - # via gym -codefind==0.1.6 + # via + # gym + # gymnasium + # submitit + # tensorflow-probability +codefind==0.1.7 # via ptera contextlib2==21.6.0 # via ml-collections -datasets==2.20.0 - # via - # -r benchmarks/accelerate_opt/requirements.in +contourpy==1.3.0 + # via matplotlib +cvxopt==1.3.2 + # via -r benchmarks/recursiongfn/requirements.in +cycler==0.12.1 + # via matplotlib +datasets==3.0.1 + # via + # -r benchmarks/diffusion/requirements.in # -r benchmarks/llama/requirements.in - # evaluate -deepspeed==0.14.4 - # via -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/llava/requirements.in + # -r benchmarks/rlhf/requirements.in + # torchtune + # trl +decorator==5.1.1 + # via tensorflow-probability +decord==0.6.0 + # via -r benchmarks/vjepa/requirements.in +diffusers[torch]==0.30.3 + # via -r benchmarks/diffusion/requirements.in dill==0.3.8 # via # datasets - # evaluate # multiprocess + # pylint +distrax==0.1.5 + # via + # -r benchmarks/purejaxrl/requirements.in + # rlax dm-env==1.6 - # via brax + # via + # brax + # envpool + # rlax dm-tree==0.1.8 - # via dm-env -docker==7.1.0 - # via torchx + # via + # dm-env + # tensorflow-probability +docker-pycreds==0.4.0 + # via wandb docstring-parser==0.16 - # via torchx -etils[epath,epy]==1.7.0 + # via tyro +dotmap==1.3.30 + # via evosax +einops==0.8.0 + # via -r benchmarks/vjepa/requirements.in +envpool==0.8.4 + # via -r benchmarks/torchatari/requirements.in +etils[epath,epy]==1.9.4 # via # brax # mujoco # mujoco-mjx # optax # orbax-checkpoint -evaluate==0.4.2 - # via -r benchmarks/accelerate_opt/requirements.in -executing==1.2.0 +evosax==0.1.6 + # via -r benchmarks/purejaxrl/requirements.in +exceptiongroup==1.2.2 + # via pytest +executing==2.1.0 # via varname fairscale==0.4.13 - # via -r benchmarks/llama/requirements.in -fbgemm-gpu==0.7.0 - # via torchrec -filelock==3.15.4 # via + # -r benchmarks/llama/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/llm/requirements.txt +farama-notifications==0.0.4 + # via gymnasium +filelock==3.16.1 + # via + # blobfile # datasets - # gdown + # diffusers # huggingface-hub # torch - # torchx # transformers # triton -fire==0.6.0 - # via -r benchmarks/llama/requirements.in +fire==0.7.0 + # via + # -r benchmarks/llama/requirements.in + # -r benchmarks/llm/requirements.txt +flake8==7.1.1 + # via navix +flashbax==0.1.2 + # via -r benchmarks/purejaxrl/requirements.in flask==3.0.3 # via # brax # flask-cors -flask-cors==4.0.1 - # via brax -flax==0.8.5 +flask-cors==5.0.0 # via brax +flax==0.9.0 + # via + # -r benchmarks/purejaxrl/requirements.in + # brax + # evosax + # flashbax + # gymnax + # navix +fonttools==4.54.1 + # via matplotlib frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec[http]==2024.5.0 +fsspec[http]==2024.6.1 # via # datasets # etils - # evaluate # huggingface-hub + # lightning + # pytorch-lightning # torch - # torchx -future==1.0.0 - # via -r benchmarks/dlrm/requirements.in -gdown==5.2.0 - # via -r benchmarks/stargan/requirements.in -giving==0.4.2 + # torch-geometric +fvcore==0.1.5.post20221221 + # via -r benchmarks/dinov2/requirements.in +gast==0.6.0 + # via tensorflow-probability +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via + # -r benchmarks/recursiongfn/requirements.in + # wandb +giving==0.4.3 # via # ptera # voir glfw==2.7.0 # via mujoco -graphviz==0.20.3 - # via torchviz 
-grpcio==1.65.1 +gpytorch==1.13 + # via + # -r benchmarks/recursiongfn/requirements.in + # botorch +grpcio==1.66.2 # via # brax # tensorboard gym==0.26.2 - # via brax + # via + # -r benchmarks/torchatari/requirements.in + # brax + # envpool + # gymnax gym-notices==0.0.8 # via gym +gymnasium==0.29.1 + # via + # envpool + # gymnax +gymnax==0.0.8 + # via + # -c .pin/../constraints/hpu.txt + # -r benchmarks/purejaxrl/requirements.in hjson==3.1.0 - # via deepspeed -huggingface-hub==0.24.0 + # via argklass +huggingface-hub==0.25.1 # via # -r benchmarks/timm/requirements.in # accelerate # datasets - # evaluate + # diffusers + # timm # tokenizers + # torchtune # transformers -idna==3.7 +humanize==4.10.0 + # via orbax-checkpoint +idna==3.10 # via # requests # yarl -importlib-metadata==8.0.0 - # via torchx -importlib-resources==6.4.0 +importlib-metadata==8.5.0 + # via diffusers +importlib-resources==6.4.5 # via + # argklass + # cantilever # etils # torchcompat +iniconfig==2.0.0 + # via pytest +iopath==0.1.10 + # via + # -r benchmarks/dinov2/requirements.in + # fvcore +isort==5.13.2 + # via pylint itsdangerous==2.2.0 # via flask -jax[cuda12]==0.4.28 +jax==0.4.33 # via # -r benchmarks/brax/requirements.in + # -r benchmarks/purejaxrl/requirements.in # brax # chex + # distrax + # evosax + # flashbax # flax + # gymnax # jaxopt # mujoco-mjx # optax # orbax-checkpoint -jax-cuda12-pjrt==0.4.28 - # via jax-cuda12-plugin -jax-cuda12-plugin==0.4.28 - # via jax -jaxlib==0.4.28+cuda12.cudnn89 + # rlax +jaxlib==0.4.33 # via # brax # chex + # distrax + # evosax + # flashbax + # gymnax # jax # jaxopt # mujoco-mjx # optax # orbax-checkpoint + # rlax jaxopt==0.8.3 # via brax +jaxtyping==0.2.19 + # via + # gpytorch + # linear-operator jinja2==3.1.4 # via # brax # flask # torch + # torch-geometric joblib==1.4.2 # via scikit-learn -lightning-utilities==0.11.5 - # via torchmetrics -markdown==3.6 +kiwisolver==1.4.7 + # via matplotlib +lightning==2.4.0 + # via -r benchmarks/lightning/requirements.in +lightning-utilities==0.11.7 + # via + # lightning + # pytorch-lightning + # torchmetrics +linear-operator==0.5.3 + # via + # botorch + # gpytorch +lxml==5.3.0 + # via blobfile +markdown==3.7 # via tensorboard markdown-it-py==3.0.0 # via rich @@ -207,410 +362,634 @@ markupsafe==2.1.5 # via # jinja2 # werkzeug +matplotlib==3.9.2 + # via + # evosax + # gymnax + # seaborn +mccabe==0.7.0 + # via + # flake8 + # pylint mdurl==0.1.2 # via markdown-it-py ml-collections==0.1.1 # via brax -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # jax # jaxlib # tensorstore mpmath==1.3.0 - # via sympy -msgpack==1.0.8 # via + # botorch + # gpytorch + # linear-operator + # sympy +msgpack==1.1.0 + # via + # blosc2 # flax # orbax-checkpoint -mujoco==3.2.0 +mujoco==3.2.3 # via # brax # mujoco-mjx -mujoco-mjx==3.2.0 +mujoco-mjx==3.2.3 # via brax -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl +multipledispatch==1.0.0 + # via botorch multiprocess==0.70.16 - # via - # datasets - # evaluate + # via datasets mypy-extensions==1.0.0 - # via typing-inspect + # via black +navix==0.7.0 + # via -r benchmarks/purejaxrl/requirements.in +ndindex==1.9.2 + # via blosc2 nest-asyncio==1.6.0 # via orbax-checkpoint networkx==3.3 - # via torch -ninja==1.11.1.1 - # via deepspeed + # via + # -r benchmarks/recursiongfn/requirements.in + # torch +numexpr==2.10.1 + # via + # blosc2 + # tables numpy==1.26.4 # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r 
benchmarks/geo_gnn/requirements.in + # -r benchmarks/llava/requirements.in + # -r benchmarks/purejaxrl/requirements.in + # -r benchmarks/torchatari/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate + # blosc2 # brax # chex + # contourpy # datasets - # deepspeed + # decord + # diffusers + # distrax # dm-env - # evaluate + # envpool + # evosax # fairscale - # fbgemm-gpu - # flax + # flashbax + # fvcore # gym + # gymnasium # jax # jaxlib # jaxopt + # jaxtyping + # matplotlib # ml-dtypes # mujoco - # onnx + # navix + # numexpr # opencv-python - # opt-einsum # optax # orbax-checkpoint # pandas # pyarrow + # pyro-ppl + # rdkit + # rlax # scikit-learn # scipy + # seaborn + # tables # tensorboard # tensorboardx + # tensorflow-probability # tensorstore + # torch-geometric # torchmetrics + # torchtune # torchvision # transformers # trimesh + # trl + # webdataset + # xformers nvidia-cublas-cu12==12.1.3.1 # via - # jax # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 - # via - # jax - # torch -nvidia-cuda-nvcc-cu12==12.5.82 - # via - # jax - # jax-cuda12-plugin + # via torch nvidia-cuda-nvrtc-cu12==12.1.105 # via torch nvidia-cuda-runtime-cu12==12.1.105 - # via - # jax - # torch -nvidia-cudnn-cu12==8.9.2.26 - # via - # jax - # torch + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch nvidia-cufft-cu12==11.0.2.54 - # via - # jax - # torch + # via torch nvidia-curand-cu12==10.3.2.106 # via torch nvidia-cusolver-cu12==11.4.5.107 - # via - # jax - # torch + # via torch nvidia-cusparse-cu12==12.1.0.106 # via - # jax # nvidia-cusolver-cu12 # torch -nvidia-ml-py==12.555.43 - # via deepspeed +nvidia-ml-py==12.560.30 + # via voir nvidia-nccl-cu12==2.20.5 + # via torch +nvidia-nvjitlink-cu12==12.6.77 # via - # jax - # torch -nvidia-nvjitlink-cu12==12.5.82 - # via - # jax # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via torch omegaconf==2.3.0 - # via voir -onnx==1.16.1 - # via -r benchmarks/dlrm/requirements.in + # via + # -r benchmarks/dinov2/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # torchtune + # voir opencv-python==4.10.0.84 - # via -r benchmarks/super-slomo/requirements.in -opt-einsum==3.3.0 - # via jax + # via -r benchmarks/vjepa/requirements.in +opt-einsum==3.4.0 + # via + # jax + # pyro-ppl optax==0.2.3 # via + # -r benchmarks/purejaxrl/requirements.in # brax # flax -orbax-checkpoint==0.5.21 +optree==0.13.0 + # via envpool +orbax-checkpoint==0.6.4 # via # brax # flax -ovld==0.3.5 +ovld==0.3.9 # via voir packaging==24.1 # via # accelerate + # black # datasets - # deepspeed - # evaluate + # envpool # huggingface-hub + # lightning # lightning-utilities + # matplotlib + # pytest + # pytorch-lightning + # setuptools-scm + # tables + # tensorboard # tensorboardx # torchmetrics # transformers -pandas==2.2.2 +pandas==2.2.3 # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/vjepa/requirements.in # datasets - # evaluate + # seaborn +pathspec==0.12.1 + # via black pillow==10.4.0 # via + # -r benchmarks/huggingface/requirements.in + # -r benchmarks/llava/requirements.in # brax + # diffusers + # fvcore + # matplotlib + # navix + # rdkit # torchvision -protobuf==4.25.3 +platformdirs==4.3.6 + # via + # black + # pylint + # wandb +pluggy==1.5.0 + # via pytest +portalocker==2.10.1 + # via iopath +protobuf==5.28.2 # via - # onnx # orbax-checkpoint # tensorboard # tensorboardx + # wandb psutil==5.9.8 # via # accelerate - # deepspeed + # 
torch-geometric # voir + # wandb ptera==1.4.1 # via voir py-cpuinfo==9.0.0 - # via deepspeed + # via + # blosc2 + # tables pyarrow==17.0.0 - # via datasets -pyarrow-hotfix==0.6 - # via datasets -pydantic==2.7.4 - # via deepspeed -pydantic-core==2.18.4 - # via pydantic -pydot==3.0.1 - # via -r benchmarks/dlrm/requirements.in + # via + # -r benchmarks/recursiongfn/requirements.in + # datasets +pycodestyle==2.12.1 + # via flake8 +pycryptodomex==3.21.0 + # via blobfile +pyflakes==3.2.0 + # via flake8 pygments==2.18.0 # via rich -pynvml==11.5.3 - # via voir +pylint==3.3.1 + # via navix pyopengl==3.1.7 # via mujoco -pyparsing==3.1.2 - # via pydot -pyre-extensions==0.0.30 - # via torchx -pysocks==1.7.1 - # via requests +pyparsing==3.1.4 + # via + # matplotlib + # torch-geometric +pyro-api==0.1.2 + # via pyro-ppl +pyro-ppl==1.9.1 + # via + # -r benchmarks/recursiongfn/requirements.in + # botorch +pytest==8.3.3 + # via navix python-dateutil==2.9.0.post0 - # via pandas + # via + # matplotlib + # pandas pytinyrenderer==0.0.14 # via brax -pytz==2024.1 +pytorch-lightning==2.4.0 + # via lightning +pytz==2024.2 # via pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via + # -r benchmarks/llm/requirements.in # -r benchmarks/timm/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate # datasets + # evosax # flax + # fvcore + # gymnax # huggingface-hub + # lightning # ml-collections # omegaconf # orbax-checkpoint - # torchx + # pytorch-lightning + # timm # transformers + # wandb + # webdataset + # yacs +rdkit==2024.3.5 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in reactivex==4.0.4 # via giving -regex==2024.5.15 - # via transformers -requests[socks]==2.32.3 +regex==2024.9.11 + # via + # diffusers + # tiktoken + # transformers +requests==2.32.3 # via # datasets - # docker - # evaluate - # gdown + # diffusers # huggingface-hub + # tiktoken + # torch-geometric # transformers -rich==13.7.1 + # wandb +rich==13.9.1 # via - # -r benchmarks/accelerate_opt/requirements.in # flax + # tyro # voir -safetensors==0.4.3 +rlax==0.1.6 + # via navix +safetensors==0.4.5 # via # -r benchmarks/timm/requirements.in # accelerate + # diffusers + # timm + # torchtune # transformers -scikit-learn==1.5.1 - # via -r benchmarks/dlrm/requirements.in -scipy==1.14.0 +scikit-learn==1.5.2 + # via gpytorch +scipy==1.14.1 # via + # -r benchmarks/dinov2/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # botorch # brax + # gpytorch # jax # jaxlib # jaxopt + # linear-operator # mujoco-mjx # scikit-learn + # torch-cluster + # torch-sparse +seaborn==0.13.2 + # via gymnax sentencepiece==0.2.0 - # via -r benchmarks/llama/requirements.in + # via + # -r benchmarks/llama/requirements.in + # torchtune +sentry-sdk==2.15.0 + # via wandb +setproctitle==1.3.3 + # via wandb +setuptools-scm==8.1.0 + # via navix +shtab==1.7.1 + # via tyro six==1.16.0 # via # asttokens - # fire + # docker-pycreds # ml-collections # python-dateutil # tensorboard -soupsieve==2.5 - # via beautifulsoup4 -sympy==1.13.0 + # tensorflow-probability +smmap==5.0.1 + # via gitdb +submitit==1.5.2 + # via + # -r benchmarks/dinov2/requirements.in + # -r benchmarks/vjepa/requirements.in +sympy==1.13.3 # via torch +tables==3.10.1 + # via -r benchmarks/recursiongfn/requirements.in tabulate==0.9.0 - # via torchx -tensorboard==2.17.0 - # via -r benchmarks/dlrm/requirements.in + # via fvcore +tensorboard==2.18.0 + # via + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/torchatari/requirements.in 
tensorboard-data-server==0.7.2 # via tensorboard tensorboardx==2.6.2.2 # via brax -tensorstore==0.1.63 +tensorflow-probability==0.24.0 + # via distrax +tensorstore==0.1.66 # via + # flashbax # flax # orbax-checkpoint termcolor==2.4.0 - # via fire + # via + # fire + # fvcore threadpoolctl==3.5.0 # via scikit-learn +tiktoken==0.7.0 + # via torchtune +timm==1.0.9 + # via -r benchmarks/vjepa/requirements.in tokenizers==0.19.1 # via transformers +tomli==2.0.2 + # via + # black + # pylint + # pytest + # setuptools-scm +tomlkit==0.13.2 + # via pylint toolz==0.12.1 # via chex -torch==2.3.1 +torch==2.4.1 # via - # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/brax/requirements.in - # -r benchmarks/dlrm/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in + # -r benchmarks/geo_gnn/requirements-pre.in # -r benchmarks/huggingface/requirements.in + # -r benchmarks/lightning/requirements.in # -r benchmarks/llama/requirements.in - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/llava/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/llm/requirements.txt + # -r benchmarks/purejaxrl/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/rlhf/requirements.in # -r benchmarks/timm/requirements.in + # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in + # -r benchmarks/vjepa/requirements.in # accelerate - # deepspeed + # botorch + # diffusers # fairscale - # torchaudio + # lightning + # linear-operator + # pyro-ppl + # pytorch-lightning + # timm # torchmetrics # torchvision - # torchviz -torchaudio==2.3.1 - # via -r benchmarks/accelerate_opt/requirements.in + # trl + # xformers +torch-cluster==1.6.3 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-geometric==2.6.1 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-scatter==2.1.2 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torch-sparse==0.6.18 + # via + # -r benchmarks/geo_gnn/requirements.in + # -r benchmarks/recursiongfn/requirements.in +torchao==0.3.1 + # via + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in + # torchtune torchcompat==1.1.4 # via # -c .pin/../constraints/hpu.txt # -r benchmarks/flops/requirements.in + # -r benchmarks/lightning/requirements.in + # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in -torchmetrics==1.0.3 - # via torchrec -torchrec==0.7.0 - # via -r benchmarks/dlrm/requirements.in -torchvision==0.18.1 +torchmetrics==1.4.2 + # via + # -r benchmarks/dinov2/requirements.in + # lightning + # pytorch-lightning +torchtune==0.2.1 + # via + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in +torchvision==0.19.1 # via - # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/diffusion/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/lightning/requirements.in # -r benchmarks/timm/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in -torchviz==0.0.2 - # via -r benchmarks/dlrm/requirements.in -torchx==0.7.0 - 
# via -r benchmarks/dlrm/requirements.in -tqdm==4.66.4 + # -r benchmarks/vjepa/requirements.in + # timm +tqdm==4.66.5 # via - # -r benchmarks/dlrm/requirements.in + # -r benchmarks/diffusion/requirements.in # -r benchmarks/flops/requirements.in - # -r benchmarks/super-slomo/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in # datasets - # deepspeed - # evaluate - # gdown + # fvcore # huggingface-hub - # torchrec + # iopath + # lightning + # pyro-ppl + # pytorch-lightning + # torch-geometric + # torchtune # transformers -transformers==4.42.4 +transformers==4.44.2 # via - # -r benchmarks/accelerate_opt/requirements.in + # -c .pin/../constraints/hpu.txt + # -r benchmarks/diffusion/requirements.in # -r benchmarks/huggingface/requirements.in # -r benchmarks/llama/requirements.in -trimesh==4.4.3 + # -r benchmarks/llava/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/rlhf/requirements.in + # trl +trimesh==4.4.9 # via # brax # mujoco-mjx -triton==2.3.1 +triton==3.0.0 # via torch +trl==0.10.1 + # via + # -c .pin/../constraints/hpu.txt + # -r benchmarks/rlhf/requirements.in +typeguard==4.3.0 + # via jaxtyping +types-protobuf==5.28.0.20240924 + # via envpool typing-extensions==4.12.2 # via + # astroid + # black + # botorch # brax # chex + # envpool # etils + # flashbax # flax + # gymnasium # huggingface-hub + # iopath + # jaxtyping + # lightning # lightning-utilities + # multidict + # navix + # optree # orbax-checkpoint - # pydantic - # pydantic-core - # pyre-extensions + # pytorch-lightning # reactivex + # rich + # submitit + # tables # torch - # typing-inspect -typing-inspect==0.9.0 - # via pyre-extensions -tzdata==2024.1 + # typeguard + # tyro +tyro==0.8.11 + # via + # -r benchmarks/torchatari/requirements.in + # navix + # trl +tzdata==2024.2 # via pandas -urllib3==1.26.19 +urllib3==2.2.3 # via - # docker + # blobfile # requests - # torchx -varname==0.10.0 + # sentry-sdk +varname==0.13.3 # via giving voir==0.2.19 # via # -c .pin/../constraints/hpu.txt - # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/brax/requirements.in - # -r benchmarks/dlrm/requirements.in + # -r benchmarks/diffusion/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in + # -r benchmarks/geo_gnn/requirements.in # -r benchmarks/huggingface/requirements.in + # -r benchmarks/lightning/requirements.in # -r benchmarks/llama/requirements.in - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/llava/requirements.in + # -r benchmarks/llm/requirements.in + # -r benchmarks/purejaxrl/requirements.in + # -r benchmarks/recursiongfn/requirements.in + # -r benchmarks/rlhf/requirements.in # -r benchmarks/timm/requirements.in + # -r benchmarks/torchatari/requirements.in # -r benchmarks/torchvision/requirements.in # -r benchmarks/torchvision_ddp/requirements.in -werkzeug==3.0.3 + # -r benchmarks/vjepa/requirements.in +wandb==0.18.3 + # via + # -r benchmarks/recursiongfn/requirements.in + # navix +webdataset==0.2.100 + # via -r benchmarks/vjepa/requirements.in +werkzeug==3.0.4 # via # flask # tensorboard -xxhash==3.4.1 - # via - # datasets - # evaluate -yarl==1.9.4 +xformers==0.0.28.post1 + # via -r benchmarks/dinov2/requirements.in +xxhash==3.5.0 + # via datasets +yacs==0.1.8 + # via fvcore +yarl==1.13.1 # via aiohttp -zipp==3.19.2 +zipp==3.20.2 # via # etils # importlib-metadata diff --git a/benchmarks/brax/requirements.hpu.txt 
b/benchmarks/brax/requirements.hpu.txt index cae1147c6..b02ff745f 100644 --- a/benchmarks/brax/requirements.hpu.txt +++ b/benchmarks/brax/requirements.hpu.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=benchmarks/brax/requirements.hpu.txt .pin/tmp-constraints-hpu-brax.txt benchmarks/brax/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - absl-py==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -35,7 +31,7 @@ brax==0.10.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/brax/requirements.in -chex==0.1.86 +chex==0.1.87 # via # -c .pin/../.pin/constraints-hpu-torch.txt # optax @@ -47,7 +43,7 @@ cloudpickle==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # gym -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera @@ -63,7 +59,7 @@ dm-tree==0.1.8 # via # -c .pin/../.pin/constraints-hpu-torch.txt # dm-env -etils[epath,epy]==1.7.0 +etils[epath,epy]==1.9.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax @@ -71,11 +67,11 @@ etils[epath,epy]==1.7.0 # mujoco-mjx # optax # orbax-checkpoint -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -85,20 +81,20 @@ flask==3.0.3 # -c .pin/../.pin/constraints-hpu-torch.txt # brax # flask-cors -flask-cors==4.0.1 +flask-cors==5.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -flax==0.8.5 +flax==0.9.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -fsspec==2024.5.0 +fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # etils # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera @@ -107,7 +103,7 @@ glfw==2.7.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # mujoco -grpcio==1.65.1 +grpcio==1.66.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax @@ -119,7 +115,11 @@ gym-notices==0.0.8 # via # -c .pin/../.pin/constraints-hpu-torch.txt # gym -importlib-resources==6.4.0 +humanize==4.10.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # orbax-checkpoint +importlib-resources==6.4.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # etils @@ -127,7 +127,7 @@ itsdangerous==2.2.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flask -jax[cuda12]==0.4.28 +jax==0.4.33 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/brax/requirements.in @@ -138,15 +138,7 @@ jax[cuda12]==0.4.28 # mujoco-mjx # optax # orbax-checkpoint -jax-cuda12-pjrt==0.4.28 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # jax-cuda12-plugin -jax-cuda12-plugin==0.4.28 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # jax -jaxlib==0.4.28+cuda12.cudnn89 +jaxlib==0.4.33 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax @@ -183,7 +175,7 @@ ml-collections==0.1.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -ml-dtypes==0.4.0 +ml-dtypes==0.5.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # jax @@ -193,17 +185,17 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # sympy -msgpack==1.0.8 +msgpack==1.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flax # orbax-checkpoint -mujoco==3.2.0 +mujoco==3.2.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax # mujoco-mjx -mujoco-mjx==3.2.0 +mujoco-mjx==3.2.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax @@ -221,14 +213,12 
@@ numpy==1.26.4 # brax # chex # dm-env - # flax # gym # jax # jaxlib # jaxopt # ml-dtypes # mujoco - # opt-einsum # optax # orbax-checkpoint # scipy @@ -238,19 +228,13 @@ numpy==1.26.4 nvidia-cublas-cu12==12.1.3.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch -nvidia-cuda-nvcc-cu12==12.5.82 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # jax nvidia-cuda-nvrtc-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -258,17 +242,14 @@ nvidia-cuda-nvrtc-cu12==12.1.105 nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch nvidia-cufft-cu12==11.0.2.54 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch nvidia-curand-cu12==10.3.2.106 # via @@ -277,23 +258,23 @@ nvidia-curand-cu12==10.3.2.106 nvidia-cusolver-cu12==11.4.5.107 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch nvidia-cusparse-cu12==12.1.0.106 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # nvidia-cusolver-cu12 # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # torch -nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.77 # via # -c .pin/../.pin/constraints-hpu-torch.txt - # jax # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 @@ -304,7 +285,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -opt-einsum==3.3.0 +opt-einsum==3.4.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # jax @@ -313,12 +294,12 @@ optax==0.2.3 # -c .pin/../.pin/constraints-hpu-torch.txt # brax # flax -orbax-checkpoint==0.5.21 +orbax-checkpoint==0.6.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax # flax -ovld==0.3.5 +ovld==0.3.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -330,7 +311,7 @@ pillow==10.4.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -protobuf==4.25.3 +protobuf==5.28.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # orbax-checkpoint @@ -347,10 +328,6 @@ pygments==2.18.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # rich -pynvml==11.5.3 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # voir pyopengl==3.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -359,7 +336,7 @@ pytinyrenderer==0.0.14 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flax @@ -370,12 +347,12 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -rich==13.7.1 +rich==13.9.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flax # voir -scipy==1.14.0 +scipy==1.14.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax @@ -388,7 +365,7 @@ six==1.16.0 # -c .pin/../.pin/constraints-hpu-torch.txt # asttokens # ml-collections -sympy==1.13.0 +sympy==1.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -396,7 +373,7 @@ tensorboardx==2.6.2.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax -tensorstore==0.1.63 +tensorstore==0.1.66 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flax @@ -405,16 +382,16 @@ toolz==0.12.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # chex -torch==2.3.1 +torch==2.4.1 # 
via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/brax/requirements.in -trimesh==4.4.3 +trimesh==4.4.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # brax # mujoco-mjx -triton==2.3.1 +triton==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -427,8 +404,9 @@ typing-extensions==4.12.2 # flax # orbax-checkpoint # reactivex + # rich # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving @@ -437,11 +415,11 @@ voir==0.2.19 # -c .pin/../.pin/constraints-hpu-torch.txt # -c .pin/../constraints/hpu.txt # -r benchmarks/brax/requirements.in -werkzeug==3.0.3 +werkzeug==3.0.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # flask -zipp==3.19.2 +zipp==3.20.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # etils diff --git a/benchmarks/diffusion/requirements.hpu.txt b/benchmarks/diffusion/requirements.hpu.txt new file mode 100644 index 000000000..88ccd569e --- /dev/null +++ b/benchmarks/diffusion/requirements.hpu.txt @@ -0,0 +1,381 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/diffusion/requirements.hpu.txt .pin/tmp-constraints-hpu-diffusion-nodes.txt benchmarks/diffusion/requirements.in +# +accelerate==0.34.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in + # diffusers +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # fsspec +aiosignal==1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +argklass==1.4.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +async-timeout==4.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +attrs==24.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +datasets==3.0.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in +diffusers[torch]==0.30.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in +dill==0.3.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # multiprocess +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # diffusers + # huggingface-hub + # torch + # transformers + # triton +frozenlist==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # aiosignal +fsspec[http]==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +hjson==3.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass +huggingface-hub==0.25.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # diffusers + # tokenizers + # transformers +idna==3.10 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +importlib-metadata==8.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # diffusers +importlib-resources==6.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +multiprocess==0.70.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # diffusers + # pandas + # pyarrow + # torchvision + # transformers +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # transformers +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # diffusers + # torchvision +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pyarrow==17.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # 
omegaconf + # transformers +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +regex==2024.9.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # diffusers + # transformers +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # diffusers + # huggingface-hub + # transformers +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # diffusers + # transformers +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tokenizers==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # diffusers + # torchvision +torchvision==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/diffusion/requirements.in + # datasets + # huggingface-hub + # transformers +transformers==4.44.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/diffusion/requirements.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # multidict + # reactivex + # rich + # torch +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/diffusion/requirements.in +xxhash==3.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +zipp==3.20.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # importlib-metadata diff --git a/benchmarks/dinov2/requirements.hpu.txt b/benchmarks/dinov2/requirements.hpu.txt new file mode 100644 index 000000000..4a11ccfbc --- /dev/null +++ b/benchmarks/dinov2/requirements.hpu.txt @@ -0,0 +1,267 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/dinov2/requirements.hpu.txt .pin/tmp-constraints-hpu-dinov2-giant-gpus.txt benchmarks/dinov2/requirements.in +# +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +cloudpickle==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # submitit +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +fvcore==0.1.5.post20221221 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +iopath==0.1.10 + # via + # 
-c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in + # fvcore +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +lightning-utilities==0.11.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchmetrics +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore + # scipy + # torchmetrics + # torchvision + # xformers +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning-utilities + # torchmetrics +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore + # torchvision +portalocker==2.10.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # iopath +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore + # omegaconf + # yacs +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens +submitit==1.5.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch 
+tabulate==0.9.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore +termcolor==2.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in + # torchmetrics + # torchvision + # xformers +torchmetrics==1.4.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +torchvision==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore + # iopath +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # iopath + # lightning-utilities + # reactivex + # rich + # submitit + # torch +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/dinov2/requirements.in +xformers==0.0.28.post1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/dinov2/requirements.in +yacs==0.1.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fvcore + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/flops/requirements.hpu.txt b/benchmarks/flops/requirements.hpu.txt index 77595d5f7..91e5677fe 100644 --- a/benchmarks/flops/requirements.hpu.txt +++ b/benchmarks/flops/requirements.hpu.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=benchmarks/flops/requirements.hpu.txt .pin/tmp-constraints-hpu-flops.txt benchmarks/flops/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -16,29 +12,29 @@ asttokens==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch # triton -fsspec==2024.5.0 +fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera # voir -importlib-resources==6.4.0 +importlib-resources==6.4.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torchcompat @@ -88,7 +84,7 @@ nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -109,11 +105,15 @@ nvidia-cusparse-cu12==12.1.0.106 # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.77 # via # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 @@ -126,7 +126,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -ovld==0.3.5 +ovld==0.3.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ 
-146,11 +146,7 @@ pygments==2.18.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # rich -pynvml==11.5.3 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # voir -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # omegaconf @@ -158,7 +154,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -rich==13.7.1 +rich==13.9.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -166,11 +162,11 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # asttokens -sympy==1.13.0 +sympy==1.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -torch==2.3.1 +torch==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/flops/requirements.in @@ -180,15 +176,15 @@ torchcompat==1.1.4 # -c .pin/../.pin/constraints-hpu-torch.txt # -c .pin/../constraints/hpu.txt # -r benchmarks/flops/requirements.in -torchvision==0.18.1 +torchvision==0.19.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/flops/requirements.in -tqdm==4.66.4 +tqdm==4.66.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/flops/requirements.in -triton==2.3.1 +triton==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -196,8 +192,9 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # reactivex + # rich # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving diff --git a/benchmarks/geo_gnn/requirements-pre.hpu.txt b/benchmarks/geo_gnn/requirements-pre.hpu.txt new file mode 100644 index 000000000..db910c1ae --- /dev/null +++ b/benchmarks/geo_gnn/requirements-pre.hpu.txt @@ -0,0 +1,99 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/geo_gnn/requirements-pre.hpu.txt .pin/tmp-constraints-hpu-dimenet.txt benchmarks/geo_gnn/requirements-pre.in +# +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # 
nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch diff --git a/benchmarks/geo_gnn/requirements.hpu.txt b/benchmarks/geo_gnn/requirements.hpu.txt new file mode 100644 index 000000000..9c6bb6d69 --- /dev/null +++ b/benchmarks/geo_gnn/requirements.hpu.txt @@ -0,0 +1,321 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/geo_gnn/requirements.hpu.txt .pin/tmp-constraints-hpu-dimenet.txt benchmarks/geo_gnn/requirements-pre.hpu.txt benchmarks/geo_gnn/requirements.in +# +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +aiosignal==1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +async-timeout==4.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +attrs==24.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch + # triton +frozenlist==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # aiosignal +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch + # torch-geometric +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch + # torch-geometric +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # sympy +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in + # pandas + # rdkit + # scipy + # torch-geometric +nvidia-cublas-cu12==12.1.3.1 + # via + 
# -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rdkit +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pyparsing==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +rdkit==2024.3.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-cluster + # torch-sparse +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r 
benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt +torch-cluster==1.6.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +torch-geometric==2.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +torch-scatter==2.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +torch-sparse==0.6.18 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/geo_gnn/requirements-pre.hpu.txt + # multidict + # reactivex + # rich + # torch +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/geo_gnn/requirements.in +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp diff --git a/benchmarks/huggingface/requirements.hpu.txt b/benchmarks/huggingface/requirements.hpu.txt index a504cba14..b5e21d99e 100644 --- a/benchmarks/huggingface/requirements.hpu.txt +++ b/benchmarks/huggingface/requirements.hpu.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=benchmarks/huggingface/requirements.hpu.txt .pin/tmp-constraints-hpu-hf.txt benchmarks/huggingface/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -16,7 +12,7 @@ asttokens==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -certifi==2024.6.2 +certifi==2024.8.30 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -24,37 +20,37 @@ charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub # torch # transformers # triton -fsspec==2024.5.0 +fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera # voir -huggingface-hub==0.24.0 +huggingface-hub==0.25.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # tokenizers # transformers -idna==3.7 +idna==3.10 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -104,7 +100,7 @@ nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -125,11 +121,15 @@ nvidia-cusparse-cu12==12.1.0.106 # -c .pin/../.pin/constraints-hpu-torch.txt # 
nvidia-cusolver-cu12
 # torch
+nvidia-ml-py==12.560.30
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # voir
nvidia-nccl-cu12==2.20.5
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # torch
-nvidia-nvjitlink-cu12==12.5.82
+nvidia-nvjitlink-cu12==12.6.77
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # nvidia-cusolver-cu12
@@ -142,7 +142,7 @@ omegaconf==2.3.0
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # voir
-ovld==0.3.5
+ovld==0.3.9
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # voir
@@ -151,6 +151,10 @@ packaging==24.1
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # huggingface-hub
 # transformers
+pillow==10.4.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # -r benchmarks/huggingface/requirements.in
psutil==5.9.8
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
@@ -163,11 +167,7 @@ pygments==2.18.0
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # rich
-pynvml==11.5.3
- # via
- # -c .pin/../.pin/constraints-hpu-torch.txt
- # voir
-pyyaml==6.0.1
+pyyaml==6.0.2
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # huggingface-hub
@@ -177,7 +177,7 @@ reactivex==4.0.4
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # giving
-regex==2024.5.15
+regex==2024.9.11
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # transformers
@@ -186,11 +186,11 @@ requests==2.32.3
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # huggingface-hub
 # transformers
-rich==13.7.1
+rich==13.9.1
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # voir
-safetensors==0.4.3
+safetensors==0.4.5
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # transformers
@@ -198,7 +198,7 @@ six==1.16.0
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # asttokens
-sympy==1.13.0
+sympy==1.13.3
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # torch
@@ -206,20 +206,21 @@ tokenizers==0.19.1
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # transformers
-torch==2.3.1
+torch==2.4.1
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # -r benchmarks/huggingface/requirements.in
-tqdm==4.66.4
+tqdm==4.66.5
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # huggingface-hub
 # transformers
-transformers==4.42.4
+transformers==4.44.2
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
+ # -c .pin/../constraints/hpu.txt
 # -r benchmarks/huggingface/requirements.in
-triton==2.3.1
+triton==3.0.0
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # torch
@@ -228,12 +229,13 @@ typing-extensions==4.12.2
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # huggingface-hub
 # reactivex
+ # rich
 # torch
-urllib3==1.26.19
+urllib3==2.2.3
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # requests
-varname==0.10.0
+varname==0.13.3
 # via
 # -c .pin/../.pin/constraints-hpu-torch.txt
 # giving
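The PT_HPU_LAZY_MODE toggle added at the top of benchmarks/lightning/main.py below is dense; a minimal equivalent sketch, assuming the usual Habana convention that "1" selects lazy (graph) mode and "0" forces eager mode:

import os

# WORLD_SIZE is only set under a distributed launcher such as torchrun,
# so a missing value (default -1 here) means a single-process run.
world_size = int(os.getenv("WORLD_SIZE", -1))
if world_size <= 0:
    os.environ["PT_HPU_LAZY_MODE"] = "1"  # single process: keep lazy mode on
else:
    os.environ["PT_HPU_LAZY_MODE"] = "0"  # distributed: fall back to eager mode

The variable has to be in the environment before torch (and with it the Habana plugin) is imported, which is presumably why the patch sets it ahead of the torch imports.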
diff --git a/benchmarks/lightning/main.py b/benchmarks/lightning/main.py
index aca89ee47..4c3d1206f 100644
--- a/benchmarks/lightning/main.py
+++ b/benchmarks/lightning/main.py
@@ -1,14 +1,17 @@
 #!/usr/bin/env python
+
 import argparse
 import os
+# FIXME: HPU only; lazy mode ("1") for single-process runs, eager ("0") when WORLD_SIZE > 0
+os.environ["PT_HPU_LAZY_MODE"] = str(int(int(os.getenv("WORLD_SIZE", -1)) <= 0))
+
 import torch
 import torch.nn.functional as F
 import lightning as L
 import torchvision.models as torchvision_models
-import torchcompat.core as accelerator
 from benchmate.dataloader import imagenet_dataloader, dataloader_arguments
@@ -37,7 +40,7 @@ def configure_optimizers(self):
 def prepare_voir():
 from benchmate.observer import BenchObserver
 from benchmate.monitor import bench_monitor
-
+ import torchcompat.core as accelerator
 observer = BenchObserver(
 accelerator.Event,
 earlystop=100,
@@ -49,6 +52,10 @@ def prepare_voir():
 return observer, bench_monitor
 def main():
+ rank = int(os.getenv("RANK", 0))
+ world_size = int(os.getenv("WORLD_SIZE", 1))
+ local_world_size = int(os.getenv("LOCAL_WORLD_SIZE", 1))
+
 parser = argparse.ArgumentParser(description='simple distributed training job')
 parser.add_argument(
 "--epochs",
@@ -64,11 +71,10 @@ def main():
 args = parser.parse_args()
 model = getattr(torchvision_models, args.model)()
- rank = int(os.getenv("RANK", 0))
- world_size = int(os.getenv("WORLD_SIZE", 1))
- local_world_size = int(os.getenv("LOCAL_WORLD_SIZE", 1))
-
+ import torchcompat.core as accelerator
+ # n = accelerator.device_count()  # superseded by local_world_size below
+ n = local_world_size
 nnodes = world_size // local_world_size
 model = TorchvisionLightning(model)
@@ -83,9 +89,9 @@ def main():
 accelerator="auto",
 devices=n,
 num_nodes=nnodes,
- strategy="ddp",
+ strategy="auto",
 max_epochs=args.epochs,
- precision="16-mixed",
+ precision="bf16-mixed",
 enable_checkpointing=False,
 enable_progress_bar=False,
 reload_dataloaders_every_n_epochs=1,
diff --git a/benchmarks/lightning/requirements.hpu.txt b/benchmarks/lightning/requirements.hpu.txt
new file mode 100644
index 000000000..f86fb064d
--- /dev/null
+++ b/benchmarks/lightning/requirements.hpu.txt
@@ -0,0 +1,285 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+# pip-compile --output-file=benchmarks/lightning/requirements.hpu.txt .pin/tmp-constraints-hpu-lightning-gpus.txt benchmarks/lightning/requirements.in
+#
+aiohappyeyeballs==2.4.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+aiohttp==3.10.8
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # fsspec
+aiosignal==1.3.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+antlr4-python3-runtime==4.9.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # omegaconf
+asttokens==2.4.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # giving
+async-timeout==4.0.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+attrs==24.2.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+codefind==0.1.7
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # ptera
+executing==2.1.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # varname
+filelock==3.16.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # torch
+ # triton
+frozenlist==1.4.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+ # aiosignal
+fsspec[http]==2024.6.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # lightning
+ # pytorch-lightning
+ # torch
+giving==0.4.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # ptera
+ # voir
+idna==3.10
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # yarl
+importlib-resources==6.4.5
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # torchcompat
+jinja2==3.1.4
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # torch
+lightning==2.4.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # -r benchmarks/lightning/requirements.in
+lightning-utilities==0.11.7
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # lightning
+ # pytorch-lightning
+ # torchmetrics
+markdown-it-py==3.0.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # rich
+markupsafe==2.1.5
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # jinja2
+mdurl==0.1.2
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # markdown-it-py
+mpmath==1.3.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # sympy
+multidict==6.1.0
+ # via
+ #
-c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchmetrics + # torchvision +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning + # lightning-utilities + # pytorch-lightning + # torchmetrics +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchvision +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pytorch-lightning==2.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning + # omegaconf + # pytorch-lightning +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/lightning/requirements.in + # lightning + # pytorch-lightning + # torchmetrics + # torchvision +torchcompat==1.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/lightning/requirements.in +torchmetrics==1.4.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning + # pytorch-lightning +torchvision==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/lightning/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # lightning + # pytorch-lightning +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # lightning + # lightning-utilities + # multidict + # pytorch-lightning + # reactivex + # rich + # torch +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/lightning/requirements.in +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/llama/requirements.hpu.txt b/benchmarks/llama/requirements.hpu.txt index 2368c1502..9c01a4dd6 100644 --- a/benchmarks/llama/requirements.hpu.txt +++ b/benchmarks/llama/requirements.hpu.txt @@ -4,11 +4,11 @@ # # pip-compile --output-file=benchmarks/llama/requirements.hpu.txt .pin/tmp-constraints-hpu-llm.txt benchmarks/llama/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - -aiohttp==3.9.5 +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets @@ -29,11 +29,11 @@ async-timeout==4.0.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # aiohttp -attrs==23.2.0 +attrs==24.2.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # aiohttp -certifi==2024.6.2 +certifi==2024.8.30 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -41,11 +41,11 @@ charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera -datasets==2.20.0 +datasets==3.0.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/llama/requirements.in @@ -54,7 +54,7 @@ dill==0.3.8 # -c .pin/../.pin/constraints-hpu-torch.txt # datasets # multiprocess -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname @@ -62,7 +62,7 @@ fairscale==0.4.13 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/llama/requirements.in -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets @@ -70,7 +70,7 @@ filelock==3.15.4 # torch # transformers # triton -fire==0.6.0 +fire==0.7.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/llama/requirements.in @@ -79,24 +79,24 @@ frozenlist==1.4.1 # -c .pin/../.pin/constraints-hpu-torch.txt # aiohttp # aiosignal -fsspec[http]==2024.5.0 +fsspec[http]==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera # voir -huggingface-hub==0.24.0 +huggingface-hub==0.25.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets # tokenizers # transformers -idna==3.7 +idna==3.10 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -121,7 +121,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # sympy -multidict==6.0.5 +multidict==6.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # aiohttp @@ -160,7 +160,7 @@ nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -181,11 +181,15 @@ nvidia-cusparse-cu12==12.1.0.106 # -c .pin/../.pin/constraints-hpu-torch.txt # 
nvidia-cusolver-cu12 # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.77 # via # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 @@ -198,7 +202,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -ovld==0.3.5 +ovld==0.3.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -208,7 +212,7 @@ packaging==24.1 # datasets # huggingface-hub # transformers -pandas==2.2.2 +pandas==2.2.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets @@ -224,27 +228,19 @@ pyarrow==17.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets -pyarrow-hotfix==0.6 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # datasets pygments==2.18.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # rich -pynvml==11.5.3 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # voir python-dateutil==2.9.0.post0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # pandas -pytz==2024.1 +pytz==2024.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets @@ -255,7 +251,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -regex==2024.5.15 +regex==2024.9.11 # via # -c .pin/../.pin/constraints-hpu-torch.txt # transformers @@ -265,11 +261,11 @@ requests==2.32.3 # datasets # huggingface-hub # transformers -rich==13.7.1 +rich==13.9.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -safetensors==0.4.3 +safetensors==0.4.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # transformers @@ -281,9 +277,8 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # asttokens - # fire # python-dateutil -sympy==1.13.0 +sympy==1.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -295,22 +290,23 @@ tokenizers==0.19.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # transformers -torch==2.3.1 +torch==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/llama/requirements.in # fairscale -tqdm==4.66.4 +tqdm==4.66.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets # huggingface-hub # transformers -transformers==4.42.4 +transformers==4.44.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt # -r benchmarks/llama/requirements.in -triton==2.3.1 +triton==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -318,17 +314,19 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub + # multidict # reactivex + # rich # torch -tzdata==2024.1 +tzdata==2024.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # pandas -urllib3==1.26.19 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving @@ -337,11 +335,11 @@ voir==0.2.19 # -c .pin/../.pin/constraints-hpu-torch.txt # -c .pin/../constraints/hpu.txt # -r benchmarks/llama/requirements.in -xxhash==3.4.1 +xxhash==3.5.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # datasets -yarl==1.9.4 +yarl==1.13.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # aiohttp diff --git a/benchmarks/llava/benchfile.py b/benchmarks/llava/benchfile.py index 3bc06eaa7..d6d40d6e7 100644 --- a/benchmarks/llava/benchfile.py +++ b/benchmarks/llava/benchfile.py @@ -19,7 +19,9 @@ class 
Llava(Package):
 def make_env(self):
 # Return a dict of environment variables for prepare_script and
 # main_script.
- return super().make_env()
+ env = super().make_env()
+ env["PT_HPU_LAZY_MODE"] = "0"
+ return env
 async def install(self):
 await super().install() # super() call installs the requirements
diff --git a/benchmarks/llava/main.py b/benchmarks/llava/main.py
index 879baca01..233ae2ebb 100755
--- a/benchmarks/llava/main.py
+++ b/benchmarks/llava/main.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 from dataclasses import dataclass
-
 import torch
+import torchcompat.core as compat
 from accelerate import Accelerator
 from accelerate.utils import set_seed
@@ -90,8 +89,10 @@ def batch_size_fn(batch):
 optimizer = observer.optimizer(torch.optim.AdamW(model.parameters(), lr=5e-5))
 model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
+ # model = torch.compile(model,backend="hpu_backend")
+
 for epoch in range(args.epochs):
 for i, batch in enumerate(observer.iterate(dataloader)):
 images = batch["images"][0] # Access the first item in the list of images
 texts = batch["texts"]
 prompt = apply_chat_template(texts)
@@ -124,7 +126,9 @@ def batch_size_fn(batch):
 if accelerator.sync_gradients:
 accelerator.clip_grad_norm_(model.parameters(), 1.0)
+ compat.mark_step()
 optimizer.step()
+ compat.mark_step()
 optimizer.zero_grad()
 observer.record_loss(loss)
diff --git a/benchmarks/llava/requirements.hpu.txt b/benchmarks/llava/requirements.hpu.txt
new file mode 100644
index 000000000..3bd40dff2
--- /dev/null
+++ b/benchmarks/llava/requirements.hpu.txt
@@ -0,0 +1,343 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+# pip-compile --output-file=benchmarks/llava/requirements.hpu.txt .pin/tmp-constraints-hpu-llava-single.txt benchmarks/llava/requirements.in
+#
+accelerate==0.34.2
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # -r benchmarks/llava/requirements.in
+aiohappyeyeballs==2.4.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+aiohttp==3.10.8
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # datasets
+ # fsspec
+aiosignal==1.3.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+antlr4-python3-runtime==4.9.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # omegaconf
+asttokens==2.4.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # giving
+async-timeout==4.0.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+attrs==24.2.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+certifi==2024.8.30
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # requests
+charset-normalizer==3.3.2
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # requests
+codefind==0.1.7
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # ptera
+datasets==3.0.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # -r benchmarks/llava/requirements.in
+dill==0.3.8
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # datasets
+ # multiprocess
+executing==2.1.0
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # varname
+filelock==3.16.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # datasets
+ # huggingface-hub
+ # torch
+ # transformers
+ # triton
+frozenlist==1.4.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # aiohttp
+ # aiosignal
+fsspec[http]==2024.6.1
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # datasets
+ # huggingface-hub
+ # torch
+giving==0.4.3
+ # via
+ # -c .pin/../.pin/constraints-hpu-torch.txt
+ # ptera
+
# voir +huggingface-hub==0.25.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # tokenizers + # transformers +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +multiprocess==0.70.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llava/requirements.in + # accelerate + # datasets + # pandas + # pyarrow + # transformers +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # transformers +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llava/requirements.in +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pyarrow==17.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # 
omegaconf + # transformers +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +regex==2024.9.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # transformers +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # transformers +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tokenizers==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llava/requirements.in + # accelerate +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # transformers +transformers==4.44.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llava/requirements.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # multidict + # reactivex + # rich + # torch +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llava/requirements.in +xxhash==3.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp diff --git a/benchmarks/llm/configs/llama3_70B_full.yaml b/benchmarks/llm/configs/llama3_70B_full.yaml index ae5cf2afb..22b52b796 100644 --- a/benchmarks/llm/configs/llama3_70B_full.yaml +++ b/benchmarks/llm/configs/llama3_70B_full.yaml @@ -82,7 +82,7 @@ optimizer: foreach: False # Note: highly recommended to use fused=True optimizer flag # with CPU offload for faster optimizer step. 
- fused: True
+ fused: False
 loss:
 _component_: torch.nn.CrossEntropyLoss
@@ -94,9 +94,9 @@ gradient_accumulation_steps: 1
 device: cuda
 # Memory management
-enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
-fsdp_cpu_offload: True
+enable_activation_checkpointing: False
+memory_efficient_fsdp_wrap: False
+fsdp_cpu_offload: False
 # Reduced precision
 dtype: bf16
diff --git a/benchmarks/llm/recipes/full_finetune_distributed.py b/benchmarks/llm/recipes/full_finetune_distributed.py
index 3a51842da..19556ec71 100755
--- a/benchmarks/llm/recipes/full_finetune_distributed.py
+++ b/benchmarks/llm/recipes/full_finetune_distributed.py
@@ -16,6 +16,7 @@
 import torch
 from omegaconf import DictConfig, ListConfig
+import torchcompat.core as acc
 from torch import nn
 from torch.distributed import init_process_group
 from torch.distributed.fsdp import (
@@ -38,6 +39,8 @@
 log = utils.get_logger("DEBUG")
+HPU_UNSUPPORTED = False
+
 class FullFinetuneRecipeDistributed(FTRecipeInterface):
 """
@@ -98,7 +101,8 @@ class FullFinetuneRecipeDistributed(FTRecipeInterface):
 def __init__(self, cfg: DictConfig) -> None:
- self._device = utils.get_device(device=cfg.device)
+ import os
+ self._device = acc.fetch_device(int(os.getenv("LOCAL_RANK", "0")))
 self._dtype = utils.get_dtype(cfg.dtype, device=self._device)
 if self._dtype == torch.float16:
@@ -131,7 +135,10 @@ def __init__(self, cfg: DictConfig) -> None:
 # These are public properties which are updated by the checkpoint loader
 # when ``resume_from_checkpoint`` is `True` or validated in tests
- self.seed = utils.set_seed(seed=cfg.seed)
+ if HPU_UNSUPPORTED:
+ self.seed = utils.set_seed(seed=cfg.seed)
+ else:
+ self.seed = 1
 self.epochs_run = 0
 self.total_epochs = cfg.epochs
 self.max_steps_per_epoch = cfg.max_steps_per_epoch
@@ -351,8 +358,10 @@ def _setup_model(
 )
 if self._is_rank_zero:
- memory_stats = utils.get_memory_stats(device=self._device)
- utils.log_memory_stats(memory_stats)
+ if HPU_UNSUPPORTED:
+ pass
+ #memory_stats = utils.get_memory_stats(device=self._device)
+ #utils.log_memory_stats(memory_stats)
 # synchronize before training begins
 torch.distributed.barrier()
@@ -413,6 +422,7 @@ def _setup_data(
 dataset=ds,
 batch_size=batch_size,
 sampler=sampler,
+ # persistent_workers=True,
 collate_fn=partial(
 utils.padded_collate,
 padding_idx=self._tokenizer.pad_id,
@@ -543,31 +553,13 @@ def train(self) -> None:
 f"{curr_epoch+1}|{self.global_step}|Loss: {loss_to_log}"
 )
- # Log per-step metrics
- if (
- self.global_step % self._log_every_n_steps == 0
- and self._is_rank_zero
- ):
- time_per_step = time.perf_counter() - t0
- log_dict = {
- "loss": loss_to_log,
- "lr": self._optimizer.param_groups[0]["lr"],
- "tokens_per_second_per_gpu": num_tokens / time_per_step,
- }
- if self._log_peak_memory_stats:
- log_dict.update(utils.get_memory_stats(device=self._device))
- self._metric_logger.log_dict(
- log_dict,
- step=self.global_step,
- )
-
 # Reset running stats for the next step
 running_loss = 0
 num_tokens = 0
 t0 = time.perf_counter()
-
+
 self.epochs_run += 1
- self.save_checkpoint(epoch=curr_epoch)
 def cleanup(self) -> None:
 if self._is_rank_zero:
@@ -618,7 +611,8 @@ def recipe_main(cfg: DictConfig) -> None:
 "If using tune CLI, please specify --nnodes 1 and --nproc_per_node [num_gpus]"
 )
- init_process_group(backend="gloo" if cfg.device == "cpu" else "nccl")
+ acc.init_process_group()
+
 if cfg.get("fsdp_cpu_offload", False):
 # Utilize all available CPU cores for intra-op parallelism. This provides ~2x
 # speed up when benchmarking fused AdamW on CPU
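The recipe edits above and below all apply the same substitution: CUDA-specific calls are swapped for their torchcompat equivalents, and the lazy graph is flushed around the optimizer step. A condensed sketch of the pattern, assuming torchcompat.core exposes fetch_device, init_process_group and mark_step exactly as these hunks use them:

import os
import torchcompat.core as acc

# Device selection, replacing utils.get_device(device=cfg.device).
device = acc.fetch_device(int(os.getenv("LOCAL_RANK", "0")))

# Process-group setup, replacing init_process_group(backend="gloo"/"nccl").
acc.init_process_group()

def training_step(model, batch, labels, loss_fn, optimizer):
    loss = loss_fn(model(batch), labels)
    loss.backward()
    acc.mark_step()   # materialize the lazy graph before the optimizer step
    optimizer.step()
    acc.mark_step()   # flush the optimizer update as well
    optimizer.zero_grad(set_to_none=True)

On non-HPU backends mark_step is expected to be a no-op, which is what lets the same recipe code run unchanged on CUDA.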
diff --git a/benchmarks/llm/recipes/full_finetune_single_device.py b/benchmarks/llm/recipes/full_finetune_single_device.py
index 98322579f..629b0e9a9 100755
--- a/benchmarks/llm/recipes/full_finetune_single_device.py
+++ b/benchmarks/llm/recipes/full_finetune_single_device.py
@@ -97,7 +97,8 @@ class FullFinetuneRecipeSingleDevice(FTRecipeInterface):
 """
 def __init__(self, cfg: DictConfig) -> None:
- self._device = utils.get_device(device=cfg.device)
+ import torchcompat.core as accelerator
+ self._device = accelerator.fetch_device(int(os.getenv("HABANA_VISIBLE_MODULES", "0").split(",")[0]))
 self._dtype = utils.get_dtype(cfg.dtype, device=self._device)
 # Disable for fp16, as we haven't validated "full" fp16 with this recipe, nor
 # enabled necessary features such as gradient scaling.
@@ -279,9 +279,9 @@ def _setup_model(
 log.info("Compiling model with torch.compile...")
 backend = os.environ.get("TORCH_COMPILE_BACKEND", "inductor")
 model.compile(backend=backend)
- if self._device.type == "cuda":
- memory_stats = utils.get_memory_stats(device=self._device)
- utils.log_memory_stats(memory_stats)
+ # if self._device.type == "cuda":
+ # memory_stats = utils.get_memory_stats(device=self._device)
+ # utils.log_memory_stats(memory_stats)
 return model
@@ -487,8 +487,8 @@ def train(self) -> None:
 ),
 "tokens_per_second_per_gpu": num_tokens / time_per_step,
 }
- if self._device.type == "cuda" and self._log_peak_memory_stats:
- log_dict.update(utils.get_memory_stats(device=self._device))
+ # if self._device.type == "cuda" and self._log_peak_memory_stats:
+ # log_dict.update(utils.get_memory_stats(device=self._device))
 self._metric_logger.log_dict(
 log_dict,
 step=self.global_step,
diff --git a/benchmarks/llm/recipes/lora_finetune_distributed.py b/benchmarks/llm/recipes/lora_finetune_distributed.py
index 18b736fbf..ae7c5b403 100755
--- a/benchmarks/llm/recipes/lora_finetune_distributed.py
+++ b/benchmarks/llm/recipes/lora_finetune_distributed.py
@@ -16,6 +16,7 @@
 import torch
 from omegaconf import DictConfig, ListConfig
+import torchcompat.core as acc
 from torch import nn
 from torch.distributed import destroy_process_group, init_process_group
@@ -44,6 +45,9 @@
 log = utils.get_logger("DEBUG")
+HPU_UNSUPPORTED = False
+
+
 class LoRAFinetuneRecipeDistributed(FTRecipeInterface):
 """
 Distributed LoRA finetuning recipe for dense transformer-based LLMs such as Llama2.
This recipe supports @@ -108,7 +112,7 @@ class LoRAFinetuneRecipeDistributed(FTRecipeInterface): """ def __init__(self, cfg: DictConfig) -> None: - self._device = utils.get_device(device=cfg.device) + self._device = acc.fetch_device(int(os.getenv("LOCAL_RANK", "0"))) self._dtype = utils.get_dtype(cfg.dtype, device=self._device) if self._dtype == torch.float16: @@ -132,7 +136,11 @@ def __init__(self, cfg: DictConfig) -> None: # These attributes constitute the recipe state and are updated by ``load_checkpoint`` # when ``resume_from_checkpoint`` is ``True`` - self.seed = utils.set_seed(seed=cfg.seed) + if HPU_UNSUPPORTED: + self.seed = utils.set_seed(seed=cfg.seed) + else: + self.seed = 1 + self.epochs_run = 0 self.total_epochs = cfg.epochs self.max_steps_per_epoch = cfg.max_steps_per_epoch @@ -428,7 +436,7 @@ def _setup_model( # Initialize empty modules on all non-zero ranks param_init_fn=( lambda module: module.to_empty( - device=torch.device("cuda"), recurse=False + device=self._device, recurse=False ) if not self._is_rank_zero else None @@ -443,8 +451,10 @@ def _setup_model( model, auto_wrap_policy={modules.TransformerDecoderLayer} ) if self._is_rank_zero: - memory_stats = utils.get_memory_stats(device=self._device) - utils.log_memory_stats(memory_stats) + if HPU_UNSUPPORTED: + pass + # memory_stats = utils.get_memory_stats(device=self._device) + # utils.log_memory_stats(memory_stats) # synchronize before training begins torch.distributed.barrier() @@ -703,8 +713,9 @@ def train(self) -> None: "lr": self._optimizer.param_groups[0]["lr"], "tokens_per_second_per_gpu": num_tokens / time_per_step, } - if self._log_peak_memory_stats: - log_dict.update(utils.get_memory_stats(device=self._device)) + # if self._log_peak_memory_stats: + # if HPU_UNSUPPORTED: + # log_dict.update(utils.get_memory_stats(device=self._device)) self._metric_logger.log_dict( log_dict, step=self.global_step, @@ -773,7 +784,7 @@ def recipe_main(cfg: DictConfig) -> None: "If using tune CLI, please specify --nnodes 1 and --nproc_per_node [num_gpus]" ) os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1" - init_process_group(backend="gloo" if cfg.device == "cpu" else "nccl") + acc.init_process_group() config.log_config(recipe_name="LoRAFinetuneRecipeDistributed", cfg=cfg) diff --git a/benchmarks/llm/recipes/lora_finetune_single_device.py b/benchmarks/llm/recipes/lora_finetune_single_device.py index cf5256ead..9060d2036 100755 --- a/benchmarks/llm/recipes/lora_finetune_single_device.py +++ b/benchmarks/llm/recipes/lora_finetune_single_device.py @@ -101,8 +101,9 @@ class LoRAFinetuneRecipeSingleDevice(FTRecipeInterface): """ def __init__(self, cfg: DictConfig) -> None: - - self._device = utils.get_device(device=cfg.device) + import torchcompat.core as accelerator + + self._device = accelerator.fetch_device(int(os.getenv("HABANA_VISIBLE_MODULES", "0").split(",")[0])) # Reduced precision logic self._dtype = utils.get_dtype(cfg.dtype, device=self._device) # fp16 precision is explicitly disabled as it is not supported in this @@ -388,9 +389,9 @@ def _setup_model( log.info("Compiling model with torch.compile...") backend = os.environ.get("TORCH_COMPILE_BACKEND", "inductor") model.compile(backend=backend) - if self._device.type == "cuda": - memory_stats = utils.get_memory_stats(device=self._device) - utils.log_memory_stats(memory_stats) + # if self._device.type == "cuda": + # memory_stats = utils.get_memory_stats(device=self._device) + # utils.log_memory_stats(memory_stats) return model def _setup_optimizer( @@ -528,7 +529,8 @@ def 
train(self) -> None: """ The core training loop. """ - + import torchcompat.core as accelerator + if self._model_compile: log.info( "NOTE: torch.compile is enabled and model is compiled in first forward. Expect a relatively slow first iteration." @@ -579,10 +581,13 @@ def train(self) -> None: loss = self._loss_fn(logits, labels) / self._gradient_accumulation_steps running_loss += loss loss.backward() + accelerator.mark_step() # Step with optimizer if (idx + 1) % self._gradient_accumulation_steps == 0: self._optimizer.step() + accelerator.mark_step() + self._optimizer.zero_grad(set_to_none=True) self._lr_scheduler.step() # Update the number of steps when the weights are updated @@ -603,13 +608,13 @@ def train(self) -> None: "lr": self._optimizer.param_groups[0]["lr"], "tokens_per_second_per_gpu": num_tokens / time_per_step, } - if ( - self._device.type == "cuda" - and self._log_peak_memory_stats - ): - log_dict.update( - utils.get_memory_stats(device=self._device) - ) + # if ( + # self._device.type == "cuda" + # and self._log_peak_memory_stats + # ): + # log_dict.update( + # utils.get_memory_stats(device=self._device) + # ) self._metric_logger.log_dict( log_dict, step=self.global_step, diff --git a/benchmarks/llm/recipes/ppo_full_finetune_single_device.py b/benchmarks/llm/recipes/ppo_full_finetune_single_device.py index 8ee77c06a..fbf8630a2 100644 --- a/benchmarks/llm/recipes/ppo_full_finetune_single_device.py +++ b/benchmarks/llm/recipes/ppo_full_finetune_single_device.py @@ -496,9 +496,9 @@ def _setup_model( ref_policy_model.compile(backend=backend) value_model.compile(backend=backend) - if self._device.type == "cuda": - memory_stats = utils.get_memory_stats(device=self._device) - utils.log_memory_stats(memory_stats) + # if self._device.type == "cuda": + # memory_stats = utils.get_memory_stats(device=self._device) + # utils.log_memory_stats(memory_stats) return policy_model, value_model, reward_model, ref_policy_model @@ -1031,8 +1031,8 @@ def log_metrics( "approx_policy_kl": ppo_stats.approx_policy_kls.mean(), "response_lengths": trajectory.seq_lens.float().mean(), } - if self._device.type == "cuda" and self._log_peak_memory_stats: - log_dict.update(utils.get_memory_stats(device=self._device)) + # if self._device.type == "cuda" and self._log_peak_memory_stats: + # log_dict.update(utils.get_memory_stats(device=self._device)) self._metric_logger.log_dict(log_dict, step=self.global_step) diff --git a/benchmarks/llm/requirements.hpu.txt b/benchmarks/llm/requirements.hpu.txt new file mode 100644 index 000000000..9b88be532 --- /dev/null +++ b/benchmarks/llm/requirements.hpu.txt @@ -0,0 +1,408 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/llm/requirements.hpu.txt .pin/tmp-constraints-hpu-llm-full-mp-nodes.txt benchmarks/llm/requirements.in +# +accelerate==0.34.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.in +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # fsspec +aiosignal==1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +argklass==1.4.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.in +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving 
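+# Every pin below is capped by the shared constraint file pulled in through
+# "-c .pin/../.pin/constraints-hpu-torch.txt"; to move a version, change the
+# corresponding requirements.in and regenerate this file rather than editing
+# it by hand, for example:
+#   pip-compile --output-file=benchmarks/llm/requirements.hpu.txt .pin/tmp-constraints-hpu-llm-full-mp-nodes.txt benchmarks/llm/requirements.in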
+async-timeout==4.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +attrs==24.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +blobfile==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.txt + # torchtune +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +datasets==3.0.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchtune +dill==0.3.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # multiprocess +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +fairscale==0.4.13 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.in + # -r benchmarks/llm/requirements.txt +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blobfile + # datasets + # huggingface-hub + # torch + # transformers + # triton +fire==0.7.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.txt +frozenlist==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # aiosignal +fsspec[http]==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +hjson==3.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass +huggingface-hub==0.25.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # tokenizers + # torchtune + # transformers +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +importlib-resources==6.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +lxml==5.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blobfile +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +multiprocess==0.70.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # fairscale + # pandas + # pyarrow + # torchtune + # transformers +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchtune + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # transformers +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pyarrow==17.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pycryptodomex==3.21.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blobfile +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.in + # accelerate + # datasets + # huggingface-hub + # omegaconf + # transformers +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +regex==2024.9.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tiktoken + # transformers +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # tiktoken + # transformers +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # torchtune + # transformers +sentencepiece==0.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchtune +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +termcolor==2.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # fire +tiktoken==0.7.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchtune +tokenizers==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/llm/requirements.in + # -r benchmarks/llm/requirements.txt + # accelerate + # fairscale +torchao==0.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in + # torchtune +torchtune==0.2.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # torchtune + # transformers +transformers==4.44.2 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # multidict + # reactivex + # rich + # torch +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blobfile + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/llm/requirements.in +xxhash==3.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp diff --git a/benchmarks/purejaxrl/requirements.hpu.txt b/benchmarks/purejaxrl/requirements.hpu.txt new file mode 100644 index 000000000..aeb2b1101 --- /dev/null +++ b/benchmarks/purejaxrl/requirements.hpu.txt @@ -0,0 +1,743 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/purejaxrl/requirements.hpu.txt .pin/tmp-constraints-hpu-ppo.txt benchmarks/purejaxrl/requirements.in +# +absl-py==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # chex + # distrax + # dm-env + # ml-collections + # mujoco + # mujoco-mjx + # optax + # orbax-checkpoint + # rlax + # tensorflow-probability +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +argklass==1.4.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +astroid==3.3.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pylint +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +black==24.8.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +blinker==1.8.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flask +brax==0.10.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # sentry-sdk +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +chex==0.1.87 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # distrax + # evosax + # flashbax + # gymnax + # optax + # rlax +click==8.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black + # flask + # wandb +cloudpickle==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gym + # gymnasium + # tensorflow-probability +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +contextlib2==21.6.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ml-collections +contourpy==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib +cycler==0.12.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib +decorator==5.1.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorflow-probability +dill==0.3.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pylint +distrax==0.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # rlax +dm-env==1.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # rlax +dm-tree==0.1.8 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # dm-env + # tensorflow-probability +docker-pycreds==0.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +docstring-parser==0.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +dotmap==1.3.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # evosax +etils[epath,epy]==1.9.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # mujoco + # mujoco-mjx + # optax + # orbax-checkpoint +evosax==0.1.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +exceptiongroup==1.2.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pytest +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +farama-notifications==0.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gymnasium +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +flake8==7.1.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +flashbax==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +flask==3.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # flask-cors +flask-cors==5.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +flax==0.9.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # brax + # evosax + # flashbax + # gymnax + # navix +fonttools==4.54.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # etils + # torch +gast==0.6.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorflow-probability +gitdb==4.0.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gitpython +gitpython==3.1.43 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +glfw==2.7.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # mujoco +grpcio==1.66.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +gym==0.26.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # gymnax +gym-notices==0.0.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gym +gymnasium==0.29.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gymnax +gymnax==0.0.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/purejaxrl/requirements.in +hjson==3.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass +humanize==4.10.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # orbax-checkpoint +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +importlib-resources==6.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # argklass + # etils +iniconfig==2.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pytest +isort==5.13.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pylint +itsdangerous==2.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flask +jax==0.4.33 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # brax + # chex + # distrax + # evosax + # flashbax + # flax + # gymnax + # jaxopt + # mujoco-mjx + # optax + # orbax-checkpoint + # rlax +jaxlib==0.4.33 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # chex + # distrax + # evosax + # flashbax + 
# gymnax + # jax + # jaxopt + # mujoco-mjx + # optax + # orbax-checkpoint + # rlax +jaxopt==0.8.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # flask + # torch +kiwisolver==1.4.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 + # werkzeug +matplotlib==3.9.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # evosax + # gymnax + # seaborn +mccabe==0.7.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flake8 + # pylint +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +ml-collections==0.1.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +ml-dtypes==0.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jax + # jaxlib + # tensorstore +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +msgpack==1.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flax + # orbax-checkpoint +mujoco==3.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # mujoco-mjx +mujoco-mjx==3.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +mypy-extensions==1.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black +navix==0.7.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +nest-asyncio==1.6.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # orbax-checkpoint +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # brax + # chex + # contourpy + # distrax + # dm-env + # evosax + # flashbax + # gym + # gymnasium + # jax + # jaxlib + # jaxopt + # matplotlib + # ml-dtypes + # mujoco + # navix + # optax + # orbax-checkpoint + # pandas + # rlax + # scipy + # seaborn + # tensorboardx + # tensorflow-probability + # tensorstore + # trimesh +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # 
-c .pin/../.pin/constraints-hpu-torch.txt + # voir +opt-einsum==3.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jax +optax==0.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in + # brax + # flax +orbax-checkpoint==0.6.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # flax +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black + # matplotlib + # pytest + # setuptools-scm + # tensorboardx +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # seaborn +pathspec==0.12.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # matplotlib + # navix +platformdirs==4.3.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black + # pylint + # wandb +pluggy==1.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pytest +protobuf==5.28.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # orbax-checkpoint + # tensorboardx + # wandb +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir + # wandb +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pycodestyle==2.12.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flake8 +pyflakes==3.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flake8 +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pylint==3.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +pyopengl==3.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # mujoco +pyparsing==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib +pytest==8.3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # matplotlib + # pandas +pytinyrenderer==0.0.14 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # evosax + # flax + # gymnax + # ml-collections + # omegaconf + # orbax-checkpoint + # wandb +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flax + # tyro + # voir +rlax==0.1.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # jax + # jaxlib + # jaxopt + # mujoco-mjx +seaborn==0.13.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gymnax +sentry-sdk==2.15.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +setproctitle==1.3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +setuptools-scm==8.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +shtab==1.7.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # docker-pycreds + # ml-collections + # python-dateutil + # tensorflow-probability +smmap==5.0.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gitdb +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tensorboardx==2.6.2.2 + 
# via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax +tensorflow-probability==0.24.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # distrax +tensorstore==0.1.66 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flashbax + # flax + # orbax-checkpoint +tomli==2.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # black + # pylint + # pytest + # setuptools-scm +tomlkit==0.13.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pylint +toolz==0.12.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # chex +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/purejaxrl/requirements.in +trimesh==4.4.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # brax + # mujoco-mjx +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # astroid + # black + # brax + # chex + # etils + # flashbax + # flax + # gymnasium + # navix + # orbax-checkpoint + # reactivex + # rich + # torch + # tyro +tyro==0.8.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # sentry-sdk +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/purejaxrl/requirements.in +wandb==0.18.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # navix +werkzeug==3.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # flask +zipp==3.20.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # etils + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/recursiongfn/requirements.hpu.txt b/benchmarks/recursiongfn/requirements.hpu.txt new file mode 100644 index 000000000..4e362ae63 --- /dev/null +++ b/benchmarks/recursiongfn/requirements.hpu.txt @@ -0,0 +1,493 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/recursiongfn/requirements.hpu.txt .pin/tmp-constraints-hpu-recursiongfn.txt benchmarks/recursiongfn/requirements.in +# +absl-py==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +aiosignal==1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +async-timeout==4.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +attrs==24.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +blosc2==2.7.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tables +botorch==0.12.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # sentry-sdk +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +click==8.1.7 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # wandb +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +cvxopt==1.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +docker-pycreds==0.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +frozenlist==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # aiosignal +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # torch-geometric +gitdb==4.0.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gitpython +gitpython==3.1.43 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # wandb +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +gpytorch==1.13 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # botorch +grpcio==1.66.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +jaxtyping==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gpytorch + # linear-operator +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # torch-geometric +joblib==1.4.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # scikit-learn +linear-operator==0.5.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # botorch + # gpytorch +markdown==3.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 + # werkzeug +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # botorch + # gpytorch + # linear-operator + # sympy +msgpack==1.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blosc2 +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +multipledispatch==1.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # botorch +ndindex==1.9.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blosc2 +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # torch +numexpr==2.10.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blosc2 + # tables +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blosc2 + # jaxtyping + # numexpr + # pandas + # pyarrow + # pyro-ppl + # rdkit + # scikit-learn + # scipy + # tables + # tensorboard + # torch-geometric +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # voir +opt-einsum==3.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pyro-ppl +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tables + # tensorboard +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rdkit +platformdirs==4.3.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +protobuf==5.28.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard + # wandb +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric + # voir + # wandb +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +py-cpuinfo==9.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # blosc2 + # tables +pyarrow==17.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pyparsing==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric +pyro-api==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pyro-ppl +pyro-ppl==1.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # botorch +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf + # wandb +rdkit==2024.3.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch-geometric + # wandb +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +scikit-learn==1.5.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gpytorch +scipy==1.14.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # botorch + # gpytorch + # linear-operator + # scikit-learn + # torch-cluster + # torch-sparse +sentry-sdk==2.15.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +setproctitle==1.3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # wandb +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # docker-pycreds + # 
python-dateutil + # tensorboard +smmap==5.0.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gitdb +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tables==3.10.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +tensorboard==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +tensorboard-data-server==0.7.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +threadpoolctl==3.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # scikit-learn +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in + # botorch + # linear-operator + # pyro-ppl +torch-cluster==1.6.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +torch-geometric==2.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +torch-scatter==2.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +torch-sparse==0.6.18 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pyro-ppl + # torch-geometric +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typeguard==4.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jaxtyping +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # botorch + # jaxtyping + # multidict + # reactivex + # rich + # tables + # torch + # typeguard +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # sentry-sdk +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/recursiongfn/requirements.in +wandb==0.18.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/recursiongfn/requirements.in +werkzeug==3.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/rlhf/main.py b/benchmarks/rlhf/main.py index 0be12d282..3a5f1ddab 100755 --- a/benchmarks/rlhf/main.py +++ b/benchmarks/rlhf/main.py @@ -2,6 +2,7 @@ import shutil +import accelerate from accelerate import PartialState from datasets import load_dataset from transformers import ( @@ -15,10 +16,16 @@ from trl.trainer.ppov2_trainer import PPOv2Config, PPOv2Trainer from trl.trainer.utils import SIMPLE_QUERY_CHAT_TEMPLATE +import torchcompat.core as compat + class PPOv2TrainerIntrumented(PPOv2Trainer): def __init__(self, config: PPOv2Config, *args, **kwargs): config.report_to = [] + + # FIXME: better way to monkeypatch this ? 
+ # Use the compatibility accelerator class + accelerate.Accelerator = compat.accelerate.Accelerator super().__init__(config, *args, **kwargs) def batch_size_fn(batch): @@ -46,9 +53,13 @@ def save_model(self, *args, **kwargs): def main(): + parser = HfArgumentParser((PPOv2Config, ModelConfig)) config, model_config = parser.parse_args_into_dataclasses() + + import torchcompat.core + # remove output_dir if exists shutil.rmtree(config.output_dir, ignore_errors=True) diff --git a/benchmarks/rlhf/requirements.hpu.txt b/benchmarks/rlhf/requirements.hpu.txt new file mode 100644 index 000000000..a6c127653 --- /dev/null +++ b/benchmarks/rlhf/requirements.hpu.txt @@ -0,0 +1,362 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/rlhf/requirements.hpu.txt .pin/tmp-constraints-hpu-rlhf-gpus.txt benchmarks/rlhf/requirements.in +# +accelerate==0.34.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/rlhf/requirements.in + # trl +aiohappyeyeballs==2.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +aiohttp==3.10.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # fsspec +aiosignal==1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +async-timeout==4.0.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +attrs==24.2.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +datasets==3.0.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/rlhf/requirements.in + # trl +dill==0.3.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # multiprocess +docstring-parser==0.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # torch + # transformers + # triton +frozenlist==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # aiosignal +fsspec[http]==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +huggingface-hub==0.25.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # tokenizers + # transformers +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests + # yarl +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +multidict==6.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp + # yarl +multiprocess==0.70.16 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # datasets +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # pandas + # pyarrow + # transformers + # trl +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # transformers +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pyarrow==17.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # datasets + # huggingface-hub + # omegaconf + # transformers +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +regex==2024.9.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # transformers +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # accelerate + # transformers +shtab==1.7.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tokenizers==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # transformers +torch==2.4.1 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/rlhf/requirements.in + # accelerate + # trl +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets + # huggingface-hub + # transformers +transformers==4.44.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/rlhf/requirements.in + # trl +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +trl==0.10.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/rlhf/requirements.in +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # multidict + # reactivex + # rich + # torch + # tyro +tyro==0.8.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # trl +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/rlhf/requirements.in +xxhash==3.5.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # datasets +yarl==1.13.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # aiohttp diff --git a/benchmarks/timm/requirements.hpu.txt b/benchmarks/timm/requirements.hpu.txt index 432c91bc4..e626bd1f0 100644 --- a/benchmarks/timm/requirements.hpu.txt +++ b/benchmarks/timm/requirements.hpu.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=benchmarks/timm/requirements.hpu.txt .pin/tmp-constraints-hpu-timm.txt benchmarks/timm/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -16,7 +12,7 @@ asttokens==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -certifi==2024.6.2 +certifi==2024.8.30 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -24,35 +20,35 @@ charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub # torch # triton -fsspec==2024.5.0 +fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera # voir -huggingface-hub==0.24.0 +huggingface-hub==0.25.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/timm/requirements.in -idna==3.7 +idna==3.10 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests @@ -102,7 +98,7 @@ nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -123,11 +119,15 @@ nvidia-cusparse-cu12==12.1.0.106 # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch 
-nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.77 # via # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 @@ -140,7 +140,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -ovld==0.3.5 +ovld==0.3.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -164,11 +164,7 @@ pygments==2.18.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # rich -pynvml==11.5.3 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # voir -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/timm/requirements.in @@ -182,11 +178,11 @@ requests==2.32.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub -rich==13.7.1 +rich==13.9.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -safetensors==0.4.3 +safetensors==0.4.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/timm/requirements.in @@ -194,24 +190,24 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # asttokens -sympy==1.13.0 +sympy==1.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -torch==2.3.1 +torch==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/timm/requirements.in # torchvision -torchvision==0.18.1 +torchvision==0.19.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/timm/requirements.in -tqdm==4.66.4 +tqdm==4.66.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub -triton==2.3.1 +triton==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -220,12 +216,13 @@ typing-extensions==4.12.2 # -c .pin/../.pin/constraints-hpu-torch.txt # huggingface-hub # reactivex + # rich # torch -urllib3==1.26.19 +urllib3==2.2.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # requests -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving diff --git a/benchmarks/torchatari/requirements.hpu.txt b/benchmarks/torchatari/requirements.hpu.txt new file mode 100644 index 000000000..6d7369dfc --- /dev/null +++ b/benchmarks/torchatari/requirements.hpu.txt @@ -0,0 +1,304 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/torchatari/requirements.hpu.txt .pin/tmp-constraints-hpu-torchatari.txt benchmarks/torchatari/requirements.in +# +absl-py==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # dm-env + # tensorboard +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +appdirs==1.4.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # cantilever +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +cantilever==0.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in +cloudpickle==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gym + # gymnasium +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +dm-env==1.6 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool +dm-tree==0.1.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # dm-env +docstring-parser==0.16 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +envpool==0.8.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +farama-notifications==0.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # 
gymnasium +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +grpcio==1.66.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +gym==0.26.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in + # envpool +gym-notices==0.0.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # gym +gymnasium==0.29.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool +importlib-resources==6.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # cantilever + # torchcompat +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown==3.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 + # werkzeug +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in + # dm-env + # envpool + # gym + # gymnasium + # tensorboard +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +optree==0.13.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool + # tensorboard +protobuf==5.28.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pyyaml==6.0.2 
+ # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro + # voir +shtab==1.7.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tyro +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # tensorboard +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +tensorboard==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in +tensorboard-data-server==0.7.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in +torchcompat==1.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/torchatari/requirements.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +types-protobuf==5.28.0.20240924 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # envpool + # gymnasium + # optree + # reactivex + # rich + # torch + # tyro +tyro==0.8.11 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchatari/requirements.in +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/torchatari/requirements.in +werkzeug==3.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # tensorboard + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/torchvision/requirements.hpu.txt b/benchmarks/torchvision/requirements.hpu.txt index 369a1753e..f0b47e914 100644 --- a/benchmarks/torchvision/requirements.hpu.txt +++ b/benchmarks/torchvision/requirements.hpu.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=benchmarks/torchvision/requirements.hpu.txt .pin/tmp-constraints-hpu-torchvision.txt benchmarks/torchvision/requirements.in # ---extra-index-url https://pypi.ngc.nvidia.com ---find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html ---trusted-host pypi.ngc.nvidia.com - antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt @@ -16,29 +12,29 @@ asttokens==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -codefind==0.1.6 +codefind==0.1.7 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera -executing==1.2.0 +executing==2.1.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # varname -filelock==3.15.4 +filelock==3.16.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch # triton -fsspec==2024.5.0 +fsspec==2024.6.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -giving==0.4.2 +giving==0.4.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # ptera # voir -importlib-resources==6.4.0 +importlib-resources==6.4.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torchcompat @@ -88,7 +84,7 @@ nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-cudnn-cu12==8.9.2.26 +nvidia-cudnn-cu12==9.1.0.70 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -109,11 +105,15 @@ nvidia-cusparse-cu12==12.1.0.106 # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 # torch 
+nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -nvidia-nvjitlink-cu12==12.5.82 +nvidia-nvjitlink-cu12==12.6.77 # via # -c .pin/../.pin/constraints-hpu-torch.txt # nvidia-cusolver-cu12 @@ -126,7 +126,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir -ovld==0.3.5 +ovld==0.3.9 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -146,11 +146,7 @@ pygments==2.18.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # rich -pynvml==11.5.3 - # via - # -c .pin/../.pin/constraints-hpu-torch.txt - # voir -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # omegaconf @@ -158,7 +154,7 @@ reactivex==4.0.4 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving -rich==13.7.1 +rich==13.9.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # voir @@ -166,11 +162,11 @@ six==1.16.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # asttokens -sympy==1.13.0 +sympy==1.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch -torch==2.3.1 +torch==2.4.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/torchvision/requirements.in @@ -180,15 +176,15 @@ torchcompat==1.1.4 # -c .pin/../.pin/constraints-hpu-torch.txt # -c .pin/../constraints/hpu.txt # -r benchmarks/torchvision/requirements.in -torchvision==0.18.1 +torchvision==0.19.1 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/torchvision/requirements.in -tqdm==4.66.4 +tqdm==4.66.5 # via # -c .pin/../.pin/constraints-hpu-torch.txt # -r benchmarks/torchvision/requirements.in -triton==2.3.1 +triton==3.0.0 # via # -c .pin/../.pin/constraints-hpu-torch.txt # torch @@ -196,8 +192,9 @@ typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-hpu-torch.txt # reactivex + # rich # torch -varname==0.10.0 +varname==0.13.3 # via # -c .pin/../.pin/constraints-hpu-torch.txt # giving diff --git a/benchmarks/torchvision_ddp/requirements.hpu.txt b/benchmarks/torchvision_ddp/requirements.hpu.txt index e69de29bb..a4174e7bc 100644 --- a/benchmarks/torchvision_ddp/requirements.hpu.txt +++ b/benchmarks/torchvision_ddp/requirements.hpu.txt @@ -0,0 +1,205 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/torchvision_ddp/requirements.hpu.txt .pin/tmp-constraints-hpu-torchvision.txt benchmarks/torchvision_ddp/requirements.in +# +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +codefind==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +importlib-resources==6.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchcompat +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 
+ # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchvision +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchvision +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchvision_ddp/requirements.in + # torchvision +torchcompat==1.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/torchvision_ddp/requirements.in +torchvision==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchvision_ddp/requirements.in +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/torchvision_ddp/requirements.in +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # reactivex + # rich + # torch +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/torchvision_ddp/requirements.in diff --git a/benchmarks/vjepa/benchfile.py b/benchmarks/vjepa/benchfile.py index d25b47b53..228023ced 100644 --- 
a/benchmarks/vjepa/benchfile.py +++ b/benchmarks/vjepa/benchfile.py @@ -23,7 +23,9 @@ class Vjepa(Package): def make_env(self): # Return a dict of environment variables for prepare_script and # main_script. - return super().make_env() + env = super().make_env() + env["PT_HPU_LAZY_MODE"] = "0" + return env async def install(self): vjepa = self.dirs.code / "jepa" diff --git a/benchmarks/vjepa/main.py b/benchmarks/vjepa/main.py index 18377b92e..55981859c 100644 --- a/benchmarks/vjepa/main.py +++ b/benchmarks/vjepa/main.py @@ -475,14 +475,19 @@ def reg_fn(z): scaler.unscale_(optimizer) else: loss.backward() + if (epoch > warmup) and (clip_grad is not None): _enc_norm = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip_grad) _pred_norm = torch.nn.utils.clip_grad_norm_(predictor.parameters(), clip_grad) + + acc.mark_step() if mixed_precision: scaler.step(optimizer) scaler.update() else: optimizer.step() + acc.mark_step() + grad_stats = grad_logger(encoder.named_parameters()) grad_stats.global_norm = float(_enc_norm) grad_stats_pred = grad_logger(predictor.named_parameters()) @@ -506,7 +511,8 @@ def reg_fn(z): grad_stats_pred, optim_stats, ) - (loss, loss_jepa, loss_reg, _new_lr, _new_wd, grad_stats, grad_stats_pred, optim_stats,), gpu_etime_ms = gpu_timer(train_step) + loss, loss_jepa, loss_reg, _new_lr, _new_wd, grad_stats, grad_stats_pred, optim_stats = train_step() + iter_elapsed_time_ms = (time.time() - itr_start_time) * 1000. loss_meter.update(loss) input_var = float(AllReduce.apply(clips.view(clips.shape[0], -1).var(dim=1).mean(dim=0))) @@ -515,7 +521,7 @@ def reg_fn(z): input_var_min_meter.update(input_var_min) jepa_loss_meter.update(loss_jepa) reg_loss_meter.update(loss_reg) - gpu_time_meter.update(gpu_etime_ms) + # gpu_time_meter.update(gpu_etime_ms) wall_time_meter.update(iter_elapsed_time_ms) observer.record_loss(loss) @@ -530,7 +536,6 @@ def log_stats(): loss_reg, grad_stats.global_norm, grad_stats_pred.global_norm, - gpu_etime_ms, iter_elapsed_time_ms) if (itr % log_freq == 0) or np.isnan(loss) or np.isinf(loss): logger.info( @@ -637,7 +642,11 @@ def main(): params["nodes"] = nnodes params["tasks_per_node"] = gpu_per_nodes + print("HERE", os.getenv("RANK", -1) ) if os.getenv("RANK", -1) != -1: + print("INIT PROCESS GROUP HERE") + print(acc) + print(acc.init_process_group) acc.init_process_group() try: diff --git a/benchmarks/vjepa/requirements.hpu.txt b/benchmarks/vjepa/requirements.hpu.txt new file mode 100644 index 000000000..b1c986ecb --- /dev/null +++ b/benchmarks/vjepa/requirements.hpu.txt @@ -0,0 +1,297 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=benchmarks/vjepa/requirements.hpu.txt .pin/tmp-constraints-hpu-vjepa-gpus.txt benchmarks/vjepa/requirements.in +# +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +beartype==0.19.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +braceexpand==0.1.7 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in + # webdataset +certifi==2024.8.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +cloudpickle==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # submitit +codefind==0.1.7 + # via + # -c 
.pin/../.pin/constraints-hpu-torch.txt + # ptera +decord==0.6.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +einops==0.8.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +executing==2.1.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # varname +filelock==3.16.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # torch + # triton +fsspec==2024.6.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # torch +giving==0.4.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # ptera + # voir +huggingface-hub==0.25.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # timm +idna==3.10 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +jinja2==3.1.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +markupsafe==2.1.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # sympy +networkx==3.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +numpy==1.26.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in + # decord + # opencv-python + # pandas + # torchvision + # webdataset +nvidia-cublas-cu12==12.1.3.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cuda-runtime-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cudnn-cu12==9.1.0.70 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cufft-cu12==11.0.2.54 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-curand-cu12==10.3.2.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusolver-cu12==11.4.5.107 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # torch +nvidia-ml-py==12.560.30 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +nvidia-nccl-cu12==2.20.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +nvidia-nvjitlink-cu12==12.6.77 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +opencv-python==4.10.0.84 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +ovld==0.3.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +packaging==24.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub +pandas==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +pillow==10.4.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torchvision +psutil==5.9.8 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # 
voir +pygments==2.18.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # rich +python-dateutil==2.9.0.post0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pytz==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +pyyaml==6.0.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in + # huggingface-hub + # omegaconf + # timm + # webdataset +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +requests==2.32.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub +rich==13.9.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # voir +safetensors==0.4.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # timm +six==1.16.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # asttokens + # python-dateutil +submitit==1.5.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +sympy==1.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +timm==1.0.9 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in +torch==2.4.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in + # timm + # torchvision +torchvision==0.19.1 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in + # timm +tqdm==4.66.5 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub +triton==3.0.0 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # torch +typing-extensions==4.12.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # huggingface-hub + # reactivex + # rich + # submitit + # torch +tzdata==2024.2 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # pandas +urllib3==2.2.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # requests +varname==0.13.3 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # giving +voir==0.2.19 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -c .pin/../constraints/hpu.txt + # -r benchmarks/vjepa/requirements.in +webdataset==0.2.100 + # via + # -c .pin/../.pin/constraints-hpu-torch.txt + # -r benchmarks/vjepa/requirements.in diff --git a/config/base.yaml b/config/base.yaml index d7926799f..1a64d550a 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -348,7 +348,7 @@ reformer: - monogpu argv: --model: "Reformer" - --batch-size: 64 + --batch-size: 32 whisper: inherits: _hf @@ -541,7 +541,7 @@ _llm: tags: - nlp - llm - max_duration: 1200 + max_duration: 3600 num_machines: 1 inherits: _defaults definition: ../benchmarks/llm @@ -566,6 +566,7 @@ llm-lora-single: repo_id="meta-llama/Meta-Llama-3.1-8B": true batch_size=8: true gradient_accumulation_steps=8: true + device={device_name}: true llm-lora-ddp-gpus: @@ -587,7 +588,7 @@ llm-lora-ddp-gpus: repo_id="meta-llama/Meta-Llama-3.1-8B": true batch_size=8: true gradient_accumulation_steps=8: true - + device={device_name}: true llm-lora-ddp-nodes: tags: @@ -610,7 +611,7 @@ llm-lora-ddp-nodes: repo_id="meta-llama/Meta-Llama-3.1-8B": true batch_size=8: true gradient_accumulation_steps=8: true - + device={device_name}: true num_machines: 2 requires_capabilities: - "len(nodes) >= ${num_machines}" @@ -636,8 +637,12 @@ llm-lora-mp-gpus: repo_id="meta-llama/Meta-Llama-3.1-70B": true batch_size=8: true gradient_accumulation_steps=1: true - + device={device_name}: true + llm-full-mp-gpus: + voir: + options: + stop: 30 inherits: _llm tags: - multigpu @@ -658,7 +663,8 @@ 
diff --git a/constraints/extra/torch.hpu.txt b/constraints/extra/torch.hpu.txt
index 1d21c1779..e69de29bb 100644
--- a/constraints/extra/torch.hpu.txt
+++ b/constraints/extra/torch.hpu.txt
@@ -1,5 +0,0 @@
-
-#
-#
-voir >= 0.2.15
-torchcompat >= 1.0.0
diff --git a/constraints/hpu.txt b/constraints/hpu.txt
index 23a110bd2..9f6fe957d 100644
--- a/constraints/hpu.txt
+++ b/constraints/hpu.txt
@@ -1,8 +1,16 @@
-# FIXME
-# Add
-
 #
 #
 voir >= 0.2.19
 torchcompat >= 1.0.0
-gymnax >= 0.0.8
\ No newline at end of file
+gymnax >= 0.0.8
+trl<0.11.0
+
+# the latest torchtune is slower than before and causes failures;
+# the next version of pytorch seems to work better,
+# so pending a new pytorch release this is what we pin
+torchtune<0.3.0
+
+# transformers added torchao support recently,
+# but we do not support the most recent version
+transformers<4.45.0
+torchvision
\ No newline at end of file
diff --git a/docker/Dockerfile-hpu b/docker/Dockerfile-hpu
new file mode 100644
index 000000000..932959cd6
--- /dev/null
+++ b/docker/Dockerfile-hpu
@@ -0,0 +1,42 @@
+# FROM artifactory-kfs.habana-labs.com/docker-local/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:1.17.0-462
+
+FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
+
+ENV MILABENCH_GPU_ARCH=hpu
+
+WORKDIR /workspace
+
+ENV MILABENCH_CONFIG="/workspace/milabench/config/standard.yaml"
+
+ENV MILABENCH_WORDIR="/workspace/${MILABENCH_GPU_ARCH}"
+ENV MILABENCH_BASE="${MILABENCH_WORDIR}/results"
+ENV MILABENCH_VENV="${MILABENCH_WORDIR}/env"
+ENV BENCHMARK_VENV="${MILABENCH_WORDIR}/results/venv/torch"
+
+ARG BENCH=lightning
+
+RUN mkdir -p ${MILABENCH_WORDIR}
+RUN pip install virtualenv
+RUN virtualenv --system-site-packages $MILABENCH_VENV
+
+ARG CACHEBUST=1
+RUN echo "$CACHEBUST"
+RUN git clone https://github.com/mila-iqia/milabench.git -b $MILABENCH_GPU_ARCH
+RUN $MILABENCH_VENV/bin/pip install -e milabench
+
+RUN . $MILABENCH_VENV/bin/activate && milabench install --use-current-env --select "${BENCH}"
+
+RUN $MILABENCH_VENV/bin/pip uninstall torch torchvision torchaudio -y
+RUN sed -i 's/pic.numpy(force=True)/pic.numpy()/' /usr/local/lib/python3.10/dist-packages/torchvision/transforms/functional.py
+
+# This does not work
+# RUN . $MILABENCH_VENV/bin/activate && milabench prepare --use-current-env --select "${BENCH}"
+
+
+
+# RUN . $MILABENCH_VENV/bin/activate && milabench run --use-current-env --select $BENCH
+# RUN huggingface-cli login --token $MILABENCH_HF_TOKEN
+
+# docker build --build-arg CACHEBUST=`git rev-parse hpu` -f Dockerfile-hpu -t dockerfile-hpu .
+# docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --shm-size 50G --cap-add=sys_nice --net=host dockerfile-hpu:latest bash
+# . $MILABENCH_VENV/bin/activate && milabench prepare --use-current-env --select lightning && milabench run --use-current-env --select lightning
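After building the image with the docker build command noted at the end of the Dockerfile, a sanity check that the Habana bridge sees the devices could look like this (a sketch to run inside the container; hpu.device_count() is the same API referenced by the sed patch above):

    # Sketch: count visible HPU devices before launching a benchmark run.
    python -c "import habana_frameworks.torch.hpu as hpu; print(hpu.device_count())"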
diff --git a/docker/Makefile b/docker/Makefile
new file mode 100644
index 000000000..93a402704
--- /dev/null
+++ b/docker/Makefile
@@ -0,0 +1,17 @@
+
+
+
+bench = rlhf-gpus
+# bench = "lightning"
+lazy = 0
+
+hpu:
+	git add --all
+	git commit -m "-" || true
+	git push origin hpu
+	docker rmi -f $$(docker images --filter "dangling=true" -q --no-trunc) || true
+	# docker system prune -a -f
+	# docker image prune -a -f
+	docker build --build-arg BENCH=$(bench) --build-arg CACHEBUST=`git rev-parse hpu` -f Dockerfile-hpu -t dockerfile-hpu .
+	docker run --rm -it --runtime=habana -e PT_HPU_LAZY_MODE=$(lazy) -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --shm-size 50G --cap-add=sys_nice --net=host dockerfile-hpu:latest bash -c '. $$MILABENCH_VENV/bin/activate && milabench install --use-current-env --select $(bench) && pip uninstall torch torchvision torchaudio -y && milabench prepare --use-current-env --select $(bench) && milabench run --use-current-env $(args) --select $(bench)'
+
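Typical usage of the Makefile target above (a sketch; the bench and lazy variables come straight from the Makefile, the values are examples):

    # Default bench (rlhf-gpus), eager mode:
    make hpu
    # Pick another bench and enable HPU lazy mode:
    make hpu bench=lightning lazy=1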
-d "$MILABENCH_WORDIR/milabench" ]; then + git clone https://github.com/mila-iqia/milabench.git + fi + export MILABENCH_SOURCE="$MILABENCH_WORDIR/milabench" + fi + + git clone https://github.com/huggingface/optimum-habana.git -b v1.13.2 # wget -nv https://vault.habana.ai/artifactory/gaudi-installer/1.15.1/habanalabs-installer.sh - wget -nv https://vault.habana.ai/artifactory/gaudi-installer/1.16.1/habanalabs-installer.sh + # wget -nv https://vault.habana.ai/artifactory/gaudi-installer/1.16.1/habanalabs-installer.sh + wget -nv https://vault.habana.ai/artifactory/gaudi-installer/1.17.1/habanalabs-installer.sh chmod +x habanalabs-installer.sh . $MILABENCH_WORDIR/env/bin/activate - pip install -e $MILABENCH_WORDIR/milabench - - - # - # Install milabench's benchmarks in their venv - # - milabench install + pip install -e $MILABENCH_SOURCE which pip # Override dependencies for HPU # milabench needs pyhlml export HABANALABS_VIRTUAL_DIR=$MILABENCH_VENV - ./habanalabs-installer.sh install -t dependencies --venv -y - ./habanalabs-installer.sh install -t pytorch --venv -y + ./habanalabs-installer.sh install -t dependencies --venv -y | true + ./habanalabs-installer.sh install -t pytorch --venv -y | true + + # + # Install milabench's benchmarks in their venv + # + # milabench pin --variant hpu --from-scratch $ARGS + milabench install $ARGS ( . $BENCHMARK_VENV/bin/activate which pip - pip install -e $MILABENCH_WORDIR/optimum-habana - - ( - cd $MILABENCH_WORDIR/milabench/benchmarks/dlrm/dlrm; - git remote add me https://github.com/Delaunay/dlrm.git - git fetch me - git checkout me/main - ) + pip install --no-deps -e $MILABENCH_WORDIR/optimum-habana # Override dependencies for HPU # benchmarks need pytorch - pip uninstall torch torchvision torchaudio + pip uninstall torch torchvision torchaudio -y export HABANALABS_VIRTUAL_DIR=$BENCHMARK_VENV - ./habanalabs-installer.sh install -t dependencies --venv -y - ./habanalabs-installer.sh install -t pytorch --venv -y + ./habanalabs-installer.sh install -t dependencies --venv -y | true + ./habanalabs-installer.sh install -t pytorch --venv -y | true + + if [ -z "${MILABENCH_HF_TOKEN}" ]; then + echo "Missing token" + else + huggingface-cli login --token $MILABENCH_HF_TOKEN + fi ) # # Generate/download datasets, download models etc... # - milabench prepare + # sed -i 's/pic.numpy(force=True)/pic.numpy()/' $BENCHMARK_VENV/lib/python3.10/dist-packages/torchvision/transforms/functional.py + # sed -i 's/range(hpu.device_count())/range(len(available_modules))/' $BENCHMARK_VENV/lib/site-packages/habana_frameworks/torch/hpu/_utils.py + milabench prepare $ARGS } if [ ! -d "$MILABENCH_WORDIR" ]; then @@ -81,12 +97,28 @@ else fi +( + . $BENCHMARK_VENV/bin/activate + pip install lightning-habana + pip install habana-media-loader + # git clone https://github.com/Delaunay/torchcompat.git + # git clone https://github.com/Delaunay/voir.git -b hpu + pip uninstall torchcompat voir -y + pip install -e $MILABENCH_WORDIR/torchcompat + pip install -e $MILABENCH_WORDIR/voir + pip install -e $MILABENCH_WORDIR/optimum-habana + # pip install habana_dataloader +) + if [ "$MILABENCH_PREPARE" -eq 0 ]; then cd $MILABENCH_WORDIR + # python -c "import torch; print(torch.__version__)" + milabench prepare $ARGS --system $MILABENCH_WORDIR/system.yaml + # # Run the benchmakrs - milabench run "$@" + milabench run $ARGS --system $MILABENCH_WORDIR/system.yaml # # Display report