From e598089c0faa1e328c9aa34f1486d72bbc293584 Mon Sep 17 00:00:00 2001
From: Satya Ortiz-Gagne
Date: Tue, 3 Sep 2024 16:26:02 -0400
Subject: [PATCH] Fix llm requirements

* rename huggingface token to MILABENCH_* to automatically forward the
  env var to a remote in such cases
---
 .github/workflows/tests_unit.yml     |  2 +-
 benchmarks/llm/prepare.py            |  9 +++++++-
 benchmarks/llm/requirements.cuda.txt | 34 +++++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/tests_unit.yml b/.github/workflows/tests_unit.yml
index 90d6f4831..28262cf16 100644
--- a/.github/workflows/tests_unit.yml
+++ b/.github/workflows/tests_unit.yml
@@ -74,7 +74,7 @@ jobs:

     - name: tests
       env:
-        HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN}}
+        MILABENCH_HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN}}
       run: |
         source $(poetry env info -p)/bin/activate
         coverage run --source=milabench -m pytest --ignore=tests/integration tests/
diff --git a/benchmarks/llm/prepare.py b/benchmarks/llm/prepare.py
index 9c64ac8fe..221162ffa 100755
--- a/benchmarks/llm/prepare.py
+++ b/benchmarks/llm/prepare.py
@@ -55,18 +55,25 @@ def generate_model(
     model_parallel_size=1
 ):
     try:
+        os.environ["MASTER_ADDR"] = "localhost"
+        os.environ["MASTER_PORT"] = "12355"
+
         torch.distributed.init_process_group(rank=rank, world_size=model_parallel_size)
         fairscale.nn.model_parallel.initialize.initialize_model_parallel(model_parallel_size)
+
         conn.send(os.getpid())
         while not conn.poll():
             time.sleep(0.1)
         conn.recv()
+
         params = json.loads(params_path.read_text())
         model = llama.model.Transformer(ModelArgs(**params))
         torch.save(model.state_dict(), params_path.with_name(f"consolidated.{rank:02}.pth"))
+
     except Exception as e:
         conn.send(e)
         raise
+
     finally:
         conn.close()

@@ -101,7 +108,7 @@ def main():
     config = OmegaConf.merge(base, cli)

     repo_id = config["repo_id"]
-    hf_token = os.getenv("HUGGING_FACE_TOKEN", None)
+    hf_token = os.getenv("MILABENCH_HF_TOKEN", None)
     output_dir = config["checkpointer"]["output_dir"]

     ignore_patterns = ["*.safetensors", "*consolidated.*.pth"]
diff --git a/benchmarks/llm/requirements.cuda.txt b/benchmarks/llm/requirements.cuda.txt
index a95035534..e7825b5f0 100644
--- a/benchmarks/llm/requirements.cuda.txt
+++ b/benchmarks/llm/requirements.cuda.txt
@@ -2,10 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --output-file=benchmarks/llm/requirements.cuda.txt .pin/tmp-constraints-cuda-llm-lora-single.txt benchmarks/llm/requirements.in
+#    pip-compile --output-file=benchmarks/llm/requirements.cuda.txt .pin/tmp-constraints-cuda-llm-full-mp-nodes.txt benchmarks/llm/requirements.in
 #
---extra-index-url https://pypi.ngc.nvidia.com
 --extra-index-url https://download.pytorch.org/whl/cu121
+--extra-index-url https://pypi.ngc.nvidia.com
 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 --trusted-host pypi.ngc.nvidia.com

@@ -45,7 +45,7 @@ attrs==24.2.0
 blobfile==3.0.0
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
-    #   -r benchmarks/llm/llama3/requirements.txt
+    #   -r benchmarks/llm/requirements.txt
     #   torchtune
 certifi==2024.7.4
     # via
@@ -75,7 +75,7 @@ executing==1.2.0
 fairscale==0.4.13
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
-    #   -r benchmarks/llm/llama3/requirements.txt
+    #   -r benchmarks/llm/requirements.txt
 filelock==3.15.4
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -83,11 +83,12 @@ filelock==3.15.4
     #   datasets
     #   huggingface-hub
     #   torch
+    #   transformers
     #   triton
 fire==0.6.0
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
-    #   -r benchmarks/llm/llama3/requirements.txt
+    #   -r benchmarks/llm/requirements.txt
 frozenlist==1.4.1
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -111,7 +112,9 @@ hjson==3.1.0
 huggingface-hub==0.24.6
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   accelerate
     #   datasets
+    #   tokenizers
     #   torchtune
 idna==3.8
     # via
@@ -183,6 +186,7 @@ networkx==3.3
 numpy==1.26.4
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   accelerate
     #   datasets
     #   jax
     #   jaxlib
@@ -192,6 +196,7 @@ numpy==1.26.4
     #   pyarrow
     #   scipy
     #   torchtune
+    #   transformers
 nvidia-cublas-cu12==12.1.3.1
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -277,8 +282,10 @@ ovld==0.3.9
 packaging==24.1
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   accelerate
     #   datasets
     #   huggingface-hub
+    #   transformers
 pandas==2.2.2
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -286,6 +293,7 @@ pandas==2.2.2
 psutil==5.9.8
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   accelerate
     #   voir
 ptera==1.4.1
     # via
@@ -315,9 +323,11 @@ pyyaml==6.0.2
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
     #   -r benchmarks/llm/requirements.in
+    #   accelerate
     #   datasets
     #   huggingface-hub
     #   omegaconf
+    #   transformers
 reactivex==4.0.4
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -326,6 +336,7 @@ regex==2024.7.24
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
     #   tiktoken
+    #   transformers
 requests==2.32.3
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
@@ -339,6 +350,7 @@ rich==13.8.0
 safetensors==0.4.4
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   accelerate
     #   torchtune
 scipy==1.14.1
     # via
@@ -367,11 +379,16 @@ tiktoken==0.7.0
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
     #   torchtune
+tokenizers==0.19.1
+    # via
+    #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   transformers
 torch==2.4.0+cu121
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
-    #   -r benchmarks/llm/llama3/requirements.txt
     #   -r benchmarks/llm/requirements.in
+    #   -r benchmarks/llm/requirements.txt
+    #   accelerate
     #   fairscale
 torchao==0.3.1+cu121
     # via
@@ -387,6 +404,11 @@ tqdm==4.66.5
     #   datasets
     #   huggingface-hub
     #   torchtune
+    #   transformers
+transformers==4.43.3
+    # via
+    #   -c .pin/../.pin/constraints-cuda-torch.txt
+    #   -r benchmarks/llm/requirements.in
 triton==3.0.0
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt