mosaicml · j316chuck · Nov 21, 2024 · Nov 15, 2024 · Nov 19, 2024 · Nov 20, 2024
@@ -23,6 +23,12 @@ jobs:
         - name: "2.4.0_cu124_aws"
           base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
           dep_groups: "[all]"
+        - name: "2.5.1_cu124"
+          base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04
+          dep_groups: "[all]"
+        - name: "2.5.1_cu124_aws"
+          base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04-aws
+          dep_groups: "[all]"
     steps:
 
     - name: Checkout

@@ -132,7 +132,7 @@ def write_ft_checkpoint_from_composer_checkpoint(
     # Extract the HF tokenizer
     print('#' * 30)
     print('Extracting HF Tokenizer...')
-    hf_tokenizer = get_hf_tokenizer_from_composer_state_dict(
+    hf_tokenizer = get_hf_tokenizer_from_composer_state_dict(  # pyright: ignore
         composer_state_dict,
         trust_remote_code,
     )
@@ -141,7 +141,7 @@ def write_ft_checkpoint_from_composer_checkpoint(
 
     # Extract the model weights
     weights_state_dict = composer_state_dict['state']['model']
-    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(  # pyright: ignore
         weights_state_dict,
         prefix='model.',
     )

@@ -133,7 +133,7 @@ def write_huggingface_pretrained_from_composer_checkpoint(
     weights_state_dict = composer_state_dict
     if 'state' in weights_state_dict:
         weights_state_dict = weights_state_dict['state']['model']
-    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(  # pyright: ignore
         weights_state_dict,
         prefix='model.',
     )

@@ -57,7 +57,7 @@
     'accelerate>=0.25,<1.2',  # for HF inference `device_map`
     'transformers>=4.43.2,<4.47',
     'mosaicml-streaming>=0.9.0,<0.10',
-    'torch>=2.4.0,<2.4.1',
+    'torch>=2.4.0,<2.5.2',
     'datasets>=2.20.0,<2.21',
     'fsspec==2023.6.0',  # newer version results in a bug in datasets that duplicates data
     'sentencepiece==0.2.0',