Bump version to 0.27.0.dev0 (#3681)
irenedea authored Oct 25, 2024
1 parent 47d3cec commit d247e26
Showing 7 changed files with 22 additions and 65 deletions.
2 changes: 1 addition & 1 deletion composer/_version.py
@@ -3,4 +3,4 @@

"""The Composer Version."""

__version__ = '0.26.0.dev0'
__version__ = '0.27.0.dev0'
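For downstream code that wants to gate on this pre-release, a minimal sketch of a version check (assuming the `packaging` library is installed; the module path comes straight from the file above):

```python
# Minimal sketch: gate on the new dev version. Assumes the `packaging`
# library is available; composer._version is the module changed above.
from packaging.version import Version

from composer._version import __version__

if Version(__version__) >= Version('0.27.0.dev0'):
    print(f'Composer {__version__} is at or past the 0.27 development line.')
```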
45 changes: 0 additions & 45 deletions composer/trainer/trainer.py
@@ -990,12 +990,6 @@ class Trainer:
             To use DeepSpeed with default values, set to the empty dictionary ``{}``.
             To disable DeepSpeed (the default), set to ``None``.
-        fsdp_config (dict[str, Any], optional): Configuration for FSDP.
-            See :doc:`FSDP Documentation </notes/distributed_training>` for more details.
-            To use FSDP with default values, set to the empty dictionary ``{}``. To
-            disable FSDP, set to ``None``. (default: ``None``)
-        fsdp_auto_wrap (bool, optional): option to let trainer wrap the module, or if
-            the module is already wrapped outside, allow the user to disable auto-wrapping.
         parallelism_config (Union[dict[str, Any], ParallelismConfig], optional): Configuration for parallelism options.
             Currently supports fsdp and tensor parallelism, whose respective configs are specified
             as the keys ``fsdp`` and ``tp``. (default: ``None``)
@@ -1156,8 +1150,6 @@ def __init__(

         # Parallelism
         deepspeed_config: Optional[dict[str, Any]] = None,
-        fsdp_config: Optional[dict[str, Any]] = None,
-        fsdp_auto_wrap: bool = True,
         parallelism_config: Optional[Union[dict[str, Any], ParallelismConfig]] = None,
 
         # System/Numerics
@@ -1282,43 +1274,6 @@ def __init__(
         assert not isinstance(device_train_microbatch_size, str)
 
         # Distributed
-        if fsdp_config is not None:
-            warnings.warn(
-                VersionedDeprecationWarning(
-                    "fsdp_config is deprecated. Please use parallelism_config['fsdp'] instead.",
-                    remove_version='0.26.0',
-                ),
-            )
-            if parallelism_config is None:
-                parallelism_config = {}
-            if isinstance(parallelism_config, ParallelismConfig):
-                raise ValueError(
-                    'fsdp_config cannot be specified if parallelism_config is a ParallelismConfig object. '
-                    'Please instead pass fsdp_config as a FSDPConfig object when constructing ParallelismConfig.',
-                )
-            elif parallelism_config.get('fsdp') is not None:
-                raise ValueError(
-                    'fsdp_config is specified in both fsdp_config and parallelism_config. Please specify it in only in parallelism_config.',
-                )
-            parallelism_config['fsdp'] = fsdp_config
-        if not fsdp_auto_wrap:
-            warnings.warn(
-                VersionedDeprecationWarning(
-                    "fsdp_auto_wrap=False is deprecated. Please use parallelism_config['fsdp']['auto_wrap'] instead.",
-                    remove_version='0.26.0',
-                ),
-            )
-            if parallelism_config is None:
-                parallelism_config = {}
-            if isinstance(parallelism_config, ParallelismConfig):
-                raise ValueError(
-                    'fsdp_auto_wrap cannot be specified if parallelism_config is a ParallelismConfig object. '
-                    'Please instead pass fsdp_auto_wrap to FSDPConfig as part of ParallelismConfig.',
-                )
-            else:
-                if parallelism_config.get('fsdp') is None:
-                    parallelism_config['fsdp'] = {}
-                parallelism_config['fsdp']['auto_wrap'] = fsdp_auto_wrap
         if parallelism_config is not None and not isinstance(parallelism_config, ParallelismConfig):
             parallelism_config_args = {}
             if 'fsdp' in parallelism_config and parallelism_config['fsdp'] is not None:
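With these shims removed, the old `fsdp_config` and `fsdp_auto_wrap` arguments no longer exist; FSDP settings are passed only through `parallelism_config`. A rough migration sketch that mirrors what the deleted code used to do automatically (the option values are placeholders, reusing a key from the updated tests below):

```python
# Before (arguments removed in this commit):
#     Trainer(..., fsdp_config={'forward_prefetch_limit': 1}, fsdp_auto_wrap=False)
# After: nest the same options under parallelism_config['fsdp'] yourself.
old_fsdp_config = {'forward_prefetch_limit': 1}  # placeholder FSDP options

parallelism_config = {
    'fsdp': {
        **old_fsdp_config,
        'auto_wrap': False,  # replaces the removed fsdp_auto_wrap flag
    },
}
# Then pass it along: Trainer(..., parallelism_config=parallelism_config)
```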
4 changes: 2 additions & 2 deletions docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 <!-- BEGIN_COMPOSER_BUILD_MATRIX -->
 | Composer Version | CUDA Support | Docker Tag |
 |--------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 0.25.0 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.25.0` |
-| 0.25.0 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.25.0_cpu` |
+| 0.26.0 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.26.0` |
+| 0.26.0 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.26.0_cpu` |
 <!-- END_COMPOSER_BUILD_MATRIX -->

**Note**: For a lightweight installation, we recommended using a [MosaicML PyTorch Image](#pytorch-images) and manually
16 changes: 8 additions & 8 deletions docker/build_matrix.yaml
@@ -168,36 +168,36 @@
   TORCHVISION_VERSION: 0.18.1
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.25.0
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.26.0
   CUDA_VERSION: 12.4.1
-  IMAGE_NAME: composer-0-25-0
+  IMAGE_NAME: composer-0-26-0
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
   PYTORCH_NIGHTLY_URL: ''
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.5.0
   TAGS:
-  - mosaicml/composer:0.25.0
-  - ghcr.io/databricks-mosaic/composer:0.25.0
+  - mosaicml/composer:0.26.0
+  - ghcr.io/databricks-mosaic/composer:0.26.0
   - mosaicml/composer:latest
   - ghcr.io/databricks-mosaic/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.20.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.25.0
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.26.0
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-25-0-cpu
+  IMAGE_NAME: composer-0-26-0-cpu
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
   PYTORCH_NIGHTLY_URL: ''
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.5.0
   TAGS:
-  - mosaicml/composer:0.25.0_cpu
-  - ghcr.io/databricks-mosaic/composer:0.25.0_cpu
+  - mosaicml/composer:0.26.0_cpu
+  - ghcr.io/databricks-mosaic/composer:0.26.0_cpu
   - mosaicml/composer:latest_cpu
   - ghcr.io/databricks-mosaic/composer:latest_cpu
   TARGET: composer_stage
2 changes: 1 addition & 1 deletion docker/generate_build_matrix.py
@@ -244,7 +244,7 @@ def _main():
     composer_entries = []
 
     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.25.0'] # Only build images for the latest composer version
+    composer_versions = ['0.26.0'] # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION] # just build composer against the latest
 
     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):
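The generator now emits matrix entries only for 0.26.0. A hypothetical helper for listing the tags in the generated file might look like this (the path and the `IMAGE_NAME`/`TAGS` keys follow the YAML excerpt above; PyYAML is assumed to be installed):

```python
# Hypothetical helper: list the Docker tags in the generated build matrix.
# Assumes PyYAML is installed and the file layout matches the excerpt above.
import yaml

with open('docker/build_matrix.yaml') as f:
    entries = yaml.safe_load(f)

for entry in entries:
    print(entry.get('IMAGE_NAME', '<unnamed>'), '->', ', '.join(entry.get('TAGS', [])))
```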
16 changes: 10 additions & 6 deletions tests/trainer/test_fsdp.py
@@ -259,9 +259,11 @@ def test_automicrobatching_fsdp(world_size: int):
     trainer = Trainer(
         model=model,
         train_dataloader=train_dataloader,
-        fsdp_config={
-            'forward_prefetch_limit': 1,
-            'backward_prefetch_limit': 1,
+        parallelism_config={
+            'fsdp': {
+                'forward_prefetch_limit': 1,
+                'backward_prefetch_limit': 1,
+            },
         },
         max_duration='1ba',
         device='gpu',
@@ -310,9 +312,11 @@ def test_fsdp_automicrobatching_sync_hooks(world_size: int):
     trainer = Trainer(
         model=model,
         train_dataloader=train_dataloader,
-        fsdp_config={
-            'forward_prefetch_limit': 1,
-            'backward_prefetch_limit': 1,
+        parallelism_config={
+            'fsdp': {
+                'forward_prefetch_limit': 1,
+                'backward_prefetch_limit': 1,
+            },
         },
         max_duration='4ba',
         device='gpu',
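The updated tests use the dict form; the error messages deleted from trainer.py also describe an object form, passing an FSDPConfig into a ParallelismConfig. A sketch of that spelling, assuming both classes are importable from `composer.utils` as the Trainer type hints suggest and that `FSDPConfig` accepts the same field names as the dict form:

```python
# Sketch of the typed-object spelling of the same configuration.
# Assumption: FSDPConfig and ParallelismConfig are exported from composer.utils.
from composer.utils import FSDPConfig, ParallelismConfig

parallelism_config = ParallelismConfig(
    fsdp=FSDPConfig(
        forward_prefetch_limit=1,
        backward_prefetch_limit=1,
    ),
)
# Then: Trainer(..., parallelism_config=parallelism_config)
```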
2 changes: 0 additions & 2 deletions tests/utils/test_autolog_hparams.py
@@ -176,8 -176,6 @@ def test_extract_hparams_trainer():

         # DeepSpeed
         'deepspeed_config': None,
-        'fsdp_config': None,
-        'fsdp_auto_wrap': True,
         'parallelism_config': None,
 
         # System/Numerics
