Add an LLM fine-tuning example #90

Merged · 43 commits · Nov 15, 2024
Commits
f103347
WIP: Add an LLM finetuning example
lebrice Nov 7, 2024
9b9d698
WIP: add / rename more configs
lebrice Nov 7, 2024
d0500b4
Finetuning example seems to be working
lebrice Nov 8, 2024
e68a1dc
Making progress, more self-contained example
lebrice Nov 8, 2024
59060d9
Works! (need to fix the hash used for path though)
lebrice Nov 8, 2024
a678a93
Improve hashing, reduce default block size
lebrice Nov 8, 2024
4c11b99
Fix val_loss logging and add docstring
lebrice Nov 8, 2024
7b9fb19
Increase the number of dataloader workers
lebrice Nov 8, 2024
cfa3a26
Use smaller model for now
lebrice Nov 11, 2024
10c07eb
Use FSDP in the example
lebrice Nov 11, 2024
5c4f659
Fix bug in id generation from config classes
lebrice Nov 11, 2024
f1fced9
Tweak config, try to setup mid-epoch checkpointing
lebrice Nov 11, 2024
f02e4da
Rename `HFExample` -> `TextClassificationExample`
lebrice Nov 11, 2024
09a5b74
Fix broken links in nav
lebrice Nov 11, 2024
74161cc
Remove "huggingface" datamodule config
lebrice Nov 12, 2024
ef454af
Fix issues in config/tests for text_classification
lebrice Nov 12, 2024
504ece9
Add an entry to test the llm_finetuning_example
lebrice Nov 12, 2024
32f4f91
Fix issues in the text classification example
lebrice Nov 12, 2024
ade8bc0
Fix weird docstring issues with hydra-zen
lebrice Nov 12, 2024
5bca6b9
Fix test and config of text_classification_example
lebrice Nov 12, 2024
8749842
Move test from main_test.py to example_test.py
lebrice Nov 12, 2024
4f5e4fb
forward_pass is a method of LearningAlgorithmTests
lebrice Nov 12, 2024
107176e
Various type hint fixes and tweaks
lebrice Nov 12, 2024
3f08a75
WIP: Adding some tests for LLM finetuning example
lebrice Nov 12, 2024
e0a26b9
Fix issue in `jax.md`
lebrice Nov 12, 2024
3077364
Add link to the example page in index.md
lebrice Nov 12, 2024
d2834bc
Fix tests for the llm finetuning example
lebrice Nov 13, 2024
254224b
Fix issue with tuples in regression files
lebrice Nov 13, 2024
b9bc199
Fix test for `get_hash_of`
lebrice Nov 13, 2024
6f5e367
Remove unused _field function
lebrice Nov 13, 2024
ce61959
Fix issue with built-in modules in autoref plugin
lebrice Nov 13, 2024
39f2226
Add a bit of info in the example doc
lebrice Nov 13, 2024
2ae7e1f
Add more links in the doc of the module
lebrice Nov 13, 2024
54efae6
Fix issue with the text classification example
lebrice Nov 13, 2024
63a530d
Add skipif mark for LLM finetuning test
lebrice Nov 13, 2024
10c52fc
Fix data_dir of text_classification_example
lebrice Nov 14, 2024
7b69c00
Use the "auto" strategy for LLM Finetuning tests
lebrice Nov 14, 2024
59e0673
Fix error in fork_rng of LLM finetuning example
lebrice Nov 14, 2024
922cdb7
Try a hacky fix for failing test
lebrice Nov 14, 2024
e36de4a
Don't run llm finetuning tests on github Cloud CI
lebrice Nov 14, 2024
b06f3bf
Add missing regression files
lebrice Nov 15, 2024
04be192
Rename llm_finetuning_example -> llm_finetuning
lebrice Nov 15, 2024
102c51c
Fix import error
lebrice Nov 15, 2024
Files changed
@@ -0,0 +1,35 @@
attention_mask:
  device: cpu
  max: 1
  mean: '1.021e-01'
  min: 0
  shape:
  - 32
  - 128
  sum: 418
input_ids:
  device: cpu
  max: 29043
  mean: '1.648e+02'
  min: 0
  shape:
  - 32
  - 128
  sum: 675172
labels:
  device: cpu
  max: -1
  mean: '-1.e+00'
  min: -1
  shape:
  - 32
  sum: -32
token_type_ids:
  device: cpu
  max: 0
  mean: '0.e+00'
  min: 0
  shape:
  - 32
  - 128
  sum: 0
@@ -0,0 +1,35 @@
attention_mask:
  device: cpu
  max: 1
  mean: '8.374e-02'
  min: 0
  shape:
  - 32
  - 128
  sum: 343
input_ids:
  device: cpu
  max: 26101
  mean: '1.597e+02'
  min: 0
  shape:
  - 32
  - 128
  sum: 654306
labels:
  device: cpu
  max: 1
  mean: '7.188e-01'
  min: 0
  shape:
  - 32
  sum: 23
token_type_ids:
  device: cpu
  max: 0
  mean: '0.e+00'
  min: 0
  shape:
  - 32
  - 128
  sum: 0
@@ -0,0 +1,35 @@
attention_mask:
  device: cpu
  max: 1
  mean: '9.277e-02'
  min: 0
  shape:
  - 32
  - 128
  sum: 380
input_ids:
  device: cpu
  max: 29043
  mean: '1.362e+02'
  min: 0
  shape:
  - 32
  - 128
  sum: 557879
labels:
  device: cpu
  max: 1
  mean: '7.5e-01'
  min: 0
  shape:
  - 32
  sum: 24
token_type_ids:
  device: cpu
  max: 0
  mean: '0.e+00'
  min: 0
  shape:
  - 32
  - 128
  sum: 0
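
Each regression file above stores, for every tensor in a recorded batch, its device, extrema, mean, shape and sum. A minimal sketch of how such an entry could be produced (a hypothetical helper, not the project's actual code):

```python
import torch


def tensor_stats(t: torch.Tensor) -> dict:
    # Hypothetical helper mirroring the fields in the regression files
    # above: device, max, mean, min, shape and sum for one tensor.
    return {
        "device": t.device.type,
        "max": t.max().item(),
        "mean": f"{t.float().mean().item():.3e}",
        "min": t.min().item(),
        "shape": list(t.shape),
        "sum": t.sum().item(),
    }


batch = {"input_ids": torch.randint(0, 30000, (32, 128))}
stats = {name: tensor_stats(t) for name, t in batch.items()}
```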
3 changes: 2 additions & 1 deletion docs/SUMMARY.md
@@ -9,7 +9,8 @@
* [Examples 🧪](examples/index.md)
* [Image Classification (⚡)](examples/torch_sl_example.md)
* [Image Classification (jax+⚡)](examples/jax_sl_example.md)
* [NLP (🤗+⚡)](examples/nlp.md)
* [Text Classification (🤗+⚡)](examples/text_classification.md)
* [Fine-tuning an LLM (🤗+⚡)](examples/llm_finetuning.md)
* [RL (jax)](examples/jax_rl_example.md)
* [Running sweeps](examples/sweeps.md)
* [Profiling your code📎](examples/profiling.md)
23 changes: 17 additions & 6 deletions docs/examples/index.md
@@ -1,10 +1,21 @@
---
additional_python_references:
- project.algorithms.jax_rl_example
- project.algorithms.example
- project.algorithms.jax_example
- project.algorithms.text_classification_example
- project.algorithms.llm_finetuning
- project.trainers.jax_trainer
---

# Examples

This template includes examples that use either Jax, PyTorch, or both!

| Example link | Research Area | Reference link | Frameworks |
| --------------------------------------- | ------------------------------------------ | ------------------ | --------------- |
| [ExampleAlgorithm](torch_sl_example.md) | Supervised Learning (image classification) | `ExampleAlgorithm` | Torch + ⚡ |
| [JaxExample](jax_sl_example.md) | Supervised Learning (image classification) | `JaxExample` | Torch + Jax + ⚡ |
| [HFExample](nlp.md) | NLP (text classification) | `HFExample` | Torch + 🤗 + ⚡ |
| [JaxRLExample](jax_rl_example.md) | RL | `JaxRLExample` | Jax |
| Example link | Research Area | Reference link | Frameworks |
| --------------------------------------------------- | ------------------------------------------ | --------------------------- | --------------- |
| [ExampleAlgorithm](torch_sl_example.md) | Supervised Learning (image classification) | `ExampleAlgorithm` | Torch + ⚡ |
| [JaxExample](jax_sl_example.md) | Supervised Learning (image classification) | `JaxExample` | Torch + Jax + ⚡ |
| [TextClassificationExample](text_classification.md) | NLP (text classification) | `TextClassificationExample` | Torch + 🤗 + ⚡ |
| [JaxRLExample](jax_rl_example.md) | RL | `JaxRLExample` | Jax |
| [LLMFinetuningExample](llm_finetuning.md)           | NLP (Causal language modeling)             | `LLMFinetuningExample`      | Torch + 🤗 + ⚡  |
22 changes: 22 additions & 0 deletions docs/examples/llm_finetuning.md
@@ -0,0 +1,22 @@
---
additional_python_references:
- project.algorithms.llm_finetuning
---
# Fine-tuning LLMs

This example is based on [this language modeling example from the HuggingFace transformers documentation](https://huggingface.co/docs/transformers/en/tasks/language_modeling).

To better understand what's going on in this example, it is a good idea to read through these tutorials first:
* [Causal language modeling simple example - HuggingFace docs](https://huggingface.co/docs/transformers/en/tasks/language_modeling)
* [Fine-tune a language model - Colab Notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb#scrollTo=X6HrpprwIrIz)

The main difference from the original HuggingFace example is that `LLMFinetuningExample` is a `LightningModule` that is trained by a `lightning.Trainer`.

This also means that this example doesn't use [`accelerate`](https://huggingface.co/docs/accelerate/en/index) or the HuggingFace Trainer.
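
As a rough illustration of the structure, here is a minimal sketch of a `LightningModule` that fine-tunes a causal language model. This is a simplified stand-in written for this page, not the actual `LLMFinetuningExample` class:

```python
import lightning
import torch
from transformers import AutoModelForCausalLM


class CausalLMFinetuningSketch(lightning.LightningModule):
    """Simplified stand-in for an LLM fine-tuning LightningModule."""

    def __init__(self, model_name: str = "gpt2", learning_rate: float = 5e-5):
        super().__init__()
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.learning_rate = learning_rate

    def training_step(self, batch: dict[str, torch.Tensor], batch_index: int):
        # The batch holds `input_ids`, `attention_mask` and `labels`;
        # HuggingFace models compute the causal-LM loss internally.
        outputs = self.model(**batch)
        self.log("train_loss", outputs.loss)
        return outputs.loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
```

A `lightning.Trainer` then handles the training loop, checkpointing, and the distributed strategy (such as FSDP, which this PR's commits enable) for such a module.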


## Running the example

```console
python project/main.py experiment=llm_finetuning_example
```
42 changes: 0 additions & 42 deletions docs/examples/nlp.md

This file was deleted.

41 changes: 41 additions & 0 deletions docs/examples/text_classification.md
@@ -0,0 +1,41 @@
# Text Classification (⚡ + 🤗)

## Overview

The [TextClassificationExample][project.algorithms.text_classification_example.TextClassificationExample] is a [LightningModule][lightning.pytorch.core.module.LightningModule] for a simple text classification task.

It accepts a [TextClassificationDataModule][project.datamodules.text.TextClassificationDataModule] as input, along with a network.

??? note "Click to show the code for HFExample"
{{ inline('project.algorithms.text_classification_example.TextClassificationExample', 4) }}

## Config files

### Algorithm config

??? note "Click to show the Algorithm config"
Source: project/configs/algorithm/text_classification_example.yaml

{{ inline('project/configs/algorithm/text_classification_example.yaml', 4) }}

### Datamodule config

??? note "Click to show the Datamodule config"
Source: project/configs/datamodule/glue_cola.yaml

{{ inline('project/configs/datamodule/glue_cola.yaml', 4) }}

## Running the example

Here is a configuration file that you can use to launch a simple experiment:

??? note "Click to show the yaml config file"
Source: project/configs/experiment/text_classification_example.yaml

{{ inline('project/configs/experiment/text_classification_example.yaml', 4) }}

You can use it like so:

```console
python project/main.py experiment=text_classification_example
```
16 changes: 9 additions & 7 deletions docs/features/jax.md
@@ -3,7 +3,7 @@ additional_python_references:
- project.algorithms.jax_rl_example
- project.algorithms.example
- project.algorithms.jax_example
- project.algorithms.hf_example
- project.algorithms.text_classification_example
- project.trainers.jax_trainer
---

@@ -13,12 +13,14 @@

This template includes examples that use either Jax, PyTorch, or both!

| Example link | Reference | Framework | Lightning? |
| ------------------------------------------------- | ------------------ | ----------- | ------------ |
| [ExampleAlgorithm](../examples/jax_sl_example.md) | `ExampleAlgorithm` | Torch | yes |
| [JaxExample](../examples/jax_sl_example.md) | `JaxExample` | Torch + Jax | yes |
| [HFExample](../examples/nlp.md) | `HFExample` | Torch + 🤗 | yes |
| [JaxRLExample](../examples/jax_rl_example.md) | `JaxRLExample` | Jax | no (almost!) |
<!-- TODO: De-duplicate: This is a bit like a duplicate of the table from the examples/index.md -->

| Example link | Reference | Framework | Lightning? |
| --------------------------------------------------------------- | --------------------------- | ----------- | ------------ |
| [ExampleAlgorithm](../examples/torch_sl_example.md)             | `ExampleAlgorithm`          | Torch       | yes          |
| [JaxExample](../examples/jax_sl_example.md) | `JaxExample` | Torch + Jax | yes |
| [TextClassificationExample](../examples/text_classification.md) | `TextClassificationExample` | Torch + 🤗 | yes |
| [JaxRLExample](../examples/jax_rl_example.md) | `JaxRLExample` | Jax | no (almost!) |


In fact, here you can mix and match Jax and Torch code. For example, you can use Jax for your dataloading, your network, or the learning algorithm, all while still benefiting from the conveniences of PyTorch-Lightning (logging, checkpointing, distributed training, and so on).
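
For a rough sense of what crossing that boundary can look like, here is a naive, gradient-free sketch of calling a jitted Jax function on a Torch tensor (illustrative only; it is not the template's actual interop code, which would also need to carry gradients across):

```python
import jax
import jax.numpy as jnp
import torch


@jax.jit
def jax_forward(x: jnp.ndarray) -> jnp.ndarray:
    # Any Jax computation; here, a fixed elementwise nonlinearity.
    return jax.nn.relu(x) * 2.0


def call_jax_from_torch(x: torch.Tensor) -> torch.Tensor:
    # Naive bridge through NumPy on CPU: no gradients flow through this,
    # and real code would prefer zero-copy (dlpack) conversion.
    y = jax_forward(jnp.asarray(x.detach().cpu().numpy()))
    return torch.from_numpy(jax.device_get(y).copy())
```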
4 changes: 2 additions & 2 deletions project/algorithms/__init__.py
@@ -1,13 +1,13 @@
from .example import ExampleAlgorithm
from .hf_example import HFExample
from .jax_example import JaxExample
from .jax_rl_example import JaxRLExample
from .no_op import NoOp
from .text_classification_example import TextClassificationExample

__all__ = [
"ExampleAlgorithm",
"JaxExample",
"NoOp",
"HFExample",
"TextClassificationExample",
"JaxRLExample",
]
8 changes: 8 additions & 0 deletions project/algorithms/callbacks/samples_per_second.py
@@ -1,6 +1,8 @@
import time
from typing import Any, Literal

import jax
import torch
from lightning import LightningModule, Trainer
from torch import Tensor
from torch.optim import Optimizer
@@ -90,6 +92,12 @@ def log(
    def get_num_samples(self, batch: BatchType) -> int:
        if is_sequence_of(batch, Tensor):
            return batch[0].shape[0]
        if isinstance(batch, dict):
            return next(
                v.shape[0]
                for v in jax.tree.leaves(batch)
                if isinstance(v, torch.Tensor) and v.ndim > 1
            )
        raise NotImplementedError(
            f"Don't know how many 'samples' there are in batch of type {type(batch)}"
        )
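
The new `dict` branch above flattens the batch with `jax.tree.leaves` and takes the batch dimension of the first multi-dimensional tensor it finds. A standalone illustration (the batch below is made up for this note):

```python
import jax
import torch

# Made-up dict batch, shaped like the ones in the regression files above.
batch = {
    "input_ids": torch.zeros(32, 128, dtype=torch.long),
    "attention_mask": torch.ones(32, 128, dtype=torch.long),
    "labels": torch.zeros(32, dtype=torch.long),
}
# `jax.tree.leaves` returns the dict's values as leaves; the first tensor
# with more than one dimension gives the batch size (32 here).
num_samples = next(
    v.shape[0] for v in jax.tree.leaves(batch) if isinstance(v, torch.Tensor) and v.ndim > 1
)
assert num_samples == 32
```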
17 changes: 17 additions & 0 deletions project/algorithms/example_test.py
@@ -1,9 +1,13 @@
"""Example showing how the test suite can be used to add tests for a new algorithm."""

import pytest
import torch
from transformers import PreTrainedModel

from project.algorithms.testsuites.algorithm_tests import LearningAlgorithmTests
from project.configs import Config
from project.conftest import command_line_overrides
from project.datamodules.image_classification.cifar10 import CIFAR10DataModule
from project.datamodules.image_classification.image_classification import (
    ImageClassificationDataModule,
)
@@ -12,6 +16,19 @@
from .example import ExampleAlgorithm


@pytest.mark.parametrize(
    command_line_overrides.__name__, ["algorithm=example datamodule=cifar10"], indirect=True
)
def test_example_experiment_defaults(experiment_config: Config) -> None:
    """Test to check that the datamodule is required (even when just an algorithm is set?!)."""

    assert experiment_config.algorithm["_target_"] == (
        ExampleAlgorithm.__module__ + "." + ExampleAlgorithm.__qualname__
    )

    assert isinstance(experiment_config.datamodule, CIFAR10DataModule)


@run_for_all_configs_of_type("algorithm", ExampleAlgorithm)
@run_for_all_configs_of_type("datamodule", ImageClassificationDataModule)
@run_for_all_configs_of_type("algorithm/network", torch.nn.Module, excluding=PreTrainedModel)
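
The test added above relies on pytest's indirect parametrization: the parametrized string is routed into the `command_line_overrides` fixture rather than passed to the test directly. A self-contained sketch of the pattern (the fixture here is a stand-in, not the project's actual one):

```python
import pytest


@pytest.fixture
def command_line_overrides(request: pytest.FixtureRequest) -> str:
    # With `indirect=True`, the parametrized value arrives on
    # `request.param`, and the fixture decides how to use it.
    return getattr(request, "param", "")


@pytest.mark.parametrize("command_line_overrides", ["algorithm=example"], indirect=True)
def test_receives_override(command_line_overrides: str):
    assert command_line_overrides == "algorithm=example"
```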