Skip to content

Commit

Permalink
Update Gaudi configurations (#35)
Browse files Browse the repository at this point in the history
- Add configurations from the Hub to tests so that non-regression tests rely on publicly available Gaudi configurations for each model
- Remove log_device_mem_alloc in GaudiConfig
  • Loading branch information
regisss authored Apr 25, 2022
1 parent 9177869 commit 7811eb0
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 21 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments
# Loading the GaudiConfig needed by the GaudiTrainer to fine-tune the model on HPUs
gaudi_config = GaudiConfig.from_pretrained(
training_args.gaudi_config_name if training_args.gaudi_config_name else model_args.model_name_or_path,
training_args.gaudi_config_name,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
Expand All @@ -111,15 +111,14 @@ trainer = GaudiTrainer(
)
```
with for example the following Gaudi configuration written in a JSON file:
where `training_args.gaudi_config_name` is the name of a model from the [Hub](https://huggingface.co/Habana) (Gaudi configurations are stored in model repositories). You can also give the path to a custom Gaudi configuration written in a JSON file such as this one:
```json
{
"use_habana_mixed_precision": true,
"hmp_opt_level": "O1",
"hmp_is_verbose": false,
"use_fused_adam": true,
"use_fused_clip_norm": true,
"log_device_mem_alloc": false,
"hmp_bf16_ops": [
"add",
"addmm",
Expand Down
3 changes: 0 additions & 3 deletions optimum/habana/gaudi_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,6 @@ def __init__(self, **kwargs):
# Use Habana's custom fused clip norm implementation
self.use_fused_clip_norm = kwargs.pop("use_fused_clip_norm", False)

# Log live memory allocations on device at the given point
self.log_device_mem_alloc = kwargs.pop("log_device_mem_alloc", False)

def write_bf16_fp32_ops_to_text_files(
self,
path_to_bf16_file: Path,
Expand Down
3 changes: 1 addition & 2 deletions tests/configs/gaudi_config_example_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@
"hmp_opt_level": "O1",
"hmp_is_verbose": false,
"use_fused_adam": true,
"use_fused_clip_norm": true,
"log_device_mem_alloc": false
"use_fused_clip_norm": true
}
3 changes: 1 addition & 2 deletions tests/configs/gaudi_config_trainer_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@
"hmp_opt_level": "O1",
"hmp_is_verbose": false,
"use_fused_adam": true,
"use_fused_clip_norm": true,
"log_device_mem_alloc": true
"use_fused_clip_norm": true
}
3 changes: 0 additions & 3 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ def _create_test(cls, model_name: str, gaudi_config_name: str, multi_card: bool
The test function that runs the example.
"""

if not gaudi_config_name:
gaudi_config_name = PATH_TO_DEFAULT_GAUDI_CONFIG

@slow
def test(self):
if self.EXAMPLE_NAME is None:
Expand Down
1 change: 0 additions & 1 deletion tests/test_gaudi_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def test_default_parameter_types(self):
self.assertIsInstance(gaudi_config.hmp_is_verbose, bool)
self.assertIsInstance(gaudi_config.use_fused_adam, bool)
self.assertIsInstance(gaudi_config.use_fused_clip_norm, bool)
self.assertIsInstance(gaudi_config.log_device_mem_alloc, bool)

self.assertTrue(is_list_of_strings(gaudi_config.hmp_bf16_ops))
self.assertTrue(is_list_of_strings(gaudi_config.hmp_fp32_ops))
Expand Down
14 changes: 7 additions & 7 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@
# Maps each model family to (model name, name of its Gaudi configuration on the Hub) pairs
MODELS_TO_TEST_MAPPING = {
"bert": [
# ("bert-base-uncased", ""), # removed from CI to save time
("bert-large-uncased-whole-word-masking", ""),
# ("bert-base-uncased", "Habana/bert-base-uncased"), # removed from CI to save time
("bert-large-uncased-whole-word-masking", "Habana/bert-large-uncased-whole-word-masking"),
],
"roberta": [
("roberta-base", ""),
("roberta-large", ""),
("roberta-base", "Habana/roberta-base"),
("roberta-large", "Habana/roberta-large"),
],
"albert": [
("albert-large-v2", ""),
# ("albert-xxlarge-v1", ""), # makes the GitHub Actions job exceed the 6-hour limit
("albert-large-v2", "Habana/albert-large-v2"),
# ("albert-xxlarge-v1", "Habana/albert-xxlarge-v1"), # makes the GitHub Actions job exceed the 6-hour limit
],
"distilbert": [
("distilbert-base-uncased", ""),
("distilbert-base-uncased", "Habana/distilbert-base-uncased"),
],
}

Expand Down

0 comments on commit 7811eb0

Please sign in to comment.