diff --git a/README.md b/README.md
index 8481760d..93db4c87 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@
   - [Training](#training)
   - [Evaluation](#evaluation)
 - [Tutorials](#tutorials)
+  - [CUDA acceleration with cuEquivariance](#cuda-acceleration-with-cuequivariance)
 - [Weights and Biases for experiment tracking](#weights-and-biases-for-experiment-tracking)
 - [Pretrained Foundation Models](#pretrained-foundation-models)
   - [MACE-MP: Materials Project Force Fields](#mace-mp-materials-project-force-fields)
@@ -171,6 +172,9 @@
 We also have a more detailed Colab tutorials on:
 - [Introduction to MACE active learning and fine-tuning](https://colab.research.google.com/drive/1oCSVfMhWrqHTeHbKgUSQN9hTKxLzoNyb)
 - [MACE theory and code (advanced)](https://colab.research.google.com/drive/1AlfjQETV_jZ0JQnV5M3FGwAM2SGCl2aU)
 
+## CUDA acceleration with cuEquivariance
+
+MACE supports CUDA acceleration with the cuEquivariance library. To install the library and use the acceleration, see our documentation at https://mace-docs.readthedocs.io/en/latest/guide/cuda_acceleration.html.
 
 ## On-line data loading for large datasets
diff --git a/mace/calculators/mace.py b/mace/calculators/mace.py
index 56e07375..fc88c051 100644
--- a/mace/calculators/mace.py
+++ b/mace/calculators/mace.py
@@ -400,24 +400,34 @@ def get_descriptors(self, atoms=None, invariants_only=True, num_layers=-1):
             atoms = self.atoms
         if self.model_type != "MACE":
             raise NotImplementedError("Only implemented for MACE models")
+        num_interactions = int(self.models[0].num_interactions)
         if num_layers == -1:
-            num_layers = int(self.models[0].num_interactions)
+            num_layers = num_interactions
         batch = self._atoms_to_batch(atoms)
         descriptors = [model(batch.to_dict())["node_feats"] for model in self.models]
+
+        irreps_out = self.models[0].products[0].linear.__dict__["irreps_out"]
+        l_max = irreps_out.lmax
+        num_invariant_features = irreps_out.dim // (l_max + 1) ** 2
+        per_layer_features = [irreps_out.dim for _ in range(num_interactions)]
+        per_layer_features[-1] = (
+            num_invariant_features  # Equivariant features not created for the last layer
+        )
+
         if invariants_only:
-            irreps_out = self.models[0].products[0].linear.__dict__["irreps_out"]
-            l_max = irreps_out.lmax
-            num_features = irreps_out.dim // (l_max + 1) ** 2
             descriptors = [
                 extract_invariant(
                     descriptor,
                     num_layers=num_layers,
-                    num_features=num_features,
+                    num_features=num_invariant_features,
                     l_max=l_max,
                 )
                 for descriptor in descriptors
             ]
-        descriptors = [descriptor.detach().cpu().numpy() for descriptor in descriptors]
+        to_keep = np.sum(per_layer_features[:num_layers])
+        descriptors = [
+            descriptor[:, :to_keep].detach().cpu().numpy() for descriptor in descriptors
+        ]
 
         if self.num_models == 1:
             return descriptors[0]
diff --git a/mace/tools/model_script_utils.py b/mace/tools/model_script_utils.py
index 3f49eb41..d937446c 100644
--- a/mace/tools/model_script_utils.py
+++ b/mace/tools/model_script_utils.py
@@ -146,15 +146,18 @@ def _build_model(
     args, model_config, model_config_foundation, heads
 ):  # pylint: disable=too-many-return-statements
     if args.model == "MACE":
+        if args.interaction_first not in [
+            "RealAgnosticInteractionBlock",
+            "RealAgnosticDensityInteractionBlock",
+        ]:
+            args.interaction_first = "RealAgnosticInteractionBlock"
         return modules.ScaleShiftMACE(
             **model_config,
             pair_repulsion=args.pair_repulsion,
             distance_transform=args.distance_transform,
             correlation=args.correlation,
             gate=modules.gate_dict[args.gate],
-            interaction_cls_first=modules.interaction_classes[
-                "RealAgnosticInteractionBlock"
-            ],
+            interaction_cls_first=modules.interaction_classes[args.interaction_first],
             MLP_irreps=o3.Irreps(args.MLP_irreps),
             atomic_inter_scale=args.std,
             atomic_inter_shift=[0.0] * len(heads),
diff --git a/tests/test_calculator.py b/tests/test_calculator.py
index 74a0ffa3..158cad64 100644
--- a/tests/test_calculator.py
+++ b/tests/test_calculator.py
@@ -481,24 +481,42 @@ def test_calculator_descriptor(fitting_configs, trained_equivariant_model):
     desc_invariant = calc.get_descriptors(at, invariants_only=True)
     desc_invariant_rotated = calc.get_descriptors(at_rotated, invariants_only=True)
-    desc_single_layer = calc.get_descriptors(at, invariants_only=True, num_layers=1)
-    desc_single_layer_rotated = calc.get_descriptors(
+    desc_invariant_single_layer = calc.get_descriptors(
+        at, invariants_only=True, num_layers=1
+    )
+    desc_invariant_single_layer_rotated = calc.get_descriptors(
         at_rotated, invariants_only=True, num_layers=1
     )
     desc = calc.get_descriptors(at, invariants_only=False)
+    desc_single_layer = calc.get_descriptors(at, invariants_only=False, num_layers=1)
     desc_rotated = calc.get_descriptors(at_rotated, invariants_only=False)
+    desc_rotated_single_layer = calc.get_descriptors(
+        at_rotated, invariants_only=False, num_layers=1
+    )
 
     assert desc_invariant.shape[0] == 3
     assert desc_invariant.shape[1] == 32
-    assert desc_single_layer.shape[0] == 3
-    assert desc_single_layer.shape[1] == 16
+    assert desc_invariant_single_layer.shape[0] == 3
+    assert desc_invariant_single_layer.shape[1] == 16
     assert desc.shape[0] == 3
     assert desc.shape[1] == 80
+    assert desc_single_layer.shape[0] == 3
+    assert desc_single_layer.shape[1] == 16 * 4
+    assert desc_rotated_single_layer.shape[0] == 3
+    assert desc_rotated_single_layer.shape[1] == 16 * 4
 
     np.testing.assert_allclose(desc_invariant, desc_invariant_rotated, atol=1e-6)
-    np.testing.assert_allclose(desc_single_layer, desc_invariant[:, :16], atol=1e-6)
     np.testing.assert_allclose(
-        desc_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
+        desc_invariant_single_layer, desc_invariant[:, :16], atol=1e-6
+    )
+    np.testing.assert_allclose(
+        desc_invariant_single_layer_rotated, desc_invariant[:, :16], atol=1e-6
+    )
+    np.testing.assert_allclose(
+        desc_single_layer[:, :16], desc_rotated_single_layer[:, :16], atol=1e-6
+    )
+    assert not np.allclose(
+        desc_single_layer[:, 16:], desc_rotated_single_layer[:, 16:], atol=1e-6
     )
     assert not np.allclose(desc, desc_rotated, atol=1e-6)
diff --git a/tests/test_cueq.py b/tests/test_cueq.py
index 21ec0e2e..8d713c78 100644
--- a/tests/test_cueq.py
+++ b/tests/test_cueq.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 from typing import Any, Dict
 
 import pytest
@@ -17,7 +18,7 @@
 except ImportError:
     CUET_AVAILABLE = False
 
-torch.set_default_dtype(torch.float64)
+CUDA_AVAILABLE = torch.cuda.is_available()
 
 
 @pytest.mark.skipif(not CUET_AVAILABLE, reason="cuequivariance not installed")
@@ -49,9 +50,11 @@ def model_config(self, interaction_cls_first, hidden_irreps) -> Dict[str, Any]:
         }
 
     @pytest.fixture
-    def batch(self, device: str):
+    def batch(self, device: str, default_dtype: torch.dtype) -> Dict[str, torch.Tensor]:
        from ase import build
 
+        torch.set_default_dtype(default_dtype)
+
         table = tools.AtomicNumberTable([6])
         atoms = build.bulk("C", "diamond", a=3.567, cubic=True)
 
@@ -74,7 +77,10 @@ def batch(self, device: str):
         batch = next(iter(data_loader))
         return batch.to(device).to_dict()
 
-    @pytest.mark.parametrize("device", ["cpu"])
+    @pytest.mark.parametrize(
+        "device",
+        ["cpu"] + (["cuda"] if CUDA_AVAILABLE else []),
+    )
     @pytest.mark.parametrize(
         "interaction_cls_first",
         [
@@ -91,35 +97,41 @@ def batch(self, device: str):
             o3.Irreps("32x0e"),
         ],
     )
+    @pytest.mark.parametrize("default_dtype", [torch.float32, torch.float64])
     def test_bidirectional_conversion(
         self,
         model_config: Dict[str, Any],
         batch: Dict[str, torch.Tensor],
+        device: str,
+        default_dtype: torch.dtype,
     ):
+        if device == "cuda" and not CUDA_AVAILABLE:
+            pytest.skip("CUDA not available")
         torch.manual_seed(42)
 
         # Create original E3nn model
-        model_e3nn = modules.ScaleShiftMACE(**model_config)
-        # model_e3nn = model_e3nn.to(device)
+        model_e3nn = modules.ScaleShiftMACE(**model_config).to(device)
 
         # Convert E3nn to CuEq
-        model_cueq = run_e3nn_to_cueq(model_e3nn)
-        # model_cueq = model_cueq.to(device)
+        model_cueq = run_e3nn_to_cueq(model_e3nn).to(device)
 
         # Convert CuEq back to E3nn
-        model_e3nn_back = run_cueq_to_e3nn(model_cueq)
-        # model_e3nn_back = model_e3nn_back.to(device)
+        model_e3nn_back = run_cueq_to_e3nn(model_cueq).to(device)
 
         # Test forward pass equivalence
-        out_e3nn = model_e3nn(batch, training=True)
-        out_cueq = model_cueq(batch, training=True)
-        out_e3nn_back = model_e3nn_back(batch, training=True)
+        out_e3nn = model_e3nn(deepcopy(batch), training=True, compute_stress=True)
+        out_cueq = model_cueq(deepcopy(batch), training=True, compute_stress=True)
+        out_e3nn_back = model_e3nn_back(
+            deepcopy(batch), training=True, compute_stress=True
+        )
 
         # Check outputs match for both conversions
         torch.testing.assert_close(out_e3nn["energy"], out_cueq["energy"])
         torch.testing.assert_close(out_cueq["energy"], out_e3nn_back["energy"])
         torch.testing.assert_close(out_e3nn["forces"], out_cueq["forces"])
         torch.testing.assert_close(out_cueq["forces"], out_e3nn_back["forces"])
+        torch.testing.assert_close(out_e3nn["stress"], out_cueq["stress"])
+        torch.testing.assert_close(out_cueq["stress"], out_e3nn_back["stress"])
 
         # Test backward pass equivalence
         loss_e3nn = out_e3nn["energy"].sum()
@@ -131,6 +143,8 @@ def test_bidirectional_conversion(
         loss_e3nn_back.backward()
 
         # Compare gradients for all conversions
+        tol = 1e-4 if default_dtype == torch.float32 else 1e-8
+
         def print_gradient_diff(name1, p1, name2, p2, conv_type):
             if p1.grad is not None and p1.grad.shape == p2.grad.shape:
                 if name1.split(".", 2)[:2] == name2.split(".", 2)[:2]:
@@ -138,7 +152,7 @@ def print_gradient_diff(name1, p1, name2, p2, conv_type):
                     print(
                         f"{conv_type} - Parameter {name1}/{name2}, Max error: {error.max()}"
                     )
-                    torch.testing.assert_close(p1.grad, p2.grad, atol=1e-5, rtol=1e-10)
+                    torch.testing.assert_close(p1.grad, p2.grad, atol=tol, rtol=1e-10)
 
         # E3nn to CuEq gradients
         for (name_e3nn, p_e3nn), (name_cueq, p_cueq) in zip(
@@ -161,49 +175,3 @@ def print_gradient_diff(name1, p1, name2, p2, conv_type):
             print_gradient_diff(
                 name_e3nn, p_e3nn, name_e3nn_back, p_e3nn_back, "Full circle"
             )
-
-    # def test_jit_compile(
-    #     self,
-    #     model_config: Dict[str, Any],
-    #     batch: Dict[str, torch.Tensor],
-    #     device: str,
-    # ):
-    #     torch.manual_seed(42)
-
-    #     # Create original E3nn model
-    #     model_e3nn = modules.ScaleShiftMACE(**model_config)
-    #     model_e3nn = model_e3nn.to(device)
-
-    #     # Convert E3nn to CuEq
-    #     model_cueq = run_e3nn_to_cueq(model_e3nn)
-    #     model_cueq = model_cueq.to(device)
-
-    #     # Convert CuEq back to E3nn
-    #     model_e3nn_back = run_cueq_to_e3nn(model_cueq)
-    #     model_e3nn_back = model_e3nn_back.to(device)
-
-    #     # # Compile all models
-    #     model_e3nn_compiled = jit.compile(model_e3nn)
-    #     model_cueq_compiled = jit.compile(model_cueq)
-    #     model_e3nn_back_compiled = jit.compile(model_e3nn_back)
-
-    #     # Test forward pass equivalence
-    #     out_e3nn = model_e3nn(batch, training=True)
-    #     out_cueq = model_cueq(batch, training=True)
-    #     out_e3nn_back = model_e3nn_back(batch, training=True)
-
-    #     out_e3nn_compiled = model_e3nn_compiled(batch, training=True)
-    #     out_cueq_compiled = model_cueq_compiled(batch, training=True)
-    #     out_e3nn_back_compiled = model_e3nn_back_compiled(batch, training=True)
-
-    #     # Check outputs match for both conversions
-    #     torch.testing.assert_close(out_e3nn["energy"], out_cueq["energy"])
-    #     torch.testing.assert_close(out_cueq["energy"], out_e3nn_back["energy"])
-    #     torch.testing.assert_close(out_e3nn["forces"], out_cueq["forces"])
-    #     torch.testing.assert_close(out_cueq["forces"], out_e3nn_back["forces"])
-
-    #     torch.testing.assert_close(out_e3nn["energy"], out_e3nn_compiled["energy"])
-    #     torch.testing.assert_close(out_cueq["energy"], out_cueq_compiled["energy"])
-    #     torch.testing.assert_close(out_e3nn_back["energy"], out_e3nn_back_compiled["energy"])
-    #     torch.testing.assert_close(out_e3nn["forces"], out_e3nn_compiled["forces"])
-    #     torch.testing.assert_close(out_cueq["forces"], out_cueq_compiled["forces"])
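
The `get_descriptors` change above makes `num_layers` also truncate the full (equivariant) node features, keeping `irreps_out.dim` columns per retained layer and only the invariant block for the final layer. A minimal usage sketch, assuming a trained model like the one in the test (16 channels, `l_max = 1`, two interaction layers); the model path is a placeholder:

```python
# Usage sketch for the per-layer descriptor slicing exercised in test_calculator_descriptor.
# The model path is illustrative; the 16-channel, l_max = 1 sizes mirror the test model.
from ase import build
from mace.calculators import MACECalculator

calc = MACECalculator(model_paths="path/to/mace.model", device="cpu")
atoms = build.bulk("C", "diamond", a=3.567, cubic=True)

# Invariant (scalar) features from every interaction layer: 16 per layer here, 32 total.
desc_invariant = calc.get_descriptors(atoms, invariants_only=True)

# Full equivariant features from the first layer only: 16 * (l_max + 1)**2 = 64 columns,
# matching the `16 * 4` assertion in the test; later layers are sliced away.
desc_layer1 = calc.get_descriptors(atoms, invariants_only=False, num_layers=1)

print(desc_invariant.shape, desc_layer1.shape)
```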
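The updated cuEquivariance test round-trips a model between the e3nn and cuEquivariance implementations, now on CPU and CUDA and in both float32 and float64, and checks energy, forces and stress. A rough sketch of that round trip, assuming the conversion helpers live in the `mace.cli` conversion scripts (their import location is not shown in this diff and is an assumption):

```python
# Sketch of the e3nn -> cuEquivariance -> e3nn round trip checked by
# test_bidirectional_conversion. The import paths below are assumptions; the test
# imports run_e3nn_to_cueq / run_cueq_to_e3nn outside the hunks shown here.
from copy import deepcopy

import torch
from mace import modules
from mace.cli.convert_e3nn_cueq import run as run_e3nn_to_cueq  # assumed location
from mace.cli.convert_cueq_e3nn import run as run_cueq_to_e3nn  # assumed location


def round_trip(model_config: dict, batch: dict, device: str = "cpu") -> None:
    torch.manual_seed(42)
    model_e3nn = modules.ScaleShiftMACE(**model_config).to(device)
    model_cueq = run_e3nn_to_cueq(model_e3nn).to(device)        # accelerated copy
    model_e3nn_back = run_cueq_to_e3nn(model_cueq).to(device)   # converted back

    # Each model gets its own copy of the batch, since the forward pass mutates it.
    out_e3nn = model_e3nn(deepcopy(batch), training=True, compute_stress=True)
    out_cueq = model_cueq(deepcopy(batch), training=True, compute_stress=True)

    # Outputs should agree between the two implementations.
    torch.testing.assert_close(out_e3nn["energy"], out_cueq["energy"])
    torch.testing.assert_close(out_e3nn["forces"], out_cueq["forces"])
    torch.testing.assert_close(out_e3nn["stress"], out_cueq["stress"])
```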