diff --git a/src/equisolve/vanilla/models/__init__.py b/src/equisolve/numpy/models/__init__.py similarity index 100% rename from src/equisolve/vanilla/models/__init__.py rename to src/equisolve/numpy/models/__init__.py diff --git a/src/equisolve/numpy/models/generate.py b/src/equisolve/numpy/models/generate.py new file mode 100644 index 0000000..427e707 --- /dev/null +++ b/src/equisolve/numpy/models/generate.py @@ -0,0 +1,71 @@ +import ase.io +import urllib.request +import numpy as np +import equistore.io +from equistore import Labels, TensorBlock, TensorMap +import rascaline + +dataset = "https://raw.githubusercontent.com/Luthaf/rascaline/858d2c2ba286019d1bb263535661412325c50b12/docs/static/datasets.xyz" +structures_fn, _ = urllib.request.urlretrieve(dataset) +frames = ase.io.read(structures_fn, ":10") +calculator = rascaline.SphericalExpansion( + cutoff=3.5, + max_radial=6, + max_angular=6, + atomic_gaussian_width=0.3, + radial_basis={"Gto": {}}, + center_atom_weight=1.0, + cutoff_function={"ShiftedCosine": {"width": 0.5}}, +) +descriptor = calculator.compute(frames) +descriptor.keys_to_samples("species_center") +descriptor.keys_to_properties("species_neighbor") +equistore.io.save("spherical-expansion.npz", descriptor) +calculator = rascaline.SoapPowerSpectrum( + cutoff=3.5, + max_radial=6, + max_angular=6, + atomic_gaussian_width=0.3, + radial_basis={"Gto": {}}, + center_atom_weight=1.0, + cutoff_function={"ShiftedCosine": {"width": 0.5}}, +) +descriptor = calculator.compute(frames, gradients=["positions"]) +descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"]) + +equistore.io.save("power-spectrum.npz", descriptor) + + +energy = [] +forces = [] + +for frame in frames: + energy.append(frame.info["energy"]) + forces.append(frame.arrays["forces"]) + +block = TensorBlock( + values=np.vstack(energy), + samples=Labels( + names=["structure"], + values=np.array([[s] for s in range(len(frames))], dtype=np.int32), + ), + components=[], + properties=Labels(names=["energy"], values=np.array([[0]], dtype=np.int32)), +) +block.add_gradient( + "positions", + data=-(np.vstack(forces).reshape(-1, 3, 1)), + samples=Labels( + names=["sample", "structure", "center"], + values=np.array( + [[s, s, c] for s, f in enumerate(frames) for c in range(len(f))], + dtype=np.int32, + ), + ), + components=[ + Labels(names=["direction"], values=np.array([[0], [1], [2]], dtype=np.int32)), + ], +) + +energies = TensorMap(Labels.single(), [block]) +equistore.io.save("energies.npz", energies) diff --git a/src/equisolve/numpy/models/ridge.py b/src/equisolve/numpy/models/ridge.py new file mode 100644 index 0000000..37c7d52 --- /dev/null +++ b/src/equisolve/numpy/models/ridge.py @@ -0,0 +1,45 @@ +from equistore import Labels, TensorBlock, TensorMap +from sklearn.linear_model import Ridge, RidgeCV + + +class EquiRidge: + """ """ + + def __init__(self, regressor_dictionary): + self.regressor_dictionary = {} + self.fitted_properties_ = {} + + for key, regressor in regressor_dictionary.items(): + if isinstance(regressor, Ridge): + self.regressor_dictionary[key] = regressor + elif isinstance(regressor, dict): + self.regressor_dictionary[key] = Ridge(**regressor) + else: + raise ValueError + + def fit(self, X, y): + for key in X.keys: + if key not in y.keys: + raise ValueError("X and y must have the same keys.") + + for key in X.keys: + if key not in self.regressor_dictionary: + raise ValueError("You must supply a regression type for every key.") + + for key in X.keys: + self.regressor_dictionary[key].fit(X.block(key).values, y.block(key).values) + self.fitted_properties_[key] = y.block(key).properties + return self + + def predict(self, X): + blocks = [] + for key in X.keys: + blocks.append( + TensorBlock( + values=self.regressor_dictionary[key].predict(X.block(key).values), + samples=X.block(key).samples, + components=X.block(key).components, + properties=self.fitted_properties_[key], + ) + ) + return TensorMap(X.keys, blocks) diff --git a/src/equisolve/numpy/models/test.py b/src/equisolve/numpy/models/test.py new file mode 100644 index 0000000..72cdc82 --- /dev/null +++ b/src/equisolve/numpy/models/test.py @@ -0,0 +1,42 @@ +from ridge import EquiRidge +import equistore.io +from utils import structure_sum +import unittest + + +class LinearModelTests(unittest.TestCase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.X = equistore.io.load("power-spectrum.npz") + self.y = equistore.io.load("energies.npz") + self.X.keys_to_samples("species_center") + self.X = structure_sum(self.X) + self.regressors = {k: {"alpha": 1e-2} for k in self.X.keys} + + def test_wrong_y_keys(self): + + X = equistore.io.load("power-spectrum.npz") + X = structure_sum(X) + + eridge = EquiRidge(self.regressors) + with self.assertRaises(ValueError) as cm: + eridge.fit(X, self.y) + self.assertEquals(cm.message, "X and y must have the same keys.") + + def test_wrong_regression_keys(self): + + eridge = EquiRidge({(1,): {"alpha": 1e-2}}) + with self.assertRaises(ValueError) as cm: + eridge.fit(self.X, self.y) + self.assertEquals( + cm.message, "You must supply a regression type for every key." + ) + + def test_pass(self): + eridge = EquiRidge(self.regressors) + eridge.fit(self.X, self.y) + eridge.predict(self.X) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/equisolve/numpy/models/utils.py b/src/equisolve/numpy/models/utils.py new file mode 100644 index 0000000..ec2e1b4 --- /dev/null +++ b/src/equisolve/numpy/models/utils.py @@ -0,0 +1,86 @@ +import numpy as np +from equistore import Labels, TensorBlock, TensorMap + + +def structure_sum(descriptor): + blocks = [] + for _, block in descriptor: + + # no lambda kernels for now + assert len(block.components) == 0 + + structures = np.unique(block.samples["structure"]) + properties = block.properties + + result = np.zeros_like( + block.values, + shape=(len(structures), properties.shape[0]), + ) + + if block.has_gradient("positions"): + do_gradients = True + gradient = block.gradient("positions") + + assert np.all(np.unique(gradient.samples["structure"]) == structures) + + gradient_data = [] + new_gradient_samples = [] + atom_index_positions = [] + for structure_i, s1 in enumerate(structures): + mask = gradient.samples["structure"] == s1 + atoms = np.unique(gradient.samples[mask]["atom"]) + + gradient_data.append( + np.zeros_like( + gradient.data, + shape=(len(atoms), 3, properties.shape[0]), + ) + ) + + new_gradient_samples.append( + np.array( + [[structure_i, s1, atom] for atom in atoms], + dtype=np.int32, + ) + ) + + atom_index_positions.append({atom: i for i, atom in enumerate(atoms)}) + + new_gradient_samples = Labels( + names=["sample", "structure", "atom"], + values=np.concatenate(new_gradient_samples), + ) + + else: + do_gradients = False + + for structure_i, s1 in enumerate(structures): + s1_idx = block.samples["structure"] == s1 + + result[structure_i, :] = np.sum(block.values[s1_idx, :], axis=0) + + if do_gradients: + for sample_i in np.where(gradient.samples["structure"] == s1)[0]: + grad_sample = gradient.samples[sample_i] + + atom_i = atom_index_positions[structure_i][grad_sample["atom"]] + gradient_data[structure_i][atom_i, :, :] += gradient.data[ + sample_i, :, : + ] + + new_block = TensorBlock( + values=result, + samples=Labels(["structure"], structures.reshape(-1, 1)), + components=[], + properties=properties, + ) + + if do_gradients: + gradient_data = np.vstack(gradient_data).reshape(-1, 3, properties.shape[0]) + new_block.add_gradient( + "positions", gradient_data, new_gradient_samples, gradient.components + ) + + blocks.append(new_block) + + return TensorMap(keys=descriptor.keys, blocks=blocks) diff --git a/src/equisolve/vanilla/transformers/__init__.py b/src/equisolve/numpy/transformers/__init__.py similarity index 100% rename from src/equisolve/vanilla/transformers/__init__.py rename to src/equisolve/numpy/transformers/__init__.py