Skip to content
This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Initial commit #2

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/equisolve/numpy/models/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import ase.io
import urllib.request
import numpy as np
import equistore.io
from equistore import Labels, TensorBlock, TensorMap
import rascaline

dataset = "https://raw.githubusercontent.com/Luthaf/rascaline/858d2c2ba286019d1bb263535661412325c50b12/docs/static/datasets.xyz"
structures_fn, _ = urllib.request.urlretrieve(dataset)
frames = ase.io.read(structures_fn, ":10")
calculator = rascaline.SphericalExpansion(
cutoff=3.5,
max_radial=6,
max_angular=6,
atomic_gaussian_width=0.3,
radial_basis={"Gto": {}},
center_atom_weight=1.0,
cutoff_function={"ShiftedCosine": {"width": 0.5}},
)
descriptor = calculator.compute(frames)
descriptor.keys_to_samples("species_center")
descriptor.keys_to_properties("species_neighbor")
equistore.io.save("spherical-expansion.npz", descriptor)
calculator = rascaline.SoapPowerSpectrum(
cutoff=3.5,
max_radial=6,
max_angular=6,
atomic_gaussian_width=0.3,
radial_basis={"Gto": {}},
center_atom_weight=1.0,
cutoff_function={"ShiftedCosine": {"width": 0.5}},
)
descriptor = calculator.compute(frames, gradients=["positions"])
descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])

equistore.io.save("power-spectrum.npz", descriptor)


energy = []
forces = []

for frame in frames:
energy.append(frame.info["energy"])
forces.append(frame.arrays["forces"])

block = TensorBlock(
values=np.vstack(energy),
samples=Labels(
names=["structure"],
values=np.array([[s] for s in range(len(frames))], dtype=np.int32),
),
components=[],
properties=Labels(names=["energy"], values=np.array([[0]], dtype=np.int32)),
)
block.add_gradient(
"positions",
data=-(np.vstack(forces).reshape(-1, 3, 1)),
samples=Labels(
names=["sample", "structure", "center"],
values=np.array(
[[s, s, c] for s, f in enumerate(frames) for c in range(len(f))],
dtype=np.int32,
),
),
components=[
Labels(names=["direction"], values=np.array([[0], [1], [2]], dtype=np.int32)),
],
)

energies = TensorMap(Labels.single(), [block])
equistore.io.save("energies.npz", energies)
45 changes: 45 additions & 0 deletions src/equisolve/numpy/models/ridge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from equistore import Labels, TensorBlock, TensorMap
from sklearn.linear_model import Ridge, RidgeCV


class EquiRidge:
""" """

def __init__(self, regressor_dictionary):
self.regressor_dictionary = {}
self.fitted_properties_ = {}

for key, regressor in regressor_dictionary.items():
if isinstance(regressor, Ridge):
self.regressor_dictionary[key] = regressor
elif isinstance(regressor, dict):
self.regressor_dictionary[key] = Ridge(**regressor)
else:
raise ValueError

def fit(self, X, y):
for key in X.keys:
if key not in y.keys:
raise ValueError("X and y must have the same keys.")

for key in X.keys:
if key not in self.regressor_dictionary:
raise ValueError("You must supply a regression type for every key.")

for key in X.keys:
self.regressor_dictionary[key].fit(X.block(key).values, y.block(key).values)
self.fitted_properties_[key] = y.block(key).properties
return self

def predict(self, X):
blocks = []
for key in X.keys:
blocks.append(
TensorBlock(
values=self.regressor_dictionary[key].predict(X.block(key).values),
samples=X.block(key).samples,
components=X.block(key).components,
properties=self.fitted_properties_[key],
)
)
return TensorMap(X.keys, blocks)
42 changes: 42 additions & 0 deletions src/equisolve/numpy/models/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from ridge import EquiRidge
import equistore.io
from utils import structure_sum
import unittest


class LinearModelTests(unittest.TestCase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.X = equistore.io.load("power-spectrum.npz")
self.y = equistore.io.load("energies.npz")
self.X.keys_to_samples("species_center")
self.X = structure_sum(self.X)
self.regressors = {k: {"alpha": 1e-2} for k in self.X.keys}

def test_wrong_y_keys(self):

X = equistore.io.load("power-spectrum.npz")
X = structure_sum(X)

eridge = EquiRidge(self.regressors)
with self.assertRaises(ValueError) as cm:
eridge.fit(X, self.y)
self.assertEquals(cm.message, "X and y must have the same keys.")

def test_wrong_regression_keys(self):

eridge = EquiRidge({(1,): {"alpha": 1e-2}})
with self.assertRaises(ValueError) as cm:
eridge.fit(self.X, self.y)
self.assertEquals(
cm.message, "You must supply a regression type for every key."
)

def test_pass(self):
eridge = EquiRidge(self.regressors)
eridge.fit(self.X, self.y)
eridge.predict(self.X)


if __name__ == "__main__":
unittest.main()
86 changes: 86 additions & 0 deletions src/equisolve/numpy/models/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import numpy as np
from equistore import Labels, TensorBlock, TensorMap


def structure_sum(descriptor):
blocks = []
for _, block in descriptor:

# no lambda kernels for now
assert len(block.components) == 0

structures = np.unique(block.samples["structure"])
properties = block.properties

result = np.zeros_like(
block.values,
shape=(len(structures), properties.shape[0]),
)

if block.has_gradient("positions"):
do_gradients = True
gradient = block.gradient("positions")

assert np.all(np.unique(gradient.samples["structure"]) == structures)

gradient_data = []
new_gradient_samples = []
atom_index_positions = []
for structure_i, s1 in enumerate(structures):
mask = gradient.samples["structure"] == s1
atoms = np.unique(gradient.samples[mask]["atom"])

gradient_data.append(
np.zeros_like(
gradient.data,
shape=(len(atoms), 3, properties.shape[0]),
)
)

new_gradient_samples.append(
np.array(
[[structure_i, s1, atom] for atom in atoms],
dtype=np.int32,
)
)

atom_index_positions.append({atom: i for i, atom in enumerate(atoms)})

new_gradient_samples = Labels(
names=["sample", "structure", "atom"],
values=np.concatenate(new_gradient_samples),
)

else:
do_gradients = False

for structure_i, s1 in enumerate(structures):
s1_idx = block.samples["structure"] == s1

result[structure_i, :] = np.sum(block.values[s1_idx, :], axis=0)

if do_gradients:
for sample_i in np.where(gradient.samples["structure"] == s1)[0]:
grad_sample = gradient.samples[sample_i]

atom_i = atom_index_positions[structure_i][grad_sample["atom"]]
gradient_data[structure_i][atom_i, :, :] += gradient.data[
sample_i, :, :
]

new_block = TensorBlock(
values=result,
samples=Labels(["structure"], structures.reshape(-1, 1)),
components=[],
properties=properties,
)

if do_gradients:
gradient_data = np.vstack(gradient_data).reshape(-1, 3, properties.shape[0])
new_block.add_gradient(
"positions", gradient_data, new_gradient_samples, gradient.components
)

blocks.append(new_block)

return TensorMap(keys=descriptor.keys, blocks=blocks)