This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Commit

format example and other files
PicoCentauri committed Jan 23, 2023
1 parent b4dae3f commit c7e613e
Showing 4 changed files with 96 additions and 78 deletions.
72 changes: 39 additions & 33 deletions docs/src/examples/linear-model.py
@@ -16,13 +16,22 @@
#
# Dataset
# -------
#
# As the data set we use the SHIFTML set. You can obtain the dataset used in this
# example from our :download:`website<../../static/dataset.xyz>`.
# We read the first 20 structures of the data set using
# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.

import ase.io
import numpy as np
from equistore import Labels
from equistore.operations import slice, sum_over_samples
from rascaline import SoapPowerSpectrum

from equisolve.numpy.models.linear_model import Ridge
from equisolve.utils import dictionary_to_tensormap
from equisolve.utils.convert import ase_to_tensormap


frames = ase.io.read("dataset.xyz", ":20")

@@ -39,7 +48,6 @@
# We construct the descriptor training data with a SOAP power spectrum using
# rascaline. We first define the hyperparameters for the calculation.

from rascaline import SoapPowerSpectrum

HYPER_PARAMETERS = {
"cutoff": 5.0,
@@ -67,21 +75,19 @@
#
# For more details on how the descriptor works see the documentation of
# rascaline.
#
# We now move all keys into properties. TODO: Explain why we have to do this.
#
# We now move the keys into the samples and properties so that our model can access them.

descriptor = descriptor.keys_to_properties(
["species_center", "species_neighbor_1", "species_neighbor_2"])
descriptor = descriptor.keys_to_samples(["species_center"])
descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])

# %%
#
# The descriptor contains a representation with respect to each central atom per
# structure. However, our target energies are given per structure only.
# Therefore, we sum the features over the center atoms of each structure.

from equistore.operations import sum_over_samples

X = sum_over_samples(descriptor, ["center"])
X = sum_over_samples(descriptor, ["center", "species_center"])
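
# %%
#
# A quick optional check (illustration only, not part of this commit): after the
# summation the block of ``X`` should contain one sample per structure. The exact
# sample labels depend on the rascaline and equistore versions in use.

print(X.block().samples)
print(X.block().values.shape)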

# %%
#
@@ -105,7 +111,6 @@
# We construct the target data by converting energies and forces into a
# :class:`equistore.TensorMap`.

from equisolve.utils.convert import ase_to_tensormap

y = ase_to_tensormap(frames, energy="energy", forces="forces")
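
# %%
#
# Optional inspection (illustration only, not part of this commit): the energies
# are stored in the block values and the forces in the ``"positions"`` gradient.
# Exact attribute names may differ slightly between equistore versions.

print(y.block().values.shape)
print(y.block().gradient("positions"))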

@@ -120,53 +125,54 @@
# Construct the model
# -------------------
#
# Before we fit the model we have to define our regularization values.
#
# For this we create a TensorMap with a single sample and the
# same number of features as ``X``.
# For this we create a TensorMap containing the desired regularizer.


alpha_dict = {"values": 1e-5}
alpha = dictionary_to_tensormap(alpha_dict, X)

# %%
#
# So far ``alpha`` contains the same number of samples as ``X``. However,
# the regularizer only needs a single sample, because all samples are
# regularized in the same way in a linear model.
#
# We remove all samples except the 0th one using
# :func:`equistore.operations.slice`.

import numpy as np
from equistore import Labels
from equistore.operations import slice

samples = Labels(
names=["structure"],
values=np.array([(0,)]),
)
alpha = slice(X, samples=samples)
n_features = len(alpha.block().values[:])

alpha.block().values[:] = 1e-5
alpha = slice(alpha, samples=samples)
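
# %%
#
# A brief sanity check (illustration only, not part of this commit): after the
# slicing, ``alpha`` holds exactly one sample but keeps all features of ``X``.

assert len(alpha.block().samples) == 1
assert len(alpha.block().properties) == len(X.block().properties)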

# %%
#
# In our example we use the same alpha value for all features. However,
# :class:`equisolve.numpy.models.linear_model.Ridge` also allows for different
# regularization of each feature. You can apply a feature-wise regularization by
# setting ``alpha.block().values`` to a 1d array of the same length as the
# number of features in your training data.
# In our regularizer we use the same value for all features. However,
# :class:`equisolve.numpy.models.linear_model.Ridge` can also handle a different
# regularization for each feature. You can apply a feature-wise regularization by
# setting ``"values"`` of ``alpha_dict`` to a 1d array of the same length as the
# number of features in the training data ``X`` (here 7200).
#
# With a valid regularizer object we now initialize the Ridge object.
# ``parameter_keys`` determines with respect to which parameters the regression is
# performed. Here, we choose a regression with respect to ``"values"`` (energies) and
# ``"positions"`` (forces).

from equisolve.numpy.models.linear_model import Ridge

clf = Ridge(parameter_keys=["values", "positions"],
alpha=alpha)
clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
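
# %%
#
# As an illustration only (not part of this commit; the ``_fw`` names are
# hypothetical): a feature-wise regularizer could be built by passing a 1d array
# with one entry per feature of ``X`` instead of a scalar, and slicing it to a
# single sample in the same way as above.

n_features_fw = len(X.block().properties)
alpha_fw = dictionary_to_tensormap({"values": 1e-5 * np.ones(n_features_fw)}, X)
alpha_fw = slice(alpha_fw, samples=samples)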

# %%
#
# Next we create a sample weighting :class:`equistore.TensorMap` that weights energies
# five times more than the forces.

from equisolve.utils import dictionary_to_tensormap

sw_dict = {"values": 5, "positions": 1}
sw = dictionary_to_tensormap(sw_dict, y)


# %%
#
# The function `equisolve.utils.dictionary_to_tensormap` creates a
77 changes: 43 additions & 34 deletions examples/linear-model.py
@@ -10,19 +10,31 @@
For constructing a linear model we need the atomic descriptor as training data
``X`` as well as the energies and forces as target data ``y``.
We first import all necessary packages.
"""

import ase.io
import numpy as np
from equistore import Labels
from equistore.operations import slice, sum_over_samples
from rascaline import SoapPowerSpectrum

from equisolve.numpy.models.linear_model import Ridge
from equisolve.utils import dictionary_to_tensormap
from equisolve.utils.convert import ase_to_tensormap


# %%
#
# Dataset
# -------
#
# As the data set we use the SHIFTML set. You can obtain the dataset used in this
# example from our :download:`website<../../static/dataset.xyz>`.
# We read the first 20 structures of the data set using
# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.

import ase.io

frames = ase.io.read("dataset.xyz", ":20")

@@ -39,7 +51,6 @@
# We construct the descriptor training data with a SOAP power spectrum using
# rascaline. We first define the hyperparameters for the calculation.

from rascaline import SoapPowerSpectrum

HYPER_PARAMETERS = {
"cutoff": 5.0,
@@ -67,21 +78,19 @@
#
# For more details on how the descriptor works see the documentation of
# rascaline.
#
# We now move all keys into properties. TODO: Explain why we have to do this.
#
# We now move the keys into the samples and properties so that our model can access them.

descriptor = descriptor.keys_to_properties(
["species_center", "species_neighbor_1", "species_neighbor_2"])
descriptor = descriptor.keys_to_samples(["species_center"])
descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])

# %%
#
# The descriptor contains a representation with respect to each central atom per
# structure. However, our target energies are given per structure only.
# Therefore, we sum the features over the center atoms of each structure.

from equistore.operations import sum_over_samples

X = sum_over_samples(descriptor, ["center"])
X = sum_over_samples(descriptor, ["center", "species_center"])
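
# %%
#
# A quick optional check (illustration only, not part of this commit): after the
# summation the block of ``X`` should contain one sample per structure. The exact
# sample labels depend on the rascaline and equistore versions in use.

print(X.block().samples)
print(X.block().values.shape)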

# %%
#
@@ -105,7 +114,6 @@
# We construct the target data by converting energies and forces into a
# :class:`equistore.TensorMap`.

from equisolve.utils.convert import ase_to_tensormap

y = ase_to_tensormap(frames, energy="energy", forces="forces")
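
# %%
#
# Optional inspection (illustration only, not part of this commit): the energies
# are stored in the block values and the forces in the ``"positions"`` gradient.
# Exact attribute names may differ slightly between equistore versions.

print(y.block().values.shape)
print(y.block().gradient("positions"))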

@@ -120,53 +128,54 @@
# Construct the model
# -------------------
#
# Before we fit the model we have to define our regularization values.
#
# For this we create a TensorMap with a single sample and the
# same number of features as ``X``.
# For this we create a TensorMap containing the desired regularizer.


alpha_dict = {"values": 1e-5}
alpha = dictionary_to_tensormap(alpha_dict, X)

# %%
#
# So far ``alpha`` contains the same number of samples as ``X``. However,
# the regularizer only needs a single sample, because all samples are
# regularized in the same way in a linear model.
#
# We remove all samples except the 0th one using
# :func:`equistore.operations.slice`.

import numpy as np
from equistore import Labels
from equistore.operations import slice

samples = Labels(
names=["structure"],
values=np.array([(0,)]),
)
alpha = slice(X, samples=samples)
n_features = len(alpha.block().values[:])

alpha.block().values[:] = 1e-5
alpha = slice(alpha, samples=samples)
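
# %%
#
# A brief sanity check (illustration only, not part of this commit): after the
# slicing, ``alpha`` holds exactly one sample but keeps all features of ``X``.

assert len(alpha.block().samples) == 1
assert len(alpha.block().properties) == len(X.block().properties)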

# %%
#
# In our example we use the same alpha value for all features. However,
# :class:`equisolve.numpy.models.linear_model.Ridge` also allows for different
# regularization of each feature. You can apply a feature-wise regularization by
# setting ``alpha.block().values`` to a 1d array of the same length as the
# number of features in your training data.
# In our regularizer we use the same value for all features. However,
# :class:`equisolve.numpy.models.linear_model.Ridge` can also handle a different
# regularization for each feature. You can apply a feature-wise regularization by
# setting ``"values"`` of ``alpha_dict`` to a 1d array of the same length as the
# number of features in the training data ``X`` (here 7200).
#
# With a valid regularizer object we now initialize the Ridge object.
# ``parameter_keys`` determines with respect to which parameters the regression is
# performed. Here, we choose a regression with respect to ``"values"`` (energies) and
# ``"positions"`` (forces).

from equisolve.numpy.models.linear_model import Ridge

clf = Ridge(parameter_keys=["values", "positions"],
alpha=alpha)
clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
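
# %%
#
# As an illustration only (not part of this commit; the ``_fw`` names are
# hypothetical): a feature-wise regularizer could be built by passing a 1d array
# with one entry per feature of ``X`` instead of a scalar, and slicing it to a
# single sample in the same way as above.

n_features_fw = len(X.block().properties)
alpha_fw = dictionary_to_tensormap({"values": 1e-5 * np.ones(n_features_fw)}, X)
alpha_fw = slice(alpha_fw, samples=samples)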

# %%
#
# Next we create a sample weighting :class:`equistore.TensorMap` that weights energies
# five times more than the forces.

from equisolve.utils import dictionary_to_tensormap

sw_dict = {"values": 5, "positions": 1}
sw = dictionary_to_tensormap(sw_dict, y)


# %%
#
# The function `equisolve.utils.dictionary_to_tensormap` creates a
13 changes: 7 additions & 6 deletions src/equisolve/numpy/models/linear_model.py
@@ -12,7 +12,6 @@
from equistore import Labels, TensorBlock, TensorMap
from scipy.linalg import solve

from ...utils.metrics import rmse
from ..utils import block_to_array


@@ -49,7 +48,7 @@ def __init__(self, parameter_keys: Union[List[str], str], alpha: TensorMap) -> N
self.alpha = alpha
self.coef_ = []

def _validate_data(self, X: TensorMap, y: TensorMap = None) -> None:
def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
"""Validates :class:`equistore.TensorBlock`'s for the usage in models.
:param X: training data to check
Expand Down Expand Up @@ -84,9 +83,11 @@ def _validate_params(self, X: TensorBlock) -> None:
for i_block, X_block in enumerate(X.blocks()):
alpha_block = self.alpha.block(i_block)
if len(alpha_block.samples) != 1:
raise ValueError("Only one sample is allowed for regularization. "
f"Given alpha contains {len(alpha_block.samples)} "
"samples.")
raise ValueError(
"Only one sample is allowed for regularization. "
f"Given alpha contains {len(alpha_block.samples)} "
"samples."
)

if len(X_block.properties) != len(alpha_block.properties):
raise ValueError("X and y must have the same number of features")
@@ -180,6 +181,6 @@ def score(self, X: TensorMap, y: TensorMap) -> float:
:returns score: :math:`RMSE` of ``self.predict(X)`` wrt. `y`
"""
y_pred = self.predict(X)
# y_pred = self.predict(X)
# We need a tensormap implementation of rmse :-)
# return rmse(y, y_pred)
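
A possible sketch of the missing TensorMap RMSE (illustration only, not part of this
commit; the helper name is hypothetical), assuming both TensorMaps have matching
blocks and using only ``blocks()`` and ``values`` from equistore:

import numpy as np
from equistore import TensorMap

def tensormap_rmse(y_true: TensorMap, y_pred: TensorMap) -> float:
    # Collect the squared errors over the values of all blocks and average them.
    squared_errors = []
    for block_true, block_pred in zip(y_true.blocks(), y_pred.blocks()):
        diff = block_true.values - block_pred.values
        squared_errors.append(diff.flatten() ** 2)
    return float(np.sqrt(np.concatenate(squared_errors).mean()))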
12 changes: 7 additions & 5 deletions tox.ini
@@ -11,6 +11,8 @@ envlist =
docs
format

lint_folders = {toxinidir}/examples {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py

[testenv:lint]
# lint the Python code with flake8 (code linter), black (code formatter), and
# isort (sorting of imports)
@@ -20,9 +22,9 @@
black
isort
commands =
flake8 {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
black --check --diff {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
isort --check-only --diff {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
flake8 {[tox]lint_folders}
black --check --diff {[tox]lint_folders}
isort --check-only --diff {[tox]lint_folders}

[testenv]
# configures which environments run with each python version
@@ -82,8 +84,8 @@
black
isort
commands =
black {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
isort {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
black {[tox]lint_folders}
isort {[tox]lint_folders}

[flake8]
# https://flake8.pycqa.org/en/latest/#

