diff --git a/docs/src/examples/linear-model.py b/docs/src/examples/linear-model.py
index 0ef7d24..0d4e133 100644
--- a/docs/src/examples/linear-model.py
+++ b/docs/src/examples/linear-model.py
@@ -16,13 +16,22 @@
#
# Dataset
# -------
-#
+#
# As data set we use the SHIFTML set. You can obtain the dataset used in this
# example from our :download:`website<../../static/dataset.xyz>`.
# We read the first 20 structures of the data set using
-# `ASE `.
+# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.
import ase.io
+import numpy as np
+from equistore import Labels
+from equistore.operations import slice, sum_over_samples
+from rascaline import SoapPowerSpectrum
+
+from equisolve.numpy.models.linear_model import Ridge
+from equisolve.utils import dictionary_to_tensormap
+from equisolve.utils.convert import ase_to_tensormap
+
frames = ase.io.read("dataset.xyz", ":20")
@@ -39,7 +48,6 @@
# We construct the descriptor training data with a SOAP powerspectrum using
# rascaline. We first define the hyper parameters for the calculation
-from rascaline import SoapPowerSpectrum
HYPER_PARAMETERS = {
"cutoff": 5.0,
@@ -67,11 +75,11 @@
#
# For more details on how the descriptor works see the documentation of
# rascaline.
-#
-# We now move all keys into properties. TODO: Explain why we have to do this.
+#
+# We now move ``species_center`` into samples and the neighbor species into properties.
-descriptor = descriptor.keys_to_properties(
- ["species_center", "species_neighbor_1", "species_neighbor_2"])
+descriptor = descriptor.keys_to_samples(["species_center"])
+descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])
# %%
#
@@ -79,9 +87,7 @@
# structure. However, our energies as target data is per structure only.
# Therefore, we sum the features of each center atom per structure.
-from equistore.operations import sum_over_samples
-
-X = sum_over_samples(descriptor, ["center"])
+X = sum_over_samples(descriptor, ["center", "species_center"])
# %%
#
@@ -105,7 +111,6 @@
# We construct the target data by converting energies and forces into a
# :class:`equisolve.TensorMap`.
-from equisolve.utils.convert import ase_to_tensormap
y = ase_to_tensormap(frames, energy="energy", forces="forces")
@@ -120,53 +125,54 @@
# Construct the model
# -------------------
#
-# Before we fit the model we have to define our regression values.
+# Before we fit the model we have to define our regression values.
#
-# For this we create a TensorMap containing with a single sample and the
-# same number of features as ``X``.
+# For this we create a TensorMap containing the desired regularizer.
+
+
+alpha_dict = {"values": 1e-5}
+alpha = dictionary_to_tensormap(alpha_dict, X)
+
+# %%
+#
+# So far ``alpha`` contains the same number of samples as ``X``. However,
+# the regularizer only needs a single sample, because all samples are
+# regularized in the same way in a linear model.
+#
+# We remove all samples except the 0th one by using
+# :func:`equistore.operations.slice`.
-import numpy as np
-from equistore import Labels
-from equistore.operations import slice
samples = Labels(
names=["structure"],
values=np.array([(0,)]),
)
-alpha = slice(X, samples=samples)
-n_features = len(alpha.block().values[:])
-
-alpha.block().values[:] = 1e-5
+alpha = slice(alpha, samples=samples)
# %%
#
-# In our example we use the same alpha value for all features. However,
-# :class:`equisolve.numpy.models.linear_model.Ridge` also allows for different
-# regularization of each feature. You can apply a feature wise regularization by
-# roviding setting ``alpha.block().values`` with an 1d array of the same length as the
-# number of features in your training data.
+# In our regularizer we use the same value for all features. However,
+# :class:`equisolve.numpy.models.linear_model.Ridge` can also handle different
+# regularization for each feature. You can apply a feature-wise regularization by
+# setting ``"values"`` of ``alpha_dict`` to a 1d array of the same length as the
+# number of features in the training data ``X`` (here 7200).
#
# With a valid regulerizer object we now initilize the Ridge object.
# ``parameter_keys`` determines with respect to which parameters the regression is
-# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
+# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
# ``"positions"`` (forces).
-from equisolve.numpy.models.linear_model import Ridge
-clf = Ridge(parameter_keys=["values", "positions"],
- alpha=alpha)
+clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
# %%
#
# Next we create a sample weighting :class:`equistiore.TensorMap` that weights energies
# five times more then the forces.
-from equisolve.utils import dictionary_to_tensormap
-
sw_dict = {"values": 5, "positions": 1}
sw = dictionary_to_tensormap(sw_dict, y)
-
# %%
#
# The function `equisolve.utils.dictionary_to_tensormap` create a
diff --git a/examples/linear-model.py b/examples/linear-model.py
index 0ef7d24..5c08129 100644
--- a/examples/linear-model.py
+++ b/examples/linear-model.py
@@ -10,19 +10,31 @@
For constructing a linear Model we need the atomic descriptor as training data
``X`` as well as the energies and forces as target data ``y``.
+
+We first import all necessary packages.
"""
+import ase.io
+import numpy as np
+from equistore import Labels
+from equistore.operations import slice, sum_over_samples
+from rascaline import SoapPowerSpectrum
+
+from equisolve.numpy.models.linear_model import Ridge
+from equisolve.utils import dictionary_to_tensormap
+from equisolve.utils.convert import ase_to_tensormap
+
+
# %%
#
# Dataset
# -------
-#
+#
# As data set we use the SHIFTML set. You can obtain the dataset used in this
# example from our :download:`website<../../static/dataset.xyz>`.
# We read the first 20 structures of the data set using
-# `ASE `.
+# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.
-import ase.io
frames = ase.io.read("dataset.xyz", ":20")
@@ -39,7 +51,6 @@
# We construct the descriptor training data with a SOAP powerspectrum using
# rascaline. We first define the hyper parameters for the calculation
-from rascaline import SoapPowerSpectrum
HYPER_PARAMETERS = {
"cutoff": 5.0,
@@ -67,11 +78,11 @@
#
# For more details on how the descriptor works see the documentation of
# rascaline.
-#
-# We now move all keys into properties. TODO: Explain why we have to do this.
+#
+# We now move ``species_center`` into samples and the neighbor species into properties.
-descriptor = descriptor.keys_to_properties(
- ["species_center", "species_neighbor_1", "species_neighbor_2"])
+descriptor = descriptor.keys_to_samples(["species_center"])
+descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])
# %%
#
@@ -79,9 +90,7 @@
# structure. However, our energies as target data is per structure only.
# Therefore, we sum the features of each center atom per structure.
-from equistore.operations import sum_over_samples
-
-X = sum_over_samples(descriptor, ["center"])
+X = sum_over_samples(descriptor, ["center", "species_center"])
# %%
#
@@ -105,7 +114,6 @@
# We construct the target data by converting energies and forces into a
# :class:`equisolve.TensorMap`.
-from equisolve.utils.convert import ase_to_tensormap
y = ase_to_tensormap(frames, energy="energy", forces="forces")
@@ -120,53 +128,54 @@
# Construct the model
# -------------------
#
-# Before we fit the model we have to define our regression values.
+# Before we fit the model we have to define our regression values.
#
-# For this we create a TensorMap containing with a single sample and the
-# same number of features as ``X``.
+# For this we create a TensorMap containing the desired regularizer.
+
+
+alpha_dict = {"values": 1e-5}
+alpha = dictionary_to_tensormap(alpha_dict, X)
+
+# %%
+#
+# So far ``alpha`` contains the same number of samples as ``X``. However,
+# the regularizer only needs a single sample, because all samples are
+# regularized in the same way in a linear model.
+#
+# We remove all samples except the 0th one by using
+# :func:`equistore.operations.slice`.
-import numpy as np
-from equistore import Labels
-from equistore.operations import slice
samples = Labels(
names=["structure"],
values=np.array([(0,)]),
)
-alpha = slice(X, samples=samples)
-n_features = len(alpha.block().values[:])
-
-alpha.block().values[:] = 1e-5
+alpha = slice(alpha, samples=samples)
# %%
#
-# In our example we use the same alpha value for all features. However,
-# :class:`equisolve.numpy.models.linear_model.Ridge` also allows for different
-# regularization of each feature. You can apply a feature wise regularization by
-# roviding setting ``alpha.block().values`` with an 1d array of the same length as the
-# number of features in your training data.
+# In our regularizer we use the same value for all features. However,
+# :class:`equisolve.numpy.models.linear_model.Ridge` can also handle different
+# regularization for each feature. You can apply a feature-wise regularization by
+# setting ``"values"`` of ``alpha_dict`` to a 1d array of the same length as the
+# number of features in the training data ``X`` (here 7200).
#
# With a valid regulerizer object we now initilize the Ridge object.
# ``parameter_keys`` determines with respect to which parameters the regression is
-# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
+# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
# ``"positions"`` (forces).
-from equisolve.numpy.models.linear_model import Ridge
-clf = Ridge(parameter_keys=["values", "positions"],
- alpha=alpha)
+clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
# %%
#
# Next we create a sample weighting :class:`equistiore.TensorMap` that weights energies
# five times more then the forces.
-from equisolve.utils import dictionary_to_tensormap
-
sw_dict = {"values": 5, "positions": 1}
sw = dictionary_to_tensormap(sw_dict, y)
-
# %%
#
# The function `equisolve.utils.dictionary_to_tensormap` create a
diff --git a/src/equisolve/numpy/models/linear_model.py b/src/equisolve/numpy/models/linear_model.py
index e194444..314155e 100644
--- a/src/equisolve/numpy/models/linear_model.py
+++ b/src/equisolve/numpy/models/linear_model.py
@@ -12,7 +12,6 @@
from equistore import Labels, TensorBlock, TensorMap
from scipy.linalg import solve
-from ...utils.metrics import rmse
from ..utils import block_to_array
@@ -49,7 +48,7 @@ def __init__(self, parameter_keys: Union[List[str], str], alpha: TensorMap) -> N
self.alpha = alpha
self.coef_ = []
- def _validate_data(self, X: TensorMap, y: TensorMap = None) -> None:
+ def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
"""Validates :class:`equistore.TensorBlock`'s for the usage in models.
:param X: training data to check
@@ -84,9 +83,11 @@ def _validate_params(self, X: TensorBlock) -> None:
for i_block, X_block in enumerate(X.blocks()):
alpha_block = self.alpha.block(i_block)
if len(alpha_block.samples) != 1:
- raise ValueError("Only one sample is allowed for regularization. "
- f"Given alpha contains {len(alpha_block.samples)} "
- "samples.")
+ raise ValueError(
+ "Only one sample is allowed for regularization. "
+ f"Given alpha contains {len(alpha_block.samples)} "
+ "samples."
+ )
if len(X_block.properties) != len(alpha_block.properties):
raise ValueError("X and y must have the same number of features")
@@ -180,6 +181,6 @@ def score(self, X: TensorMap, y: TensorMap) -> float:
:returns score: :math:`RMSE` of ``self.predict(X)`` wrt. `y`
"""
- y_pred = self.predict(X)
+ # y_pred = self.predict(X)
# We need a tensormap implementation of rmse :-)
# return rmse(y, y_pred)
diff --git a/tox.ini b/tox.ini
index 22853b4..f3b016d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -11,6 +11,8 @@ envlist =
docs
format
+lint_folders = {toxinidir}/examples {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
+
[testenv:lint]
# lint the Python code with flake8 (code linter), black (code formatter), and
# isort (sorting of imports)
@@ -20,9 +22,9 @@ deps =
black
isort
commands =
- flake8 {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
- black --check --diff {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
- isort --check-only --diff {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
+ flake8 {[tox]lint_folders}
+ black --check --diff {[tox]lint_folders}
+ isort --check-only --diff {[tox]lint_folders}
[testenv]
# configures which environments run with each python version
@@ -82,8 +84,8 @@ deps =
black
isort
commands =
- black {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
- isort {toxinidir}/src {toxinidir}/tests {toxinidir}/setup.py
+ black {[tox]lint_folders}
+ isort {[tox]lint_folders}
[flake8]
# https://flake8.pycqa.org/en/latest/#