Skip to content
This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Ridge regressor #4

Merged
merged 4 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up rust
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: "3.10"
- run: pip install tox

- name: Build docs
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: 3.8
- run: pip install tox
Expand Down
49 changes: 4 additions & 45 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,57 +1,16 @@
*.so
/*.egg-info
*.pyc
__pycache__
*.DS_store
*.npz
*.dat
*.npy
*.xvg
*.egg-info
.duecredit.p
.tox/
.swp
*.npz
*.dat
*.npy
*.xvg
*.lock

# Cython generated files
_cutil.c
build/
dist/
*.egg-info
__pycache__/

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Sphinx documentation
docs/_build/
docs/html
docs/latex

# editors and IDEs
*.sw[a-z]
*~
.idea
.vscode

# Ignore build dir
/build
/dist

# Ignore the .DS_Store file in the osx file system
*.DS_store

# duecredit
.duecredit.p

# editors and IDEs
*.sw[a-z]
Expand Down
1 change: 1 addition & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
examples
21 changes: 11 additions & 10 deletions docs/src/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# -- Project information -----------------------------------------------------
project = 'equisolve'
copyright = 'All source code is available under the MIT License'
copyright = 'All source code is available under the BSD3 License'
author = equisolve.__authors__
version = equisolve.__version__

Expand All @@ -28,17 +28,18 @@
# Add any Sphinx extension module names here, as strings.
extensions = [
'sphinx.ext.autodoc', # import the modules you are documenting
'sphinx.ext.viewcode', # tries to find the source files where the objects are contained
'sphinx.ext.intersphinx', # generate links to the documentation of objects in external projects
'sphinx.ext.mathjax', # Render math via JavaScript
'sphinx.ext.napoleon', # Support for NumPy and Google style docstrings
'nbsphinx', # provides a source parser for *.ipynb files
"sphinx_gallery.gen_gallery", # provides a source parser for *.ipynb files
]

# Execute the notebooks
nbsphinx_execute = 'always'
nbsphinx_allow_errors = False # Fail if there are errors in notebook
exclude_patterns = ['_build', '**.ipynb_checkpoints']
sphinx_gallery_conf = {
"filename_pattern": "/*",
"examples_dirs": ["../../examples"],
"gallery_dirs": ["examples"],
"min_reported_time": 60,
"reference_url": {"equisolve": None},
"prefer_full_module": ["equisolve"],
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down Expand Up @@ -185,5 +186,5 @@
'https://docs.scipy.org/doc/scipy/': None,
'https://numpy.org/doc/stable/': None,
'https://lab-cosmo.github.io/equistore/latest/': None,
'https://luthaf.fr/rascaline/latest/index.html': None,
'https://luthaf.fr/rascaline/latest/': None,
}
9 changes: 9 additions & 0 deletions docs/src/how-to/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.. _userdoc-how-to:

How-to guides
=============

.. toctree::
:maxdepth: 1

../examples/linear-model
5 changes: 5 additions & 0 deletions docs/src/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@ Overview of Equisolve's Documentation
=====================================

TBD

.. toctree::
:hidden:

how-to/index
1,460 changes: 1,460 additions & 0 deletions docs/static/dataset.xyz

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions examples/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Equisolve Examples
==================

This folder contains introductory examples.
1 change: 1 addition & 0 deletions examples/dataset.xyz
195 changes: 195 additions & 0 deletions examples/linear-model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""
Computing a Linear Model
========================

.. start-body

In this tutorial we calculate a linear model using Ridge regression.
If you have never worked with equistore objects before, please take a look at
the documentation.

For constructing a linear model we need the atomic descriptor as training data
``X`` as well as the energies and forces as target data ``y``.

We first import all necessary packages.
"""

import ase.io
import numpy as np
from equistore import Labels
from equistore.operations import ones_like, slice, sum_over_samples
from rascaline import SoapPowerSpectrum

from equisolve.numpy.models.linear_model import Ridge
from equisolve.utils.convert import ase_to_tensormap


# %%
#
# Dataset
# -------
#
# As data set we use the SHIFTML set. You can obtain the dataset used in this
# example from our :download:`website<../../static/dataset.xyz>`.
# We read the first 20 structures of the data set using
# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.


frames = ase.io.read("dataset.xyz", ":20")

# %%
#
# The data set contains everything we need for the model:
# the atomic positions, from which we compute the descriptor used as training data,
# and the energies and forces, which are the target data we regress against.
#
# Training data
# -------------
#
# We construct the descriptor training data with a SOAP power spectrum using
# rascaline. We first define the hyperparameters for the calculation.


HYPER_PARAMETERS = {
"cutoff": 5.0,
"max_radial": 6,
"max_angular": 4,
"atomic_gaussian_width": 0.3,
"center_atom_weight": 1.0,
"radial_basis": {
"Gto": {},
},
"cutoff_function": {
"ShiftedCosine": {"width": 0.5},
},
}

calculator = SoapPowerSpectrum(**HYPER_PARAMETERS)

# %%
#
# And then run the actual calculation, including gradients with respect to positions.

descriptor = calculator.compute(frames, gradients=["positions"])

# %%
#
# For more details on how the descriptor works see the documentation of
# rascaline.
#
# We now move the keys into samples and properties to access them for our model.

descriptor = descriptor.keys_to_samples("species_center")
descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])

# %%
#
# The descriptor contains a representation with respect to each central atom per
# structure. However, our energies as target data are per structure only.
# Therefore, we sum the properties of each center atom per structure.

X = sum_over_samples(descriptor, ["center", "species_center"])

# %%
#
# The newly defined :class:`equistore.TensorMap` contains a single block

print(f"X contains {len(X.blocks())} block.")

# %%
#
# As well as 1800 properties and 20 samples.
#
# We access the data using the :meth:`equistore.TensorMap.block` method.


print(f"X contains {len(X.block().properties)} properties.")
print(f"X contains {len(X.block().samples)} samples.")

# %%
#
# Target data
# -----------
#
# We construct the target data by converting energies and forces into a
# :class:`equistore.TensorMap`.


y = ase_to_tensormap(frames, energy="energy", forces="forces")

# %%
#
# The target data y contains a single block

print(y.block())

# %%
#
# Construct the model
# -------------------
#
# Before we fit the model we have to define our regression values.
#
# For this we create a TensorMap containing the desired regularizer.

alpha = ones_like(X)
alpha.block().values[:] *= 1e-5

# %%
#
# So far ``alpha`` contains the same number of samples as ``X``. However,
# the regularizer only needs one sample, because all samples will be
# regularized in the same way in a linear model.
#
# We remove all samples except the 0th one by using
# :func:`equistore.operations.slice`.

samples = Labels(
names=["structure"],
values=np.array([(0,)]),
)

alpha = slice(alpha, samples=samples)

# %%
#
# In our regularizer we use the same values for all properties. However,
# :class:`equisolve.numpy.models.linear_model.Ridge` can also handle different
# regularization for each property. You can apply a property-wise regularization by
# setting ``"values"`` of ``alpha_dict`` with a 1d array of the same length as the
# number of properties in the training data X (here 1800).
#
# With a valid regularizer object we now initialize the Ridge object.
# ``parameter_keys`` determines with respect to which parameters the regression is
# performed. Here, we choose a regression with respect to ``"values"`` (energies)
# and ``"positions"`` (forces).


clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)

# %%
#
# Next we create a sample weighting :class:`equistore.TensorMap` that weights energies
# five times more than the forces.

sw = ones_like(y)
sw.block().values[:] *= 5

# %%
#
# The function `equisolve.utils.dictionary_to_tensormap` creates a
# :class:`equistore.TensorMap` with the same shape as our target data ``y`` but with
# values as defined by ``sw_dict``.

print(sw)

# %%
#
# Now we can fit the model using the sample weights defined above.

clf.fit(X, y, sample_weight=sw)


# %%
#
# Finally we can predict values and calculate the root mean square error
# of our model.

clf.predict(X)
print(f"RMSE energies = {clf.score(X, y, parameter_key='values')[0]:.3f} eV")
print(f"RMSE forces = {clf.score(X, y, parameter_key='positions')[0]:.3f} eV/Å")
8 changes: 6 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ packages = find:
include_package_data = True

install_requires =
equistore @ https://github.com/lab-cosmo/equistore/archive/467baf0.zip
numpy>=1.19.0
scipy>=1.0.0

Expand All @@ -54,20 +55,23 @@ where = src

[options.extras_require]
tests =
ase
coverage[toml]
hypothesis
pytest
pytest-cov
ase
equistore @ https://github.com/lab-cosmo/equistore/archive/6ca7fa3.zip
rascaline @ https://github.com/luthaf/rascaline/archive/master.zip
docs =
ase
furo
ipykernel
ipywidgets
matplotlib
nbsphinx
nbval
sphinx
sphinx-gallery
rascaline @ https://github.com/luthaf/rascaline/archive/master.zip

[bdist_wheel]
universal=1
Expand Down
Loading