Skip to content

Commit

Permalink
rfactor - mostly complete
Browse files Browse the repository at this point in the history
  • Loading branch information
despadam committed Feb 1, 2024
1 parent 7082995 commit 2a7ac89
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ MANIFEST
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
# Unit test / coverage reports / Unit test outputs
htmlcov/
.tox/
.nox/
Expand All @@ -50,6 +50,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
*.png

# Translations
*.mo
Expand Down
94 changes: 94 additions & 0 deletions cleedpy/preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import numpy as np
from cleedpy.rfactor import r2_factor, rp_factor
from scipy.interpolate import CubicHermiteSpline


def lorentzian_smoothing(curve, vi):
"""
Draws a Lorentzian curve in the same grid as the given curve.
Performs the convolution of the two curves.
The given curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- curve: numpy array of [e,i] arrays
- vi: predefined constant representing the width
Output:
The result of the convolution: a new array of [e,i] arrays
"""

E = curve[:, 0]
I = curve[:, 1]
L = []

for j in range(E.size):
c = 0
L_value = 0
for i in range(E.size):
c += vi**2 / ((E[i] - E[j])**2 + vi**2)
L_value += (I[i] * vi**2 / ((E[i] - E[j])**2 + vi**2))
L.append(L_value / c)

return np.array(list(zip(E, L)))


def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi):
"""
Performs all the preprocessing steps and R-factor calculations, required before the Search.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical_curves: numpy array of arrays of [e,i] arrays
- experimental_curves: numpy array of arrays of [e,i] arrays
where:
- number of theoretical curves = number of experimental curves
- shift: provided by user, used to edit the energy value per step
- r_factor: provided by user, name of the preferred R-factor method
Design:
1. Lorentzian smoothing of all curves
2. For each energy point:
a. Shift theoretical curve
b. Find boundaries of overlap
c. Filter both curves to keep only the elements in the overlap
d. For each experimental curve:
i. Spline = interpolate it to the theoretical grid
ii. Calculate R-factor
e. Calculate average R-factor of all curves in this shift
3. Find the min of the average values
Output:
The optimal R-factor value per curve.
"""

for i in range(len(theoretical_curves)):
theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi)
experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi)

energy_shift = [shift, 0]
shifted_theoretical_curves = theoretical_curves + energy_shift
print(shifted_theoretical_curves)

# Find the boundaries of the overlap after the shift
min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[0]
max_bound = np.minimum(shifted_theoretical_curves[0, -1], experimental_curves[0, -1])[0]
print(min_bound, max_bound)

# Filter both curves based on the boundaries
filtered_theoretical_curves = shifted_theoretical_curves[shifted_theoretical_curves[:, 0] >= min_bound]
filtered_theoretical_curves = filtered_theoretical_curves[filtered_theoretical_curves[:, 0] <= max_bound]

filtered_experimental_curves = experimental_curves[experimental_curves[:, 0] >= min_bound]
filtered_experimental_curves = filtered_experimental_curves[filtered_experimental_curves[:, 0] <= max_bound]
print(filtered_theoretical_curves, filtered_experimental_curves)

# Spline
for i in range(len(filtered_experimental_curves)):
E_values = filtered_experimental_curves[i][:, 0]
I_values = filtered_experimental_curves[i][:, 1]
theoretical_E_grid = filtered_theoretical_curves[i][:, 0]
CubicHermiteSpline(E_values, I_values, theoretical_E_grid)


print(globals().get(r_factor))
return
126 changes: 120 additions & 6 deletions cleedpy/rfactor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,120 @@
import numpy as np


def mean_square_error(y_true, y_pred):
"""Mean Square Error"""
return np.mean(np.square(y_true - y_pred))
import numpy as np


def r2_factor(theoretical_curve, experimental_curve):
"""
Calculates R2-factor of the curve of a specific spot (x,y) of the experiment.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical curve: numpy array of [e,i] arrays
- experimental curve: numpy array of [e,i] arrays
Design:
R2 = sqrt{ S(It - c*Ie)^2 / S(It - It_avg)^2 }
where:
c = sqrt( S|It|^2 / S|Ie|^ 2)
It_avg = (S It)/ dE
"""

# [TODO] (not sure) Use the length of the shortest curve
min_length = min(len(theoretical_curve), len(experimental_curve))
theoretical_curve = theoretical_curve[:min_length]
experimental_curve = experimental_curve[:min_length]

# Extract the It, Ie values for theoretical and experimental intensity
It = theoretical_curve[:, 1]
Ie = experimental_curve[:, 1]

# Calculate normalization factor c and It_avg
c = np.sqrt(np.sum(It**2) / np.sum(Ie**2))
It_avg = np.sum(It) / It.size # dE = number of energy steps

# Calculate the numerator and denominator of R2
numerator = np.sum((It - c*Ie)**2)
denominator = np.sum((It - It_avg)**2)

# Calculate R2
R2 = np.sqrt(numerator / denominator)

# [TODO] error handling: may return NaN
return R2


def rp_factor(theoretical_curve, experimental_curve):
"""
Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical curve: numpy array of [e,i] arrays
- experimental curve: numpy array of [e,i] arrays
Design:
Rp = S(Ye - Yt)^2 / S(Ye^2 + Yt^2)
"""

# Extract the It, Ie values for theoretical and experimental intensity
It = theoretical_curve[:, 1]
Ie = experimental_curve[:, 1]

# Extract the Et, Ee values for theoretical and experimental energy
Et = theoretical_curve[:, 0]
Ee = experimental_curve[:,0]

# Calculate theoretical and experimental energy steps
step_Et = energy_step(Et)
step_Ee = energy_step(Ee)

# Calculate Y for theoretical and experimental intensity
Yt = Y_function(It, step_Et)
Ye = Y_function(Ie, step_Ee)

# Calculate the numerator and denominator of Rp
numerator = np.sum((Yt - Ye)**2 * step_Et)
denominator = np.sum(Ye**2 * step_Ee) + np.sum(Yt**2 * step_Et)

# Calculate Rp
Rp = numerator / denominator

# [TODO] error handling: may return NaN
return Rp


def Y_function(I, energy_step):
"""
Calculates Y for a given curve.
Inputs:
- I: array of intensity values of the curve
- E: array of energy values of the curve
Design:
Y = L / (1 + L^2 * vi^2)
where:
L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1]))
"""

# [TODO] constants should be defined elsewhere
vi = 4

L = (I[1:] - I[:-1]) / (energy_step * 0.5 * (I[1:] + I[:-1]))
Y = L / (1 + L**2 * vi**2)

return Y


def energy_step(E):
"""
Calculates the pairwise energy step. Returns an array.
Inputs:
- E: array of energy values of the curve
Design:
step = E[i] - E[i-1]
"""

return E[1:] - E[:-1]

0 comments on commit 2a7ac89

Please sign in to comment.