rfactor - mostly complete

empa-scientific-it · Feb 1, 2024 · 2a7ac89 · 2a7ac89
1 parent 7082995
commit 2a7ac89
Show file tree

Hide file tree

Showing 3 changed files with 216 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -36,7 +36,7 @@ MANIFEST
 pip-log.txt
 pip-delete-this-directory.txt
 
-# Unit test / coverage reports
+# Unit test / coverage reports / Unit test outputs
 htmlcov/
 .tox/
 .nox/
@@ -50,6 +50,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+*.png
 
 # Translations
 *.mo

diff --git a/cleedpy/preprocessing.py b/cleedpy/preprocessing.py
@@ -0,0 +1,94 @@
+import numpy as np 
+from cleedpy.rfactor import r2_factor, rp_factor
+from scipy.interpolate import CubicHermiteSpline
+
+
+def lorentzian_smoothing(curve, vi):
+    """
+        Draws a Lorentzian curve in the same grid as the given curve.
+        Performs the convolution of the two curves.
+        The given curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+        Inputs:
+            - curve: numpy array of [e,i] arrays
+            - vi: predefined constant representing the width
+
+        Output:
+            The result of the convolution: a new array of [e,i] arrays
+    """
+
+    E = curve[:, 0]
+    I = curve[:, 1]
+    L = []
+
+    for j in range(E.size):
+        c = 0
+        L_value = 0
+        for i in range(E.size):
+            c += vi**2 / ((E[i] - E[j])**2 + vi**2)
+            L_value += (I[i] * vi**2 / ((E[i] - E[j])**2 + vi**2))
+        L.append(L_value / c)
+
+    return np.array(list(zip(E, L)))
+
+
+def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi):
+    """
+        Performs all the preprocessing steps and R-factor calculations, required before the Search.
+        A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+        Inputs:
+            - theoretical_curves: numpy array of arrays of [e,i] arrays
+            - experimental_curves: numpy array of arrays of [e,i] arrays
+                where:
+                    - number of theoretical curves = number of experimental curves
+            - shift: provided by user, used to edit the energy value per step
+            - r_factor: provided by user, name of the preferred R-factor method
+
+        Design:
+            1. Lorentzian smoothing of all curves
+            2. For each energy point:
+                a. Shift theoretical curve
+                b. Find boundaries of overlap
+                c. Filter both curves to keep only the elements in the overlap
+                d. For each experimental curve:
+                    i. Spline = interpolate it to the theoretical grid
+                    ii. Calculate R-factor
+                e. Calculate average R-factor of all curves in this shift
+            3. Find the min of the average values
+
+        Output:
+            The optimal R-factor value per curve.
+    """
+
+    for i in range(len(theoretical_curves)):
+        theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi)
+        experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi)
+
+    energy_shift = [shift, 0]
+    shifted_theoretical_curves = theoretical_curves + energy_shift
+    print(shifted_theoretical_curves)
+
+    # Find the boundaries of the overlap after the shift
+    min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[0]
+    max_bound = np.minimum(shifted_theoretical_curves[0, -1], experimental_curves[0, -1])[0]
+    print(min_bound, max_bound)
+
+    # Filter both curves based on the boundaries
+    filtered_theoretical_curves = shifted_theoretical_curves[shifted_theoretical_curves[:, 0] >= min_bound]
+    filtered_theoretical_curves = filtered_theoretical_curves[filtered_theoretical_curves[:, 0] <= max_bound]
+
+    filtered_experimental_curves = experimental_curves[experimental_curves[:, 0] >= min_bound]
+    filtered_experimental_curves = filtered_experimental_curves[filtered_experimental_curves[:, 0] <= max_bound]
+    print(filtered_theoretical_curves, filtered_experimental_curves)
+
+    # Spline
+    for i in range(len(filtered_experimental_curves)):
+        E_values = filtered_experimental_curves[i][:, 0]
+        I_values = filtered_experimental_curves[i][:, 1]
+        theoretical_E_grid = filtered_theoretical_curves[i][:, 0]
+        CubicHermiteSpline(E_values, I_values, theoretical_E_grid)
+
+
+    print(globals().get(r_factor))
+    return
diff --git a/cleedpy/rfactor.py b/cleedpy/rfactor.py
@@ -1,6 +1,120 @@
-import numpy as np
-
-
-def mean_square_error(y_true, y_pred):
-    """Mean Square Error"""
-    return np.mean(np.square(y_true - y_pred))
+import numpy as np
+
+
+def r2_factor(theoretical_curve, experimental_curve):
+    """
+        Calculates R2-factor of the curve of a specific spot (x,y) of the experiment.
+        A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+        Inputs:
+            - theoretical curve: numpy array of [e,i] arrays
+            - experimental curve: numpy array of [e,i] arrays
+
+        Design:
+            R2 = sqrt{ S(It - c*Ie)^2 / S(It - It_avg)^2 }
+
+            where:
+                c = sqrt( S|It|^2 / S|Ie|^ 2)
+                It_avg = (S It)/ dE
+    """
+
+    # [TODO] (not sure) Use the length of the shortest curve
+    min_length = min(len(theoretical_curve), len(experimental_curve))
+    theoretical_curve = theoretical_curve[:min_length]
+    experimental_curve = experimental_curve[:min_length]
+
+    # Extract the It, Ie values for theoretical and experimental intensity
+    It = theoretical_curve[:, 1]
+    Ie = experimental_curve[:, 1]
+
+    # Calculate normalization factor c and It_avg
+    c = np.sqrt(np.sum(It**2) / np.sum(Ie**2))
+    It_avg = np.sum(It) / It.size   # dE = number of energy steps
+
+    # Calculate the numerator and denominator of R2
+    numerator = np.sum((It - c*Ie)**2)
+    denominator = np.sum((It - It_avg)**2)
+
+    # Calculate R2
+    R2 = np.sqrt(numerator / denominator)
+
+    # [TODO] error handling: may return NaN
+    return R2
+
+
+def rp_factor(theoretical_curve, experimental_curve):
+    """
+        Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment.
+        A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+        Inputs:
+            - theoretical curve: numpy array of [e,i] arrays
+            - experimental curve: numpy array of [e,i] arrays
+
+        Design:
+            Rp = S(Ye - Yt)^2 / S(Ye^2 + Yt^2)
+    """
+
+    # Extract the It, Ie values for theoretical and experimental intensity
+    It = theoretical_curve[:, 1]
+    Ie = experimental_curve[:, 1]
+
+    # Extract the Et, Ee values for theoretical and experimental energy
+    Et = theoretical_curve[:, 0]
+    Ee = experimental_curve[:,0]
+
+    # Calculate theoretical and experimental energy steps
+    step_Et = energy_step(Et)
+    step_Ee = energy_step(Ee)
+
+    # Calculate Y for theoretical and experimental intensity
+    Yt = Y_function(It, step_Et)
+    Ye = Y_function(Ie, step_Ee)
+
+    # Calculate the numerator and denominator of Rp
+    numerator = np.sum((Yt - Ye)**2 * step_Et)
+    denominator = np.sum(Ye**2 * step_Ee) + np.sum(Yt**2 * step_Et)
+
+    # Calculate Rp
+    Rp = numerator / denominator
+
+    # [TODO] error handling: may return NaN
+    return Rp
+
+
+def Y_function(I, energy_step):
+    """
+        Calculates Y for a given curve.
+
+        Inputs:
+            - I: array of intensity values of the curve
+            - E: array of energy values of the curve
+
+        Design:
+            Y = L / (1 + L^2 * vi^2)
+
+            where:
+                L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1]))
+    """
+
+    # [TODO] constants should be defined elsewhere
+    vi = 4
+
+    L = (I[1:] - I[:-1]) / (energy_step * 0.5 * (I[1:] + I[:-1]))
+    Y = L / (1 + L**2 * vi**2)
+
+    return Y
+
+
+def energy_step(E):
+    """
+        Calculates the pairwise energy step. Returns an array.
+
+        Inputs:
+            - E: array of energy values of the curve
+
+        Design:
+            step = E[i] - E[i-1]
+    """
+
+    return E[1:] - E[:-1]