From 62aad14d884520d585ebbf631f0984fb4d68c42c Mon Sep 17 00:00:00 2001 From: acreegan Date: Thu, 15 Feb 2024 10:26:48 +1300 Subject: [PATCH 1/9] Update PCA embedder to add load and save functionality Add tests --- .../DataAugmentationUtils/Embedder.py | 261 ++++++++++++++---- Testing/PythonTests/PythonTests.cpp | 4 + Testing/PythonTests/pcaembedder.py | 156 +++++++++++ 3 files changed, 363 insertions(+), 58 deletions(-) create mode 100644 Testing/PythonTests/pcaembedder.py diff --git a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py index 4c060f4223..c20a8e77d7 100644 --- a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py +++ b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py @@ -6,6 +6,8 @@ import numpy as np from abc import ABC, abstractmethod from shapeworks.utils import sw_message +from pathlib import Path +from glob import glob # abstract base class for embedders class Embedder(ABC): @@ -19,61 +21,204 @@ def project(self, PCA_instance): # instance of embedder that uses PCA for dimension reduction class PCA_Embbeder(Embedder): - # overriding abstract methods - def __init__(self, data_matrix, num_dim=0, percent_variability=0.95): - self.data_matrix = data_matrix - num_dim = self.run_PCA(num_dim, percent_variability) - self.num_dim = num_dim - # run PCA on data_matrix for PCA_Embedder - def run_PCA(self, num_dim, percent_variability): - # get covariance matrix (uses compact trick) - N = self.data_matrix.shape[0] - data_matrix_2d = self.data_matrix.reshape(self.data_matrix.shape[0], -1).T # flatten data instances and transpose - mean = np.mean(data_matrix_2d, axis=1) - centered_data_matrix_2d = (data_matrix_2d.T - mean).T - trick_cov_matrix = np.dot(centered_data_matrix_2d.T,centered_data_matrix_2d) * 1.0/np.sqrt(N-1) - # get eignevectors and eigenvalues - eigen_values, eigen_vectors = np.linalg.eigh(trick_cov_matrix) - eigen_vectors = np.dot(centered_data_matrix_2d, eigen_vectors) - for i in range(N): - eigen_vectors[:,i] = eigen_vectors[:,i]/np.linalg.norm(eigen_vectors[:,i]) - eigen_values = np.flip(eigen_values) - eigen_vectors = np.flip(eigen_vectors, 1) - # get num PCA components - cumDst = np.cumsum(eigen_values) / np.sum(eigen_values) - if num_dim == 0: - cumDst = np.cumsum(eigen_values) / np.sum(eigen_values) - num_dim = np.where(cumDst > float(percent_variability))[0][0] + 1 - W = eigen_vectors[:, :num_dim] - PCA_scores = np.matmul(centered_data_matrix_2d.T, W) - sw_message(f"The PCA modes of particles being retained : {num_dim}") - sw_message(f"Variablity preserved: {str(float(cumDst[num_dim-1]))}") - self.num_dim = num_dim - self.PCA_scores = PCA_scores - self.eigen_vectors = eigen_vectors - self.eigen_values = eigen_values - return num_dim - # write PCA info to files - # @TODO do we need all of this? - def write_PCA(self, out_dir, suffix): - if not os.path.exists(out_dir): - os.makedirs(out_dir) - np.save(out_dir + 'original_PCA_scores.npy', self.PCA_scores) - mean = np.mean(self.data_matrix, axis=0) - np.savetxt(out_dir + 'mean.' + suffix, mean) - np.savetxt(out_dir + 'eigenvalues.txt', self.eigen_values) - for i in range(self.data_matrix.shape[0]): - nm = out_dir + 'pcamode' + str(i) + '.' 
+ suffix
-            data = self.eigen_vectors[:, i]
-            data = data.reshape(self.data_matrix.shape[1:])
-            np.savetxt(nm, data)
-    # returns embedded form of dtat_matrix
-    def getEmbeddedMatrix(self):
-        return self.PCA_scores
-    # projects embbed array into data
-    def project(self, PCA_instance):
-        W = self.eigen_vectors[:, :self.num_dim].T
-        mean = np.mean(self.data_matrix, axis=0)
-        data_instance = np.matmul(PCA_instance, W) + mean.reshape(-1)
-        data_instance = data_instance.reshape((self.data_matrix.shape[1:]))
-        return data_instance
\ No newline at end of file
+    def __init__(self, data_matrix=None, num_dim=0, percent_variability=0.95):
+        """
+        Initialize the PCA_Embedder. If data_matrix is provided, a PCA model is generated.
+        Otherwise, the attributes defining the model are initialized as None. A model can then be initialized from
+        arrays using load_PCA.
+
+        Parameters
+        ----------
+        data_matrix
+            Data to use to generate a PCA model
+        num_dim
+            Number of PCA dimensions to keep in the generated PCA scores. (The maximum is data_matrix.shape[0]-1,
+            i.e., the maximum number of modes of variation is one less than the number of samples used to build
+            the model.) If set to zero, the maximum number of dimensions is kept.
+        percent_variability
+            Percentage of the variation in the input data to keep in the generated PCA scores, scaled to between
+            0 and 1. This is only used if num_dim is not set.
+        """
+        super().__init__(data_matrix)
+        self.PCA_scores = None
+        self.eigen_vectors = None
+        self.eigen_values = None
+
+        if data_matrix is not None:
+            self.data_matrix = data_matrix
+            self.mean_data = np.mean(self.data_matrix, axis=0)
+            self.run_PCA(num_dim, percent_variability)
+
+    def run_PCA(self, num_dim, percent_variability):
+        """
+        Perform principal component analysis on the data_matrix.
+
+        Parameters
+        ----------
+        num_dim
+            Number of PCA dimensions to keep in the model. (The maximum is data_matrix.shape[0]-1, i.e., the
+            maximum number of modes of variation is one less than the number of samples used to build the model.)
+            If set to zero, the maximum number of dimensions is kept.
+        percent_variability
+            Percentage of the variation in the input data to keep in the model, scaled to between 0 and 1.
+            This is only used if num_dim is not set.
+
+        Returns
+        -------
+        num_dim
+            The number of PCA dimensions actually used
+        """
+        # get covariance matrix (uses compact trick)
+        N = self.data_matrix.shape[0]
+        data_matrix_2d = self.data_matrix.reshape(self.data_matrix.shape[0],
+                                                  -1).T  # flatten data instances and transpose
+        mean = np.mean(data_matrix_2d, axis=1)
+        centered_data_matrix_2d = (data_matrix_2d.T - mean).T
+        trick_cov_matrix = np.dot(centered_data_matrix_2d.T, centered_data_matrix_2d) * 1.0 / np.sqrt(N - 1)
+        # get eigenvectors and eigenvalues
+        eigen_values, eigen_vectors = np.linalg.eigh(trick_cov_matrix)
+        eigen_vectors = np.dot(centered_data_matrix_2d, eigen_vectors)
+        for i in range(N):
+            eigen_vectors[:, i] = eigen_vectors[:, i] / np.linalg.norm(eigen_vectors[:, i])
+        eigen_values = np.flip(eigen_values)
+        eigen_vectors = np.flip(eigen_vectors, 1)
+        # get num PCA components
+        # Note that the number of eigenvalues and eigenvectors equals the number of samples in the data
+        # matrix, but the last column is not used in the model because it describes no variation. 
+        cumDst = np.cumsum(eigen_values) / np.sum(eigen_values)
+        if num_dim == 0:
+            num_dim = np.where(cumDst >= float(percent_variability))[0][0] + 1
+        W = eigen_vectors[:, :num_dim]
+        PCA_scores = np.matmul(centered_data_matrix_2d.T, W)
+        sw_message(f"The PCA modes of particles being retained : {num_dim}")
+        sw_message(f"Variability preserved: {str(float(cumDst[num_dim - 1]))}")
+
+        self.PCA_scores = PCA_scores
+        self.eigen_vectors = eigen_vectors
+        self.eigen_values = eigen_values
+        return num_dim
+
+    def write_PCA(self, out_dir: Path, score_option="full", suffix="txt"):
+        """
+        Write PCA data to a specified directory.
+
+        Parameters
+        ----------
+        out_dir
+            Directory in which to save PCA data
+        score_option
+            Option for how to save PCA scores. The full scores can be used to recreate the data used to create the
+            model, which may be privileged information, so options are provided to save no information about the
+            scores. Options are:
+                full: Save complete scores
+                Otherwise: Don't save scores
+        suffix
+            File extension to use
+        """
+        out_dir = Path(out_dir)
+        if not os.path.exists(out_dir):
+            os.makedirs(out_dir)
+        if score_option == "full":
+            np.savetxt(str(out_dir / f'original_PCA_scores.{suffix}'), self.PCA_scores)
+
+        mean = np.mean(self.data_matrix, axis=0)
+        np.savetxt(str(out_dir / f'mean.{suffix}'), mean)
+        np.savetxt(str(out_dir / f'eigenvalues.{suffix}'), self.eigen_values)
+        for i in range(self.data_matrix.shape[0]):
+            nm = str(out_dir / f'pcamode{i}.{suffix}')
+            data = self.eigen_vectors[:, i]
+            data = data.reshape(self.data_matrix.shape[1:])
+            np.savetxt(nm, data)
+
+    def project(self, PCA_instance):
+        """
+        Maps a given set of scores to the data values (e.g., coordinate points) they represent, using the stored
+        PCA model.
+
+        Parameters
+        ----------
+        PCA_instance
+            A row vector containing one score for each PCA mode.
+
+        Returns
+        -------
+        data_instance
+            Data represented by the input scores for this PCA model
+
+        """
+        num_dim = len(PCA_instance)
+        W = self.eigen_vectors[:, :num_dim].T
+        data_instance = np.matmul(PCA_instance, W) + self.mean_data.reshape(-1)
+        data_instance = data_instance.reshape(self.mean_data.shape)
+        return data_instance
+
+    def load_PCA(self, mean_data, eigen_values, eigen_vectors, scores=None):
+        """
+        Load a PCA model from arrays.
+
+        Parameters
+        ----------
+        mean_data
+            Mean of the training data, with the shape of a single data sample
+        eigen_values
+            Eigenvalues of the model, sorted in descending order
+        eigen_vectors
+            Matrix of eigenvectors, one per column, ordered to match eigen_values
+        scores
+            Optional PCA scores of the data used to build the model
+        """
+        self.mean_data = mean_data
+        self.eigen_values = eigen_values
+        self.eigen_vectors = eigen_vectors
+        self.PCA_scores = scores
+
+    @classmethod
+    def from_directory(cls, directory: Path):
+        """
+        Factory function to create a PCA_Embbeder instance by loading saved data from a specified directory.
+
+        Parameters
+        ----------
+        directory
+            Directory from which to load data
+
+        Returns
+        -------
+        embedder
+            PCA_Embbeder instance
+
+        """
+        directory = Path(directory)
+
+        mean = np.loadtxt(glob(str(directory / "mean*"))[0])
+        eigen_values = np.loadtxt(glob(str(directory / "eigenvalues*"))[0])
+        eigen_vectors = []
+
+        eigen_vector_files = glob(str(directory / "pcamode*"))
+        eigen_vector_files.sort(key=lambda f: int(str(Path(f).stem).split("pcamode")[-1]))  # Sort numerically by mode number
+        for file in eigen_vector_files:
+            eigen_vector = np.flip(np.loadtxt(file))
+            eigen_vectors.append(eigen_vector.reshape((-1)))
+
+        eigen_vectors = np.rot90(np.array(eigen_vectors))
+
+        embedder = cls()
+
+        scores = None
+        if scores_glob := glob(str(directory / "original_PCA_scores*")):
+            scores = np.loadtxt(scores_glob[0])
+
+        embedder.load_PCA(mean, eigen_values, eigen_vectors, scores=scores)
+
+        return embedder
+
+    def getEmbeddedMatrix(self):
+        """
+        Get the embedded form of data_matrix.
+
+        Returns
+        -------
+        PCA_scores
+            A matrix with one row for each input data sample. Each column holds the score for one PCA mode, and
+            together the scores in a row represent that sample.
+            The number of columns indicates the number of PCA modes that were used to generate the scores.
+
+        """
+        return self.PCA_scores
diff --git a/Testing/PythonTests/PythonTests.cpp b/Testing/PythonTests/PythonTests.cpp
index 31d39a900c..e40f995b46 100644
--- a/Testing/PythonTests/PythonTests.cpp
+++ b/Testing/PythonTests/PythonTests.cpp
@@ -331,6 +331,10 @@ TEST(pythonTests, pcaTest) {
   run_test("pca.py");
 }
 
+TEST(pythonTests, pcaEmbedderTest) {
+  run_test("pcaembedder.py")
+}
+
 TEST(pythonTests, findreferencemeshTest) {
   run_test("findReferenceMesh.py");
 }
diff --git a/Testing/PythonTests/pcaembedder.py b/Testing/PythonTests/pcaembedder.py
new file mode 100644
index 0000000000..c648e7bd72
--- /dev/null
+++ b/Testing/PythonTests/pcaembedder.py
@@ -0,0 +1,156 @@
+import shapeworks as sw
+import pyvista as pv
+from DataAugmentationUtils.Embedder import PCA_Embbeder
+import numpy as np
+import tempfile
+from pathlib import Path
+from glob import glob
+from sklearn.decomposition import PCA
+
+
+def test_compare_pca_methods():
+    # Prepare meshes with known stdev
+    # ------------------------------------------------------------------------------------------------------------------
+    std = 0.5
+    mean = 1.5
+    n_samples = 40
+
+    rng = np.random.default_rng(0)
+    scales = rng.normal(mean, std, n_samples)
+    scales = np.sort(scales)
+
+    meshes = []
+    for scale in scales:
+        mesh = pv.Sphere(theta_resolution=20, phi_resolution=20, radius=1.5, center=[0, 0, 0]).scale([scale, 1, 1],
+                                                                                                     inplace=False)
+        meshes.append(mesh)
+
+    points = np.array([mesh.points for mesh in meshes])
+    # Add some noise. 
The test fails without this + points = points + rng.normal(0, 0.01, points.shape) + + # Method 1: Shapeworks PCA embedder + # ------------------------------------------------------------------------------------------------------------------ + embedder = PCA_Embbeder(points, num_dim=len(meshes) - 1) + + mean_data = embedder.mean_data + project_zeros = embedder.project(np.zeros(len(points) - 1)) + + np.testing.assert_allclose(project_zeros, mean_data) + + for scores, p in zip(embedder.PCA_scores, points): + np.testing.assert_allclose(embedder.project(scores), p) + + # Method 2: sklearn PCA + # ------------------------------------------------------------------------------------------------------------------ + pca = PCA(svd_solver="auto") + pca_loadings = pca.fit_transform(points.reshape([points.shape[0], -1])) + + np.testing.assert_allclose(pca_loadings[:, 0], embedder.PCA_scores[:, 0]) + + for scores, p in zip(pca_loadings, points): + np.testing.assert_allclose(pca.inverse_transform(scores).reshape([-1, 3]), p) + + # Method 3: Shapeworks ShapeStatistics + # Go through temp directory because ParticleSystem can only be created with files + # ------------------------------------------------------------------------------------------------------------------ + with tempfile.TemporaryDirectory() as td: + for i, p in enumerate(points): + filename = str(Path(td) / f"{i}_particles") + np.savetxt(filename, p) + + files = glob(str(Path(td) / "*particles")) + particle_system = sw.ParticleSystem(files) + + shape_statistics = sw.ParticleShapeStatistics() + shape_statistics.PCA(particleSystem=particle_system, domainsPerShape=1) + shape_statistics.principalComponentProjections() + + loadings = np.flip(np.sort(shape_statistics.pcaLoadings()[:, 0])) + # This API does not yet have an inverse function + + # Compare loadings of all methods + # ------------------------------------------------------------------------------------------------------------------ + np.testing.assert_allclose(loadings, embedder.PCA_scores[:, 0]) + np.testing.assert_allclose(pca_loadings[:, 0], embedder.PCA_scores[:, 0]) + + +def test_pca_load_and_save(): + # Prepare meshes... + std = 0.5 + mean = 1.5 + n_samples = 40 + + rng = np.random.default_rng(0) + scales = rng.normal(mean, std, n_samples) + scales = np.sort(scales) + + meshes = [] + for scale in scales: + mesh = pv.Sphere(theta_resolution=20, phi_resolution=20, radius=1.5, center=[0, 0, 0]).scale([scale, 1, 1], + inplace=False) + meshes.append(mesh) + + points = np.array([mesh.points for mesh in meshes]) + # Add some noise. 
The test fails without this + points = points + rng.normal(0, 0.01, points.shape) + + # Create PCA embedder + embedder = PCA_Embbeder(points, num_dim=len(meshes) - 1) + + # Write and read from file + with tempfile.TemporaryDirectory() as td: + embedder.write_PCA(Path(td), score_option="full") + embedder2 = PCA_Embbeder.from_directory(Path(td)) + + for scores1, scores2, p in zip(embedder.PCA_scores, embedder2.PCA_scores, points): + np.testing.assert_allclose(embedder.project(scores1), p) + np.testing.assert_allclose(embedder2.project(scores2), p) + + # Write and read from file without scores + with tempfile.TemporaryDirectory() as td: + embedder.write_PCA(Path(td), score_option="none") + embedder_2 = PCA_Embbeder.from_directory(Path(td)) + + for scores, p in zip(embedder.PCA_scores, points): + np.testing.assert_allclose(embedder.project(scores), p) + np.testing.assert_allclose(embedder_2.project(scores), p) + + +def test_pca_percent_variability(): + # Prepare meshes with multiple shape modes + std_x = 0.5 + mean_x = 1.5 + std_y = 0.4 + mean_y = 1.4 + n_samples = 40 + + rng = np.random.default_rng(0) + scales_x = rng.normal(mean_x, std_x, n_samples) + scales_y = rng.normal(mean_y, std_y, n_samples) + + meshes = [] + for scale_x, scale_y in zip(scales_x, scales_y): + mesh = pv.Sphere(theta_resolution=20, phi_resolution=20, radius=1.5, center=[0, 0, 0]).scale( + [scale_x, scale_y, 1], + inplace=False) + meshes.append(mesh) + + points = np.array([mesh.points for mesh in meshes]) + # Add some noise. The test fails without this + points = points + rng.normal(0, 0.01, points.shape) + + # Create PCA embedder + embedder1 = PCA_Embbeder(points, percent_variability=0.5) + embedder2 = PCA_Embbeder(points, percent_variability=1) + + assert len(embedder1.PCA_scores[0]) == 1 + assert len(embedder2.PCA_scores[0]) == (len(meshes) - 1) + + # Can project with lower number of scores with no problems + embedder1.project(embedder1.PCA_scores[0]) + embedder2.project(embedder2.PCA_scores[0]) + +test_compare_pca_methods() +test_pca_load_and_save() +test_pca_percent_variability() From d6610098f93104bb8cd41df9282edb249901ff2e Mon Sep 17 00:00:00 2001 From: acreegan Date: Thu, 15 Feb 2024 10:52:37 +1300 Subject: [PATCH 2/9] Adding an action just to run tests --- .github/workflows/build-test-linux.yml | 104 +++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 .github/workflows/build-test-linux.yml diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml new file mode 100644 index 0000000000..5e11a4b8a9 --- /dev/null +++ b/.github/workflows/build-test-linux.yml @@ -0,0 +1,104 @@ +name: Linux Build Test + +on: + workflow_dispatch + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
+ BUILD_TYPE: Release + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + + runs-on: ubuntu-latest + container: akenmorris/ubuntu-build-box-sw65 + + steps: + + - name: Conda info + run: conda info + + - name: Check space1 + run: df -h + + - name: Free some space + run: cd /__t ; rm -rf CodeQL go + + - name: Check space2 + run: df -h + + - name: Checkout code + uses: actions/checkout@v3 + with: + lfs: true + + - name: Workaround for permission issue + run: git config --global --add safe.directory /__w/ShapeWorks/ShapeWorks + + - name: Get tags + run: git fetch --unshallow origin +refs/tags/*:refs/tags/* + + - name: Check space3 + run: df -h + + - name: Restore Caches + shell: bash -l {0} + run: .github/workflows/restore_caches.sh + + - name: Conda Installs + shell: bash -l {0} + run: .github/workflows/gha_conda.sh + + - name: try import vtk + shell: bash -l {0} + run: conda activate shapeworks && python -c "import vtk" + + - name: Build Dependencies + shell: bash -l {0} + run: .github/workflows/gha_deps.sh + + - name: Check space4 + run: df -h + + - name: cmake + shell: bash -l {0} + run: conda activate shapeworks && mkdir build && cd build && cmake -DCMAKE_CXX_FLAGS=-g -DITK_DIR=$HOME/install/lib/cmake/ITK-5.2 -DVTK_DIR=$HOME/install/lib/cmake/vtk-9.1 -DXLNT_DIR=$HOME/install -DLIBIGL_DIR=$HOME/install -DOpenVDB_DIR=$HOME/install/lib/cmake/OpenVDB -DGEOMETRYCENTRAL_DIR=$HOME/install -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBuild_Studio=ON -DJKQTCommonSharedLib_DIR=$HOME/install/lib/cmake/JKQTCommonSharedLib -DJKQTMathTextSharedLib_DIR=$HOME/install/lib/cmake/JKQTMathTextSharedLib -DJKQTPlotterSharedLib_DIR=$HOME/install/lib/cmake/JKQTPlotterSharedLib -DACVD_DIR=$HOME/install -DCMAKE_PREFIX_PATH=${CONDA_PREFIX} -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/shapeworks-install -DUSE_ORIGIN_RPATH=ON -DGA_MEASUREMENT_ID=$GA_MEASUREMENT_ID .. + + - name: Check space5 + run: df -h + + - name: make + shell: bash -l {0} + run: conda activate shapeworks && cd build && make -j4 + + - name: Check space6 + run: df -h + + - name: Du + run: cd / ; du -sh * || true + + - name: make install + shell: bash -l {0} + run: conda activate shapeworks && cd build && make install + + - name: Build Binary Package + shell: bash -l {0} + env: + PR_NUMBER: ${{ github.event.number }} + run: conda activate shapeworks && source ./devenv.sh ./build/bin && PATH=$HOME:$PATH ./Support/package.sh tag ${GITHUB_WORKSPACE}/shapeworks-install $HOME/install + + - name: Download test data + shell: bash -l {0} + run: .github/workflows/download_test_data.sh + + - name: make test + shell: bash -l {0} + run: conda activate shapeworks && source ./devenv.sh ./build/bin && cd build && ctest -VV + + + + From 39c47af12b7c291677b62ec45522a33b5476eeca Mon Sep 17 00:00:00 2001 From: acreegan Date: Thu, 15 Feb 2024 10:57:37 +1300 Subject: [PATCH 3/9] update workflow run conditions --- .github/workflows/build-test-linux.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 5e11a4b8a9..cf23819d5c 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -1,7 +1,8 @@ name: Linux Build Test on: - workflow_dispatch + workflow_dispatch: + pull_request: env: # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
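A minimal sketch of the save/load round trip that the new pcaembedder.py test exercises, using the PCA_Embbeder API added in PATCH 1. The data shapes and the output directory here are illustrative assumptions, not taken from the patch:

    import numpy as np
    from pathlib import Path
    from DataAugmentationUtils.Embedder import PCA_Embbeder

    # Hypothetical training data: 10 samples of 100 3D points.
    rng = np.random.default_rng(0)
    points = rng.normal(size=(10, 100, 3))

    # Fit a model keeping the maximum 10 - 1 = 9 modes and write it to disk.
    embedder = PCA_Embbeder(points, num_dim=9)
    embedder.write_PCA(Path("pca_out"), score_option="full", suffix="txt")

    # Rebuild the model from the saved files and reconstruct the first sample.
    embedder2 = PCA_Embbeder.from_directory(Path("pca_out"))
    reconstructed = embedder2.project(embedder2.PCA_scores[0])
    np.testing.assert_allclose(reconstructed, points[0])

With score_option="full" the scores file is written and from_directory picks it up; with any other value the reloaded model can still project() scores passed to it, but its PCA_scores attribute is None.
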
From d9520261ae269db8c3c3582930c983d251967d0c Mon Sep 17 00:00:00 2001 From: acreegan Date: Thu, 15 Feb 2024 11:01:02 +1300 Subject: [PATCH 4/9] remove get tags --- .github/workflows/build-test-linux.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index cf23819d5c..50c92ba3c7 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -39,9 +39,6 @@ jobs: - name: Workaround for permission issue run: git config --global --add safe.directory /__w/ShapeWorks/ShapeWorks - - - name: Get tags - run: git fetch --unshallow origin +refs/tags/*:refs/tags/* - name: Check space3 run: df -h From a99ccf609bc4f994674c25ddc37a3f3c5c3277fd Mon Sep 17 00:00:00 2001 From: acreegan Date: Thu, 15 Feb 2024 12:18:31 +1300 Subject: [PATCH 5/9] add missing semicolon hopefully add store cache to workflow --- .github/workflows/build-test-linux.yml | 6 +++--- Testing/PythonTests/PythonTests.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 50c92ba3c7..e47c3a2221 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -97,6 +97,6 @@ jobs: shell: bash -l {0} run: conda activate shapeworks && source ./devenv.sh ./build/bin && cd build && ctest -VV - - - + - name: Store ccache + shell: bash -l {0} + run: .github/workflows/store_ccache.sh diff --git a/Testing/PythonTests/PythonTests.cpp b/Testing/PythonTests/PythonTests.cpp index e40f995b46..883fd75edd 100644 --- a/Testing/PythonTests/PythonTests.cpp +++ b/Testing/PythonTests/PythonTests.cpp @@ -332,7 +332,7 @@ TEST(pythonTests, pcaTest) { } TEST(pythonTests, pcaEmbedderTest) { - run_test("pcaembedder.py") + run_test("pcaembedder.py"); } TEST(pythonTests, findreferencemeshTest) { From fd5516c77037f7f8909fc3a905ea53dd054d7465 Mon Sep 17 00:00:00 2001 From: acreegan Date: Fri, 16 Feb 2024 10:17:15 +1300 Subject: [PATCH 6/9] does removing the flip fix the test? --- Testing/PythonTests/pcaembedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Testing/PythonTests/pcaembedder.py b/Testing/PythonTests/pcaembedder.py index c648e7bd72..b53b7401eb 100644 --- a/Testing/PythonTests/pcaembedder.py +++ b/Testing/PythonTests/pcaembedder.py @@ -66,7 +66,7 @@ def test_compare_pca_methods(): shape_statistics.PCA(particleSystem=particle_system, domainsPerShape=1) shape_statistics.principalComponentProjections() - loadings = np.flip(np.sort(shape_statistics.pcaLoadings()[:, 0])) + loadings = np.sort(shape_statistics.pcaLoadings()[:, 0]) # This API does not yet have an inverse function # Compare loadings of all methods From dc9d3c39fb05959215938735c5a2ce453b4586bb Mon Sep 17 00:00:00 2001 From: acreegan Date: Fri, 16 Feb 2024 11:59:45 +1300 Subject: [PATCH 7/9] does multiplying loadings by -1 fix the test? 
--- Testing/PythonTests/pcaembedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Testing/PythonTests/pcaembedder.py b/Testing/PythonTests/pcaembedder.py index b53b7401eb..2a1ddfb9ea 100644 --- a/Testing/PythonTests/pcaembedder.py +++ b/Testing/PythonTests/pcaembedder.py @@ -71,7 +71,7 @@ def test_compare_pca_methods(): # Compare loadings of all methods # ------------------------------------------------------------------------------------------------------------------ - np.testing.assert_allclose(loadings, embedder.PCA_scores[:, 0]) + np.testing.assert_allclose(loadings*-1, embedder.PCA_scores[:, 0]) np.testing.assert_allclose(pca_loadings[:, 0], embedder.PCA_scores[:, 0]) From 2b1a48e358b26e5320b14b7321bfdbe684471485 Mon Sep 17 00:00:00 2001 From: acreegan Date: Fri, 16 Feb 2024 13:35:53 +1300 Subject: [PATCH 8/9] remove workflow changes --- .github/workflows/build-test-linux.yml | 102 ------------------------- 1 file changed, 102 deletions(-) delete mode 100644 .github/workflows/build-test-linux.yml diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml deleted file mode 100644 index e47c3a2221..0000000000 --- a/.github/workflows/build-test-linux.yml +++ /dev/null @@ -1,102 +0,0 @@ -name: Linux Build Test - -on: - workflow_dispatch: - pull_request: - -env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) - BUILD_TYPE: Release - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - - runs-on: ubuntu-latest - container: akenmorris/ubuntu-build-box-sw65 - - steps: - - - name: Conda info - run: conda info - - - name: Check space1 - run: df -h - - - name: Free some space - run: cd /__t ; rm -rf CodeQL go - - - name: Check space2 - run: df -h - - - name: Checkout code - uses: actions/checkout@v3 - with: - lfs: true - - - name: Workaround for permission issue - run: git config --global --add safe.directory /__w/ShapeWorks/ShapeWorks - - - name: Check space3 - run: df -h - - - name: Restore Caches - shell: bash -l {0} - run: .github/workflows/restore_caches.sh - - - name: Conda Installs - shell: bash -l {0} - run: .github/workflows/gha_conda.sh - - - name: try import vtk - shell: bash -l {0} - run: conda activate shapeworks && python -c "import vtk" - - - name: Build Dependencies - shell: bash -l {0} - run: .github/workflows/gha_deps.sh - - - name: Check space4 - run: df -h - - - name: cmake - shell: bash -l {0} - run: conda activate shapeworks && mkdir build && cd build && cmake -DCMAKE_CXX_FLAGS=-g -DITK_DIR=$HOME/install/lib/cmake/ITK-5.2 -DVTK_DIR=$HOME/install/lib/cmake/vtk-9.1 -DXLNT_DIR=$HOME/install -DLIBIGL_DIR=$HOME/install -DOpenVDB_DIR=$HOME/install/lib/cmake/OpenVDB -DGEOMETRYCENTRAL_DIR=$HOME/install -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBuild_Studio=ON -DJKQTCommonSharedLib_DIR=$HOME/install/lib/cmake/JKQTCommonSharedLib -DJKQTMathTextSharedLib_DIR=$HOME/install/lib/cmake/JKQTMathTextSharedLib -DJKQTPlotterSharedLib_DIR=$HOME/install/lib/cmake/JKQTPlotterSharedLib -DACVD_DIR=$HOME/install -DCMAKE_PREFIX_PATH=${CONDA_PREFIX} -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/shapeworks-install -DUSE_ORIGIN_RPATH=ON -DGA_MEASUREMENT_ID=$GA_MEASUREMENT_ID .. 
- - - name: Check space5 - run: df -h - - - name: make - shell: bash -l {0} - run: conda activate shapeworks && cd build && make -j4 - - - name: Check space6 - run: df -h - - - name: Du - run: cd / ; du -sh * || true - - - name: make install - shell: bash -l {0} - run: conda activate shapeworks && cd build && make install - - - name: Build Binary Package - shell: bash -l {0} - env: - PR_NUMBER: ${{ github.event.number }} - run: conda activate shapeworks && source ./devenv.sh ./build/bin && PATH=$HOME:$PATH ./Support/package.sh tag ${GITHUB_WORKSPACE}/shapeworks-install $HOME/install - - - name: Download test data - shell: bash -l {0} - run: .github/workflows/download_test_data.sh - - - name: make test - shell: bash -l {0} - run: conda activate shapeworks && source ./devenv.sh ./build/bin && cd build && ctest -VV - - - name: Store ccache - shell: bash -l {0} - run: .github/workflows/store_ccache.sh From bcf118833fcff1d000c85a50987e35732c861a41 Mon Sep 17 00:00:00 2001 From: Alan Morris Date: Fri, 29 Mar 2024 12:37:10 -0600 Subject: [PATCH 9/9] Add back num_dim, fix suffix usage. --- .../DataAugmentationUtils/DataAugmentation.py | 2 +- .../DataAugmentationUtils/Embedder.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/DataAugmentation.py b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/DataAugmentation.py index fbe11016af..2eaf3a5bb1 100644 --- a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/DataAugmentation.py +++ b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/DataAugmentation.py @@ -35,7 +35,7 @@ def point_based_aug(out_dir, orig_img_list, orig_point_list, num_samples, num_di else: PointEmbedder = Embedder.PCA_Embbeder(point_matrix, num_dim, percent_variability) num_dim = PointEmbedder.num_dim - PointEmbedder.write_PCA(out_dir + "PCA_Particle_Info/", "particles") # write PCA info for DeepSSM testing + PointEmbedder.write_PCA(out_dir + "PCA_Particle_Info/", suffix="particles") # write PCA info for DeepSSM testing embedded_matrix = PointEmbedder.getEmbeddedMatrix() # Get sampler if sampler_type == "gaussian": diff --git a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py index c20a8e77d7..f7c642249b 100644 --- a/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py +++ b/Python/DataAugmentationUtilsPackage/DataAugmentationUtils/Embedder.py @@ -96,6 +96,7 @@ def run_PCA(self, num_dim, percent_variability): self.PCA_scores = PCA_scores self.eigen_vectors = eigen_vectors self.eigen_values = eigen_values + self.num_dim = num_dim return num_dim def write_PCA(self, out_dir: Path, score_option="full", suffix="txt"): @@ -120,10 +121,9 @@ def write_PCA(self, out_dir: Path, score_option="full", suffix="txt"): os.makedirs(out_dir) if score_option == "full": np.savetxt(str(out_dir / f'original_PCA_scores.{suffix}'), self.PCA_scores) - mean = np.mean(self.data_matrix, axis=0) np.savetxt(str(out_dir / f'mean.{suffix}'), mean) - np.savetxt(str(out_dir / f'eigenvalues.{suffix}'), self.eigen_values) + np.savetxt(str(out_dir / f'eigenvalues.txt'), self.eigen_values) for i in range(self.data_matrix.shape[0]): nm = str(out_dir / f'pcamode{i}.{suffix}') data = self.eigen_vectors[:, i]
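For reference, the reconstruction these tests repeatedly assert is the standard PCA back-projection x = s @ W.T + mean, where W holds the retained eigenvector columns. A short sketch of that equivalence, again with illustrative data (names and shapes are assumptions, not from the patch):

    import numpy as np
    from DataAugmentationUtils.Embedder import PCA_Embbeder

    # Hypothetical data: 8 samples of 50 3D points.
    rng = np.random.default_rng(1)
    data = rng.normal(size=(8, 50, 3))

    embedder = PCA_Embbeder(data, num_dim=7)
    scores = embedder.PCA_scores[0]

    # project() multiplies the scores by the transposed eigenvector matrix,
    # adds the flattened mean, and restores the original sample shape.
    W = embedder.eigen_vectors[:, :len(scores)]
    manual = (scores @ W.T + embedder.mean_data.reshape(-1)).reshape(data.shape[1:])
    np.testing.assert_allclose(manual, embedder.project(scores))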