From d31e8205a77d08e13d8c266131656ba64805889c Mon Sep 17 00:00:00 2001
From: lllangWV <lllangWV@gmail.com>
Date: Mon, 22 Jul 2024 16:45:52 -0400
Subject: [PATCH] Quantum Espresso dos and atomic projection parsing refactor

---
 pyprocar/io/qe.py | 443 +++++++++++++++++++++++++++-------------------
 1 file changed, 256 insertions(+), 187 deletions(-)

diff --git a/pyprocar/io/qe.py b/pyprocar/io/qe.py
index cd7f240c..298ca942 100644
--- a/pyprocar/io/qe.py
+++ b/pyprocar/io/qe.py
@@ -74,12 +74,9 @@ def __init__(self,
             self.isBandsCalc = True
             with open(bands_in_filename, "r") as f:
                 self.bandsIn = f.read()
-            self.getKpointLabels()
+            self._get_kpoint_labels()
 
-        # if xml_root.findall(".//input/control_variables/calculation")[0].text == "nscf":
-        #     self.is_dos_fermi_calc = True
-        # if self.kpath is not None:
-        # self.bands -= self.efermi
+ 
         self.ebs = ElectronicBandStructure(
                                 kpoints=self.kpoints,
                                 n_kx=self.nkx,
@@ -202,11 +199,10 @@ def _parse_pdos(self,pdos_in_filename,dirname):
 
         # Parsing total density of states
         energies, total_dos = self._parse_dos_total(dos_total_filename=f"{dirname}{os.sep}{self.pdos_prefix}.pdos_tot")
-
+        self.n_energies=len(energies)
         # Finding all the density of states projections files
-        # print(self.wfc_filenames)
         wfc_filenames = self._parse_available_wfc_filenames(dirname = self.dirname)
-        projected_dos, projected_labels = self._parse_dos_projections(wfc_filenames=wfc_filenames, n_energy = len(energies))    
+        projected_dos = self._parse_dos_projections(wfc_filenames=wfc_filenames, n_energy = len(energies))    
         
         # print(projected_labels)
         dos = DensityOfStates(energies=energies,
@@ -255,131 +251,157 @@ def _parse_dos_total(self, dos_total_filename ):
         energies -= self.efermi
         return energies, total_dos
    
-    def _parse_dos_projections(self,wfc_filenames,n_energy):
-        """Helper method to parse the dos projection files
+    def _parse_dos_projections(self, wfc_filenames, n_energy):
+        """Parse the dos projection files using efficient array operations."""
+        n_principal_number = 1
 
-        Parameters
-        ----------
-        wfc_filenames : List[str]
-            A List of projection filenames.
-        n_energy : int
-            The number of energies for which the density of states is calculated at.
+        projected_dos_array = np.zeros((self.n_atoms, n_principal_number, self.n_orbitals, self.n_spin, n_energy))
 
-        Returns
-        -------
-        _type_
-            _description_
+        for filename in wfc_filenames:
+            self._validate_file(filename)
 
-        Raises
-        ------
-        ValueError
-            _description_
-        """
-        n_principal_number = 1
-        projected_dos_array =  np.zeros(shape=(self.n_atoms,n_principal_number,self.n_orbitals,self.n_spin,n_energy))
-        for filename in wfc_filenames[:]:
-            if not os.path.exists(filename):
-                raise ValueError('ERROR: pdos file not found')   
-            with open(filename) as f:
-                pdos_text = ''.join(f.readlines()[1:])
-
-            # -1 because indexing starts at 0
-            atom_num = int(re.findall("#(\d*)",filename)[0]) - 1
-            wfc_Name = re.findall("atm#\d*\(([a-zA-Z0-9]*)\)",filename)[0]
-            orbital_name = re.findall("wfc#\d*\(([_a-zA-Z0-9.]*)\)",filename)[0]
-            n_orbitial = len(self.orbitals)
+            # Some pdos files are not non-colinear. 
+            # In the version of the qe code where we get the spin projections. 
+            # Non-colinear pdos files should have the word 'j' in the filename.
+            if not self._is_non_colinear_file(filename) and self.is_non_colinear:
+                continue
             
-
-            # For noncolinear caluclations
+            # Determine the parsing method based on calculation type
             if self.is_non_colinear:
-                # In the noncolinear case there are some files that are not used. They are in the uncoupled basis
-                try:
-                    tot_ang_mom = float(orbital_name.split('j')[-1])
-                except:
-                    continue
-                
-                # This controls the mapping from raw orbital projection data to structured orbital projection data
-                # as defined by the order in self.orbitals
-                l_orbital_name = orbital_name.split('_')[0]
-                if l_orbital_name == 's':
-                    if tot_ang_mom == 0.5:
-                        m_js = np.linspace(-0.5,0.5,2)
-                        orbital_nums = [0,1]
-                elif l_orbital_name == 'p':
-                    if tot_ang_mom == 0.5:
-                        m_js = np.linspace(-0.5,0.5,2)
-                        orbital_nums = [2,3]
-                    elif tot_ang_mom == 1.5:
-                        m_js = np.linspace(-1.5,1.5,4)
-                        orbital_nums = [4,5,6,7]
-                elif l_orbital_name == 'd':
-                    if tot_ang_mom == 0.5:
-                        m_js = np.linspace(-0.5,0.5,2)
-                        orbital_nums = [2,3]
-                    elif tot_ang_mom == 1.5:
-                        m_js = np.linspace(-1.5,1.5,4)
-                        orbital_nums = [8,9,10,11]
-                    elif tot_ang_mom == 2.5:
-                        m_js = np.linspace(-2.5,2.5,6)
-                        orbital_nums = [12,13,14,15,16,17]
-
-                # Filters unwanted empty character from the regular expression search
-                raw_energy_blocks = list(filter(None,re.findall('([\s\S]*?)(?=\n \n)',pdos_text)))
-
-                # Loop through the energies
-                for i_energy, raw_energy_block in enumerate(raw_energy_blocks[:]):
-                    #Strippng unwanted space characters before and after the block
-                    raw_energy_block =raw_energy_block.lstrip().rstrip()
-                    # Orbital_num is the index in the final array, i_orbital is the index in the raw_data
-                    for i_orbital,orbital_num in enumerate(orbital_nums):
-                        # Loop through spins
-                        for i_spin,spin_comp in enumerate([1,2,3]):
-                            # Totals
-                            # if i_spin == 0:
-                            #     projected_dos_array[atom_num,0,orbital_num,i_spin,i_energy] += float(raw_energy_block.split('\n')[0].split()[2+i_orbital])
-                            # # spin components
-                            # else:
-                            #     projected_dos_array[atom_num,0,orbital_num,i_spin,i_energy] += float(raw_energy_block.split('\n')[i_spin+1].split()[1+i_orbital])
-                                
-                            projected_dos_array[atom_num,0,orbital_num,spin_comp,i_energy] += float(raw_energy_block.split('\n')[i_spin+2].split()[1+i_orbital])
-                        
-            # For colinear calculations
+                self._parse_non_colinear(filename,  projected_dos_array)
             else:
-                current_orbital_name =  orbital_name
-
-                # This controls the mapping from raw orbital projection data to structured orbital projection data
-                # as defined by the order in self.orbitals
-                if current_orbital_name == 's':
-                    m_s = np.linspace(0,0,1)
-                    orbital_nums = [0]
-                elif current_orbital_name == 'p':
-                    m_s = np.linspace(-1,1,3)
-                    orbital_nums = [1,2,3]
-                elif current_orbital_name == 'd':
-                    m_s = np.linspace(-2,2,5)
-                    orbital_nums = [4,5,6,7,8]
-
-                # Filters unwanted empty character from the regular expression search
-                raw_energy_blocks = pdos_text.rstrip().split('\n')
-
-                # Loop through the energies
-                for i_energy, raw_energy_block in enumerate(raw_energy_blocks):
-                    raw_energy_block =raw_energy_block.lstrip().rstrip()
-                    raw_projections = raw_energy_block.split()[1+self.n_spin:]
-
-                    # Orbital_num is the index in the final array, i_orbital is the index in the raw_data
-                    for i_orbital, orbital_num in enumerate(orbital_nums):
-
-                        # Loop through spins
-                        for i_spin in range(self.n_spin):
-                            projected_dos_array[atom_num,0,orbital_num,i_spin,i_energy] += float(raw_projections[i_spin::self.n_spin][i_orbital])
-
+                self._parse_colinear(filename, projected_dos_array)
 
+        return projected_dos_array
+    
+    def _is_non_colinear_file(self, filename):
+        """Determine if the file corresponds to non-colinear calculations."""
+        file_tag = filename.split('_')[-1]
+        return 'j' in file_tag
+    
+    def _validate_file(self, filename):
+        if not os.path.exists(filename):
+            raise ValueError(f'ERROR: pdos file not found: {filename}')
+
+    def _read_file_content(self, filename):
+        with open(filename) as f:
+            # Check if spin component projections are present
+            lines=f.readlines()[1:]
+            spin_projections_present = 'lsigma' in lines[1]
+
+            if spin_projections_present:
+                pdos=self._parse_spin_components_lines(lines)
+            elif self.is_non_colinear:
+                pdos=self._parse_noncolinear_pdos(lines)
+            else:
+                pdos=self._parse_colinear_pdos(lines)
+        return pdos
+            
+    def _parse_spin_components_lines(self, lines):
+ 
+        n_orbitals=len(lines[0].split()) - 2
+        pdos=np.zeros(shape=(self.n_energies, n_orbitals, self.n_spin))
+        for i_energy in range(self.n_energies):
+            iblock_start=i_energy*6
+            total_line=lines[iblock_start].split()[2:]
+            x_line=lines[iblock_start+2].split()[1:]
+            y_line=lines[iblock_start+3].split()[1:]
+            z_line=lines[iblock_start+4].split()[1:]
+            pdos[i_energy, : ,0] = [float(tot) for tot in total_line]
+            pdos[i_energy, : ,1] = [float(x) for x in x_line]
+            pdos[i_energy, : ,2] = [float(y) for y in y_line]
+            pdos[i_energy, : ,3] = [float(z) for z in z_line]
+
+        # Move energy to the last axis
+        pdos = np.moveaxis(pdos, 0, -1)
+        return pdos
+    
+    def _parse_colinear_pdos(self,lines):
+        # The energy column is first, the sum totals by spin channel
+        n_orbitals=len(lines[0].split()) - self.n_spin - 1
+        n_orbitals //= self.n_spin
+        pdos=np.zeros(shape=(self.n_energies, n_orbitals, self.n_spin))
+        col_start= 1 + self.n_spin
+        for i_energy in range(self.n_energies):
+            
+            # Skip energies and sum over projections
+            values=lines[i_energy].split()[col_start:]
+            if self.n_spin == 1:
+                pdos[i_energy, : , 0] = values
+            else:
+                pdos[i_energy, : , 0] = values[::2]
+                pdos[i_energy, : , 1] = values[1::2]
+        
+        # Move energy to the last axis
+        pdos = np.moveaxis(pdos, 0, -1)
+        return pdos
+    
+    def _parse_noncolinear_pdos(self,lines):
+        # The energy column is first, the sum totals by spin channel
+        n_orbitals=len(lines[0].split()) - 1 - 1
+        n_orbitals //= 1
+        pdos=np.zeros(shape=(self.n_energies, n_orbitals, 4))
+        col_start= 1 + 1
+        for i_energy in range(self.n_energies):
+            # Skip energies and sum over projections
+            values=lines[i_energy].split()[col_start:]
+            pdos[i_energy, : , 0] = values
+        # Move energy to the last axis
+        pdos = np.moveaxis(pdos, 0, -1)
+        return pdos
+
+    def _extract_file_info(self, filename):
+        atom_num = int(re.findall(r"#(\d*)", filename)[0]) - 1
+        wfc_name = re.findall(r"atm#\d*\(([a-zA-Z0-9]*)\)", filename)[0]
+        orbital_name = filename.split('(')[-1][0]
+        
+        total_angular_momentum=None
         if self.is_non_colinear:
-            projected_dos_array[:,:,:,0,:] = (projected_dos_array[:,:,:,1,:]**2 + projected_dos_array[:,:,:,2,:]**2 + projected_dos_array[:,:,:,3,:]**2)**0.5 
-        return projected_dos_array, self.orbital_names
-
-    def getKpointLabels(self):
+            tmp_str=filename.split('_')[-1]
+            total_angular_momentum=float(tmp_str.strip('j').strip(')'))
+        return atom_num, wfc_name, orbital_name,total_angular_momentum
+
+    def _parse_non_colinear(self, filename, projected_dos_array):
+
+        pdos_data = self._read_file_content(filename)
+        atom_num, wfc_name, orbital_name, total_angular_momentum = self._extract_file_info(filename)
+
+        orbital_nums, _ = self._get_orbital_info_non_colinear(orbital_name, total_angular_momentum)
+        if not orbital_nums:
+            raise ValueError(f'ERROR: orbital_nums is empty for {filename}')
+
+        projected_dos_array[atom_num, 0, orbital_nums, :, :self.n_energies] += pdos_data
+
+    def _parse_colinear(self, filename, projected_dos_array):
+
+        pdos_data = self._read_file_content(filename)
+        atom_num, wfc_name, orbital_name,total_angular_momentum = self._extract_file_info(filename)
+        orbital_nums, _ = self._get_orbital_info_colinear(orbital_name)
+
+        if not orbital_nums:
+            raise ValueError(f'ERROR: orbital_nums is empty for {filename}')
+        projected_dos_array[atom_num, 0, orbital_nums, : , :self.n_energies] += pdos_data
+
+    def _get_orbital_info_non_colinear(self, l_orbital_name, tot_ang_mom):
+        orbital_info = {
+            's': {0.5: ([0, 1], np.linspace(-0.5, 0.5, 2))},
+            'p': {0.5: ([2, 3], np.linspace(-0.5, 0.5, 2)),
+                  1.5: ([4, 5, 6, 7], np.linspace(-1.5, 1.5, 4))},
+            'd': {0.5: ([2, 3], np.linspace(-0.5, 0.5, 2)),
+                  1.5: ([8, 9, 10, 11], np.linspace(-1.5, 1.5, 4)),
+                  2.5: ([12, 13, 14, 15, 16, 17], np.linspace(-2.5, 2.5, 6))}
+        }
+        return orbital_info.get(l_orbital_name, {}).get(tot_ang_mom, ([], []))
+
+    def _get_orbital_info_colinear(self, orbital_name):
+        orbital_info = {
+            's': ([0], np.linspace(0, 0, 1)),
+            'p': ([1, 2, 3], np.linspace(-1, 1, 3)),
+            'd': ([4, 5, 6, 7, 8], np.linspace(-2, 2, 5))
+        }
+        return orbital_info.get(orbital_name, ([], []))
+
+    def _get_kpoint_labels(self):
         """
         This method will parse the bands.in file to get the kpath information.
         """
@@ -496,20 +518,26 @@ def _initialize_filenames(self, dirname, scf_in, bands_in_filename, pdos_in_file
             dirname = dirname + os.sep
         else:
             dirname = ""
-            
-        with open(f"{dirname}{scf_in}", "r") as f:
+        
+        scf_in_file_path = os.path.join(dirname,scf_in)
+        with open(scf_in_file_path, "r") as f:
             scf_in = f.read()
 
         outdir = re.findall("outdir\s*=\s*'\S*?([A-Za-z]*)'",  scf_in)[0]
         prefix = re.findall("prefix\s*=\s*'(.*)'", scf_in)[0]
         xml_filename =  prefix + ".xml"
         
-        if os.path.exists(f"{dirname}{outdir}"):
-            atomic_proj_xml = f"{dirname}{os.sep}{outdir}{os.sep}{prefix}.save{os.sep}atomic_proj.xml"
-        else:
-            atomic_proj_xml = f"{dirname}atomic_proj.xml"
+        atomic_proj_xml = os.path.join(dirname,outdir,prefix + ".save","atomic_proj.xml")
+        if not os.path.exists(atomic_proj_xml):
+            atomic_proj_xml = os.path.join(dirname,"atomic_proj.xml")
+
+        output_xml=os.path.join(dirname,outdir,xml_filename)
+        if not os.path.exists(output_xml):
+            output_xml = os.path.join(dirname,xml_filename)
+
+        
+        tree = ET.parse(output_xml)
 
-        tree = ET.parse(f"{dirname}{xml_filename}")
         root = tree.getroot()
         prefix = root.findall(".//input/control_variables/prefix")[0].text
 
@@ -636,9 +664,13 @@ def _parse_atomic_projections(self,atomic_proj_xml_filename):
         nbnd = int(root_header.get("NUMBER_OF_BANDS"))
         nk = int(root_header.get("NUMBER_OF_K-POINTS"))
         nwfc = int(root_header.get("NUMBER_OF_ATOMIC_WFC"))
-        norb = len(self.orbitals) 
-        natm = len(self.species)
-
+        norb = self.n_orbitals
+        natm = self.n_atoms
+        nspin_channels = int(root_header.get("NUMBER_OF_SPIN_COMPONENTS"))
+        nspin_projections = self.n_spin 
+        # The indices are to match the format of the from PROCAR format. 
+        # In it there is an extra 2 columns for orbitals for the ion index and the total
+        # Also there is and extra row for the totals
         self.spd = np.zeros(shape = (self.n_k, self.n_band , self.n_spin ,self.n_atoms+1,self.n_orbitals + 2,))
 
         self.spd_phase = np.zeros(
@@ -647,70 +679,107 @@ def _parse_atomic_projections(self,atomic_proj_xml_filename):
             ),
             dtype=np.complex_,
         )
-        # print(self.wfc_mapping)
-        ik = -1
-        for ieigenstate, eigenstates_element in enumerate(atm_proj_root.findall(".//EIGENSTATES")[0]):
-            # print(eigenstates_element.tag)
-            if eigenstates_element.tag == 'K-POINT':
-
-                # sets ik back to zero for other spin channel
-                if ik==nk-1:
-                    ik=0
-                else:
-                    ik+=1
-                
-            if eigenstates_element.tag == 'PROJS':
-                
-                for i, projs_element in enumerate(eigenstates_element):
-                    iwfc = int(projs_element.get('index'))
-                    iorb = self.wfc_mapping[f"wfc_{iwfc}"]["orbital"]
-                    iatm = self.wfc_mapping[f"wfc_{iwfc}"]["atom"]
-
-                    if self.is_non_colinear:
-                        # Skips the total projections
-                        if projs_element.tag == "ATOMIC_WFC":
-                            ispin = int(projs_element.get('spin'))-1
-                            continue
-                        # Parse spin components. -1 to align index with spd array
-                        if projs_element.tag == "ATOMIC_SIGMA_PHI":
-                            ispin = int(projs_element.get('ipol'))
-                    else:
-                        if projs_element.tag == "ATOMIC_WFC":
-                            ispin = int(projs_element.get('spin'))-1
-                       
 
-                    projections = projs_element.text.split("\n")[1:-1]
-                    for iband, band_projection in enumerate(projections):
-                        real = float(band_projection.split()[0])
-                        imag = float(band_projection.split()[1])
-                        comp = complex(real , imag)
-                        comp_squared = np.absolute(comp)**2
+        bands=self._parse_bands_tag(atm_proj_root, nk, nbnd, nspin_channels)
+        kpoints=self._parse_kpoints_tag(atm_proj_root, nk, nspin_channels)
+        projs, projs_phase=self._parse_projections_tag(atm_proj_root, nk, nbnd, natm, norb, nspin_channels, nspin_projections)
 
-                        
-                        self.spd_phase[ik,iband,ispin,iatm - 1,iorb + 1] = complex(real , imag)
+        # maping the projections to the spd array. The spd array is the output of the PROCAR file
+        self.spd[:,:,:,:-1,1:-1] += projs[:,:,:,:,:]
+        self.spd_phase[:,:,:,:-1,1:-1] += projs_phase[:,:,:,:,:]
 
-                        # The spd will depend on of the calculation is a non colinear or colinear. Noncolinear
-                        if self.is_non_colinear:
-                            
-                            self.spd[ik,iband,ispin,iatm - 1,iorb + 1] = real
-                        else:
-                            self.spd[ik,iband,ispin,iatm - 1,iorb + 1] = comp_squared
-
-        if self.is_non_colinear:
-            
-            self.spd[:,:,0,:,:] = (self.spd[:,:,1,:,:] ** 2 + self.spd[:,:,2,:,:] ** 2 + self.spd[:,:,3,:,:] ** 2)**0.5
-            self.spd_phase[:,:,0,:,:] = (self.spd_phase[:,:,1,:,:] ** 2 + self.spd_phase[:,:,2,:,:] ** 2 + self.spd_phase[:,:,3,:,:] ** 2)**0.5
 
+        # Adding atom index. This is a vasp output thing
         for ions in range(self.ionsCount):
             self.spd[:, :, :, ions, 0] = ions + 1
 
-        # The following fills the totals for the spd array
+        # The following fills the totals for the spd array. Again this is a vasp output thing.
         self.spd[:, :, :, :, -1] = np.sum(self.spd[:, :, :, :, 1:-1], axis=4)
         self.spd[:, :, :, -1, :] = np.sum(self.spd[:, :, :, :-1, :], axis=3)
         self.spd[:, :, :, -1, 0] = 0
 
         return None
-                
+    
+    def _parse_bands_tag(self,atm_proj_root, nk, nbnd, nspins):
+
+        bands=np.zeros(shape=(nk,nbnd,nspins))
+        results=atm_proj_root.findall(".//EIGENSTATES/E")
+        # For spin-polarized calculations, there are two spin channels. 
+        # They add them by first adding the spin up and then the spin down
+        # I break this down with the folloiwng indexing
+        spin_reuslts=[results[i*nk : (i+1)*nk ] for i in range(nspins)]
+        for ispin,spin_result in enumerate(spin_reuslts):
+            for ik,result in enumerate(spin_result):
+                bands_per_kpoint=result.text.split()
+                bands[ik,:,ispin]=bands_per_kpoint
+
+        return bands
+    
+    def _parse_kpoints_tag(self,atm_proj_root, nk, nspins):
+        kpoints=np.zeros(shape=(nk,3))
+        kpoint_tags=atm_proj_root.findall(".//EIGENSTATES/K-POINT")
+        # For spin-polarized calculations, there are two spin channels. 
+        # They add them by first adding the spin up and then the spin down
+        # I break this down with the folloiwng indexing
+        spin_reuslts=[kpoint_tags[i*nk : (i+1)*nk ] for i in range(nspins)]
+        for ispin,spin_result in enumerate(spin_reuslts):
+            for ik,kpoint_tag in enumerate(spin_result):
+                kpoint=kpoint_tag.text.split()
+                kpoints[ik,:]=kpoint
+        return kpoints
+    
+    def _parse_projections_tag(self,atm_proj_root, nk, nbnd, natm, norb, nspin_channels, nspin_projections):
+
+        projs=np.zeros(shape=(nk,nbnd,nspin_projections,natm,norb))
+        projs_phase=np.zeros(shape=(nk,nbnd,nspin_projections,natm,norb), dtype=np.complex_)
+        proj_tags=atm_proj_root.findall(".//EIGENSTATES/PROJS")
+        # For spin-polarized calculations, there are two spin channels. 
+        # They add them by first adding the spin up and then the spin down
+        # I break this down with the folloiwng indexing
+        spin_reuslts=[proj_tags[i*nk : (i+1)*nk ] for i in range(nspin_channels)]
+        for ispin,spin_result in enumerate(spin_reuslts):
+            for ik,proj_tag in enumerate(spin_result):
+                atm_wfs_tags=proj_tag.findall('ATOMIC_WFC')
+                for atm_wfs_tag in atm_wfs_tags:
+                    iwfc = int(atm_wfs_tag.get('index'))
+                    iorb = self.wfc_mapping[f"wfc_{iwfc}"]["orbital"]
+                    iatm = self.wfc_mapping[f"wfc_{iwfc}"]["atom"] - 1
+                    band_projections=atm_wfs_tag.text.strip().split('\n')
+                    for iband, band_projection in enumerate(band_projections):
+                        real = float(band_projection.split()[0])
+                        imag = float(band_projection.split()[1])
+                        comp = complex(real , imag)
+                        comp_squared = np.absolute(comp)**2
+
+                        projs_phase[ik,iband,ispin,iatm,iorb] = complex(real , imag)
+                        projs[ik,iband,ispin,iatm,iorb] = comp_squared
+
+                atm_sigma_wfs_tags=proj_tag.findall('ATOMIC_SIGMA_PHI')
+                if atm_sigma_wfs_tags:
+                    spin_x_projections = [atm_sigma_wfs_tag for atm_sigma_wfs_tag in atm_sigma_wfs_tags if atm_sigma_wfs_tag.get('ipol') == '1']
+                    spin_y_projections = [atm_sigma_wfs_tag for atm_sigma_wfs_tag in atm_sigma_wfs_tags if atm_sigma_wfs_tag.get('ipol') == '2']
+                    spin_z_projections = [atm_sigma_wfs_tag for atm_sigma_wfs_tag in atm_sigma_wfs_tags if atm_sigma_wfs_tag.get('ipol') == '3']
+                    spin_projections=[spin_x_projections,spin_y_projections,spin_z_projections]
+                    for i_spin_component,spin_projection_tags in enumerate(spin_projections):
+                        for spin_projection_tag in spin_projection_tags:
+                            iwfc = int(spin_projection_tag.get('index'))
+                            iorb = self.wfc_mapping[f"wfc_{iwfc}"]["orbital"]
+                            iatm = self.wfc_mapping[f"wfc_{iwfc}"]["atom"] - 1
+                            band_projections=spin_projection_tag.text.strip().split('\n')
+                            for iband, band_projection in enumerate(band_projections):
+                                real = float(band_projection.split()[0])
+                                imag = float(band_projection.split()[1])
+                                comp = complex(real , imag)
+                                comp_squared = np.absolute(comp)**2
+
+                                # Move spin index by 1 to match the order in the spd array
+                                # First index should be total, 
+                                # second, third, and fourth should be x,y,z, respoectively
+                                projs_phase[ik,iband,i_spin_component+1,iatm,iorb] = complex(real , imag)
+                                projs[ik,iband,i_spin_component+1,iatm,iorb] = real
+
+        return projs, projs_phase
+
     def _parse_structure(self,main_xml_root):
         """A helper method to parse the structure tag of the main xml file