From 019001c11ece20976a9e0277f950f5d4139d0c8a Mon Sep 17 00:00:00 2001
From: Peter N O Gillespie <peter.no.gillespie@gmail.com>
Date: Mon, 29 Apr 2024 15:09:32 +0000
Subject: [PATCH 1/4] `get_xspectra_structures`: Refactor and Improve Code

Refactors major components of `get_xspectra_structures` as part of
ongoing improvement work. These changes are the first of two
improvements intended to enable users to set symmetry data manually
instead of relying on automatic symmetry analysis - ultimately enabling
the same feature for the `XspectraCrystalWorkChain` and any others which
use the same structure preparation tools.

Changes:
* Moves supercell creation, processes for molecules, and generation of
  `equivalent_sites_data` to separate functions.
* Adds functionality for users to manually provide symmetry data, thus
  enabling the `CalcFunction` to be used as a means to generate structures
 with user control over which exact sites to mark.
* Fixes a bug discovered in the case where non-hubbard structures with
  custom Kind names lost their Kind names when converting the ASE
supercell to `StructureData` type.
* Fixes a small oversight in the tests where `spglib_settings` was
  mistakenly named `spglib_options`.
---
 .../functions/get_xspectra_structures.py      | 397 +++++++++++-------
 .../functions/test_get_xspectra_structures.py |  12 +-
 2 files changed, 248 insertions(+), 161 deletions(-)

diff --git a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
index 7ae29d2f..0986d0e2 100644
--- a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
+++ b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
@@ -15,6 +15,160 @@
 from aiida_quantumespresso.utils.hubbard import HubbardStructureData, HubbardUtils
 
 
+def process_molecule_input(structure, **kwargs):  # pylint: disable=too-many-statements
+    """Generate symmetry data and supercell suitable for molecular systems using Pymatgen."""
+    from pymatgen.symmetry.analyzer import PointGroupAnalyzer
+
+    result = {'output_params': {}}
+    # If we are working with a molecule, check for pymatgen_settings
+    if 'pymatgen_settings' in kwargs:
+        pymatgen_settings_dict = kwargs['pymatgen_settings'].get_dict()
+        valid_keys = ['tolerance', 'eigen_tolerance', 'matrix_tolerance']
+        pymatgen_kwargs = {key: value for key, value in pymatgen_settings_dict.items() if key in valid_keys}
+    else:
+        pymatgen_kwargs = {}
+
+    abs_elements_list = kwargs.get('abs_elements_list', [kind.symbol for kind in structure.kinds])
+    pymatgen_molecule = structure.get_pymatgen_molecule()
+    centered_molecule = pymatgen_molecule.get_centered_molecule()
+    centered_positions = centered_molecule.cart_coords
+
+    analyzer_data = PointGroupAnalyzer(pymatgen_molecule, **pymatgen_kwargs)
+    eq_atoms_data = analyzer_data.get_equivalent_atoms()['eq_sets']
+    atomic_nos = pymatgen_molecule.atomic_numbers
+    point_group = str(analyzer_data.get_pointgroup())
+    result['output_params']['molecule_point_group'] = point_group
+
+    equivalency_dict = {}
+
+    for key in eq_atoms_data:
+        site_symbol = elements[atomic_nos[key]]['symbol']
+        if site_symbol in abs_elements_list:
+            equivalency_dict[f'site_{key}'] = {}
+            atom_no_set = eq_atoms_data[key]
+            equivalency_dict[f'site_{key}']['site_index'] = key
+            equivalency_dict[f'site_{key}']['equivalent_sites_list'] = atom_no_set
+            equivalency_dict[f'site_{key}']['multiplicity'] = len(atom_no_set)
+            equivalency_dict[f'site_{key}']['symbol'] = site_symbol
+
+    result['output_params']['equivalent_sites_data'] = equivalency_dict
+
+    supercell_min_parameter = kwargs.get('supercell_min_parameter', 8.0)
+
+    x_pos = []
+    y_pos = []
+    z_pos = []
+    for site in structure.sites:
+        x_pos.append(site.position[0])
+        y_pos.append(site.position[1])
+        z_pos.append(site.position[2])
+
+    # We assume that the Martyna-Tuckerman approach will be used in the core-hole SCF
+    # thus, at a minimum, the new box for the molecule needs to be 2x the extent of the
+    # molecule in x, y, and z. We therefore use the larger of either the MT distance or
+    # `supercell_min_parameter` for each cell parameter.
+    high_x = x_pos[(np.argmax(x_pos))]
+    high_y = y_pos[(np.argmax(y_pos))]
+    high_z = z_pos[(np.argmax(z_pos))]
+
+    low_x = x_pos[(np.argmin(x_pos))]
+    low_y = y_pos[(np.argmin(y_pos))]
+    low_z = z_pos[(np.argmin(z_pos))]
+
+    box_a = max((high_x - low_x) * 2, supercell_min_parameter)
+    box_b = max((high_y - low_y) * 2, supercell_min_parameter)
+    box_c = max((high_z - low_z) * 2, supercell_min_parameter)
+
+    new_supercell = StructureData()
+    new_supercell.set_cell(value=([box_a, 0., 0.], [0., box_b, 0.], [0., 0., box_c]))
+
+    for kind in structure.kinds:
+        new_supercell.append_kind(kind)
+
+    for site in structure.sites:
+        new_supercell.append_site(site)
+
+    # Reset the positions so that the centre of mass is at the centre of the new box
+    new_supercell.reset_sites_positions(centered_positions + (np.array(new_supercell.cell_lengths) / 2))
+
+    result['output_params']['supercell_cell_matrix'] = new_supercell.cell
+    result['output_params']['supercell_cell_lengths'] = new_supercell.cell_lengths
+    result['output_params']['supercell_num_sites'] = len(new_supercell.sites)
+    result['supercell'] = new_supercell
+
+    return result
+
+
+def get_spglib_equivalency_dict(equivalent_atoms_array, abs_elements_list, element_types, type_mapping_dict):
+    """Convert the equivalent atoms array into the required dictionary format."""
+
+    equivalency_dict = {}
+    for index, symmetry_types in enumerate(zip(equivalent_atoms_array, element_types)):
+        symmetry_value, element_type = symmetry_types
+        if type_mapping_dict[str(element_type)].symbol in abs_elements_list:
+            if f'site_{symmetry_value}' in equivalency_dict:
+                equivalency_dict[f'site_{symmetry_value}']['equivalent_sites_list'].append(index)
+            else:
+                equivalency_dict[f'site_{symmetry_value}'] = {
+                    'kind_name': type_mapping_dict[str(element_type)].name,
+                    'symbol': type_mapping_dict[str(element_type)].symbol,
+                    'site_index': symmetry_value,
+                    'equivalent_sites_list': [symmetry_value]
+                }
+
+    return equivalency_dict
+
+
+def get_supercell(structure, supercell_min_parameter, is_hubbard_structure, **kwargs):
+    """Generate a supercell of a PBC system, given a minimum cell length requirement."""
+    standard_pbc = structure.cell_lengths
+
+    result = {}
+    multiples = []
+    for length in standard_pbc:
+        multiple = np.ceil(supercell_min_parameter / length)
+        multiples.append(int(multiple))
+
+    result['multiples'] = multiples
+    ase_structure = structure.get_ase()
+    ase_supercell = ase_structure * multiples
+
+    # Here we construct the supercell site-by-site in order to keep the
+    # correct Kind ordering (if any)
+    blank_supercell = StructureData(ase=ase_supercell)
+    new_supercell = StructureData()
+    new_supercell.set_cell(blank_supercell.cell)
+    num_extensions = np.product(multiples)
+    types_order = kwargs['types_order']
+    type_mapping_dict = kwargs['type_mapping_dict']
+    supercell_types_order = []
+
+    for _ in range(0, num_extensions):
+        for type_number in types_order:
+            supercell_types_order.append(type_number)
+
+    for site, type_number in zip(blank_supercell.sites, supercell_types_order):
+        kind_present = type_mapping_dict[str(type_number)]
+        if kind_present.name not in [kind.name for kind in new_supercell.kinds]:
+            new_supercell.append_kind(kind_present)
+        new_site = Site(kind_name=kind_present.name, position=site.position)
+        new_supercell.append_site(new_site)
+
+    if is_hubbard_structure:
+        # Scale up the hubbard parameters to match and return the HubbardStructureData.
+        # We can exploit the fact that `get_hubbard_for_supercell` will return a HubbardStructureData node
+        # with the same hubbard parameters in the case where the input structure and the supercell are the
+        # same (i.e. multiples == [1, 1, 1])
+        hubbard_manip = HubbardUtils(structure)
+        new_hubbard_supercell = hubbard_manip.get_hubbard_for_supercell(new_supercell)
+        new_supercell = new_hubbard_supercell
+        supercell_hubbard_params = new_supercell.hubbard
+        result['supercell_hubbard_params'] = supercell_hubbard_params
+
+    result['new_supercell'] = new_supercell
+    return result
+
+
 @calcfunction
 def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-statements
     """Read a StructureData object using spglib for its symmetry data and return structures for XSpectra calculations.
@@ -52,6 +206,14 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
                              from the parent structure. For instance, use_element_types = True would
                              consider sites for Kinds 'Si' and 'Si1' to be equivalent if both are sites
                              containing silicon. Defaults to True.
+        - parse_symmetry: a Bool object to indicate that the symmetry data should be generated using spglib
+                          for the input structure. If False, will instead use symmetry data provided
+                          via the `equivalent_sites_data` and `spacegroup_number` inputs and generate all
+                          the structures based on this information. Defaults to True.
+        - equivalent_sites_data: a Dict object taking the form of the `equivalent_sites_data` dict normally
+                                 normally generated by the CF. Only read if `parse_symmetry = False`.
+        - spacegroup_number: an Int object defining the spacegroup number of the input structure, only read
+                             if `parse_symmetry = False`.
         - spglib_settings: an optional Dict object containing overrides for the symmetry
                             tolerance parameters used by spglib (symmprec, angle_tolerance).
         - pymatgen_settings: an optional Dict object containing overrides for the symmetry
@@ -63,10 +225,14 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
               all generated structure and associated symmetry data
     """
 
-    is_molecule_input = False
     elements_present = [kind.symbol for kind in structure.kinds]
     unwrapped_kwargs = {key: node for key, node in kwargs.items() if isinstance(node, orm.Data)}
-    if 'abs_atom_marker' in unwrapped_kwargs.keys():
+
+    is_molecule_input = False
+    if 'is_molecule_input' in unwrapped_kwargs:
+        is_molecule_input = kwargs['is_molecule_input'].value
+
+    if 'abs_atom_marker' in unwrapped_kwargs:
         abs_atom_marker = unwrapped_kwargs['abs_atom_marker'].value
         if abs_atom_marker in elements_present:
             raise ValidationError(
@@ -76,7 +242,7 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
         unwrapped_kwargs.pop('abs_atom_marker')
     else:
         abs_atom_marker = 'X'
-    if 'supercell_min_parameter' in unwrapped_kwargs.keys():
+    if 'supercell_min_parameter' in unwrapped_kwargs:
         supercell_min_parameter = unwrapped_kwargs.pop('supercell_min_parameter').value
         if supercell_min_parameter < 0:
             raise ValueError(
@@ -90,12 +256,12 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
     else:
         supercell_min_parameter = 8.0  # angstroms
         scale_unit_cell = True
-    if 'standardize_structure' in unwrapped_kwargs.keys():
+    if 'standardize_structure' in unwrapped_kwargs:
         standardize_structure = unwrapped_kwargs['standardize_structure'].value
         unwrapped_kwargs.pop('standardize_structure')
     else:
         standardize_structure = True
-    if 'absorbing_elements_list' in unwrapped_kwargs.keys():
+    if 'absorbing_elements_list' in unwrapped_kwargs:
         abs_elements_list = unwrapped_kwargs['absorbing_elements_list'].get_list()
         # confirm that the elements requested are actually in the input structure
         for req_element in abs_elements_list:
@@ -110,28 +276,23 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
             if kind.symbol not in abs_elements_list:
                 abs_elements_list.append(kind.symbol)
 
-    if 'is_molecule_input' in unwrapped_kwargs.keys():
-        is_molecule_input = unwrapped_kwargs['is_molecule_input'].value
-        # If we are working with a molecule, check for pymatgen_settings
-        if 'pymatgen_settings' in unwrapped_kwargs.keys():
-            pymatgen_settings_dict = unwrapped_kwargs.keys()['pymatgen_settings'].get_dict()
-            valid_keys = ['tolerance', 'eigen_tolerance', 'matrix_tolerance']
-            pymatgen_kwargs = {key: value for key, value in pymatgen_settings_dict.items() if key in valid_keys}
-        else:
-            pymatgen_kwargs = {}
-
-    if 'spglib_settings' in unwrapped_kwargs.keys():
+    if 'spglib_settings' in unwrapped_kwargs:
         spglib_settings_dict = unwrapped_kwargs['spglib_settings'].get_dict()
         valid_keys = ['symprec', 'angle_tolerance']
         spglib_kwargs = {key: value for key, value in spglib_settings_dict.items() if key in valid_keys}
     else:
         spglib_kwargs = {}
 
-    if 'use_element_types' in unwrapped_kwargs.keys():
+    if 'use_element_types' in unwrapped_kwargs:
         use_element_types = unwrapped_kwargs['use_element_types'].value
     else:
         use_element_types = True
 
+    if 'parse_symmetry' in unwrapped_kwargs:
+        parse_symmetry = unwrapped_kwargs['parse_symmetry'].value
+    else:
+        parse_symmetry = True
+
     if isinstance(structure, HubbardStructureData):
         is_hubbard_structure = True
         if standardize_structure:
@@ -156,94 +317,62 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
     output_params['input_structure_cell_lengths'] = incoming_structure_params
 
     # Process a non-periodic system
-    if is_molecule_input:
-        from pymatgen.symmetry.analyzer import PointGroupAnalyzer
-
-        pymatgen_molecule = structure.get_pymatgen_molecule()
-        centered_molecule = pymatgen_molecule.get_centered_molecule()
-        centered_positions = centered_molecule.cart_coords
-
-        analyzer_data = PointGroupAnalyzer(pymatgen_molecule, **pymatgen_kwargs)
-        eq_atoms_data = analyzer_data.get_equivalent_atoms()['eq_sets']
-        atomic_nos = pymatgen_molecule.atomic_numbers
-        point_group = str(analyzer_data.get_pointgroup())
-        output_params['molecule_point_group'] = point_group
-
-        equivalency_dict = {}
-
-        for key in eq_atoms_data:
-            site_symbol = elements[atomic_nos[key]]['symbol']
-            if site_symbol in abs_elements_list:
-                equivalency_dict[f'site_{key}'] = {}
-                atom_no_set = eq_atoms_data[key]
-                equivalency_dict[f'site_{key}']['site_index'] = key
-                equivalency_dict[f'site_{key}']['equivalent_sites_list'] = atom_no_set
-                equivalency_dict[f'site_{key}']['multiplicity'] = len(atom_no_set)
-                equivalency_dict[f'site_{key}']['symbol'] = site_symbol
+    get_supercell_inputs = {'is_hubbard_structure': is_hubbard_structure}
 
-        output_params['equivalent_sites_data'] = equivalency_dict
+    incoming_structure_tuple = structure_to_spglib_tuple(structure)
+    spglib_tuple = incoming_structure_tuple[0]
+    types_order = spglib_tuple[-1]
+    kinds_information = incoming_structure_tuple[1]
+    kinds_list = incoming_structure_tuple[2]
 
-        x_pos = []
-        y_pos = []
-        z_pos = []
-        for site in structure.sites:
-            x_pos.append(site.position[0])
-            y_pos.append(site.position[1])
-            z_pos.append(site.position[2])
-
-        # We assume that the Martyna-Tuckerman approach will be used in the core-hole SCF
-        # thus, at a minimum, the new box for the molecule needs to be 2x the extent of the
-        # molecule in x, y, and z. We therefore use the larger of either the MT distance or
-        # `supercell_min_parameter` for each cell parameter.
-        high_x = x_pos[(np.argmax(x_pos))]
-        high_y = y_pos[(np.argmax(y_pos))]
-        high_z = z_pos[(np.argmax(z_pos))]
-
-        low_x = x_pos[(np.argmin(x_pos))]
-        low_y = y_pos[(np.argmin(y_pos))]
-        low_z = z_pos[(np.argmin(z_pos))]
-
-        box_a = max((high_x - low_x) * 2, supercell_min_parameter)
-        box_b = max((high_y - low_y) * 2, supercell_min_parameter)
-        box_c = max((high_z - low_z) * 2, supercell_min_parameter)
-
-        new_supercell = StructureData()
-        new_supercell.set_cell(value=([box_a, 0., 0.], [0., box_b, 0.], [0., 0., box_c]))
+    # We need a way to reliably convert type number into element, so we
+    # first create a mapping of assigned number to kind name then a mapping
+    # of assigned number to ``Kind``
+    # Note that we set `types_order` and `types_mapping_dict` using the
+    # input structure in case the symmetry is not parsed later. If it is,
+    # then `types_order` and `type_mapping_dict` will be updated as needed.
 
-        for kind in structure.kinds:
-            new_supercell.append_kind(kind)
+    type_name_mapping = {str(value): key for key, value in kinds_information.items()}
+    type_mapping_dict = {}
 
-        for site in structure.sites:
-            new_supercell.append_site(site)
+    for key, value in type_name_mapping.items():
+        for kind in kinds_list:
+            if value == kind.name:
+                type_mapping_dict[key] = kind
 
-        # Reset the positions so that the centre of mass is at the centre of the new box
-        new_supercell.reset_sites_positions(centered_positions + (np.array(new_supercell.cell_lengths) / 2))
+    get_supercell_inputs['types_order'] = types_order
+    get_supercell_inputs['type_mapping_dict'] = type_mapping_dict
 
+    if is_molecule_input:
+        process_molecule_result = process_molecule_input(structure, **kwargs)
+        new_supercell = process_molecule_result['supercell']
+        for key, value in process_molecule_result['output_params'].items():
+            output_params[key] = value
+        equivalency_dict = output_params['equivalent_sites_data']
+    # Process a periodic system
+    elif not parse_symmetry:
+        equivalency_dict = kwargs['equivalent_sites_data'].get_dict()
+        output_params['equivalent_sites_data'] = equivalency_dict
+        output_params['spacegroup_number'] = kwargs['spacegroup_number'].value
+        output_params['symmetry_parsed'] = False
+        output_params['structure_is_standardized'] = False
+        get_supercell_inputs['structure'] = structure
+        get_supercell_inputs['supercell_min_parameter'] = supercell_min_parameter
+
+        if not scale_unit_cell:
+            multiples = [1, 1, 1]
+            new_supercell = structure
+        else:
+            get_supercell_result = get_supercell(**get_supercell_inputs)
+            multiples = get_supercell_result['multiples']
+            new_supercell = get_supercell_result['new_supercell']
+            output_params['supercell_factors'] = multiples
+
+        output_params['supercell_num_sites'] = len(new_supercell.sites)
         output_params['supercell_cell_matrix'] = new_supercell.cell
         output_params['supercell_cell_lengths'] = new_supercell.cell_lengths
-        output_params['supercell_num_sites'] = len(new_supercell.sites)
-        result['supercell'] = new_supercell
 
-    # Process a periodic system
     else:
-        incoming_structure_tuple = structure_to_spglib_tuple(structure)
-        spglib_tuple = incoming_structure_tuple[0]
-        types_order = spglib_tuple[-1]
-        kinds_information = incoming_structure_tuple[1]
-        kinds_list = incoming_structure_tuple[2]
-
-        # We need a way to reliably convert type number into element, so we
-        # first create a mapping of assigned number to kind name then a mapping
-        # of assigned number to ``Kind``
-
-        type_name_mapping = {str(value): key for key, value in kinds_information.items()}
-        type_mapping_dict = {}
-
-        for key, value in type_name_mapping.items():
-            for kind in kinds_list:
-                if value == kind.name:
-                    type_mapping_dict[key] = kind
-
         # By default, `structure_to_spglib_tuple` gives different
         # ``Kinds`` of the same element a distinct atomic number by
         # multiplying the normal atomic number by 1000, then adding
@@ -268,7 +397,7 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
         # the user has submitted an improper crystal for calculation work and doesn't want it to
         # be changed.
         if symmetry_dataset['number'] in [1, 2] or not standardize_structure:
-            standardized_structure_node = spglib_tuple_to_structure(spglib_tuple, kinds_information, kinds_list)
+            standardized_structure_node = structure
             structure_is_standardized = False
         else:  # otherwise, we proceed with the standardized structure.
             standardized_structure_tuple = spglib.standardize_cell(spglib_tuple, **spglib_kwargs)
@@ -280,6 +409,8 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
             symmetry_dataset = spglib.get_symmetry_dataset(standardized_structure_tuple, **spglib_kwargs)
             structure_is_standardized = True
 
+        get_supercell_inputs['structure'] = standardized_structure_node
+        get_supercell_inputs['supercell_min_parameter'] = supercell_min_parameter
         equivalent_atoms_array = symmetry_dataset['equivalent_atoms']
 
         if structure_is_standardized:
@@ -302,19 +433,12 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
                 primitive_types.append(atom_type)
             element_types = primitive_types
 
-        equivalency_dict = {}
-        for index, symmetry_types in enumerate(zip(equivalent_atoms_array, element_types)):
-            symmetry_value, element_type = symmetry_types
-            if type_mapping_dict[str(element_type)].symbol in abs_elements_list:
-                if f'site_{symmetry_value}' in equivalency_dict:
-                    equivalency_dict[f'site_{symmetry_value}']['equivalent_sites_list'].append(index)
-                else:
-                    equivalency_dict[f'site_{symmetry_value}'] = {
-                        'kind_name': type_mapping_dict[str(element_type)].name,
-                        'symbol': type_mapping_dict[str(element_type)].symbol,
-                        'site_index': symmetry_value,
-                        'equivalent_sites_list': [symmetry_value]
-                    }
+        equivalency_dict = get_spglib_equivalency_dict(
+            equivalent_atoms_array, abs_elements_list, element_types, type_mapping_dict
+        )
+        output_params['symmetry_parsed'] = True
+        # update the types order, in case it has changed during symmetry analysis
+        get_supercell_inputs['types_order'] = element_types
 
         for value in equivalency_dict.values():
             value['multiplicity'] = len(value['equivalent_sites_list'])
@@ -331,53 +455,16 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
             output_params['standardized_structure_cell_matrix'] = standardized_structure_node.cell
             output_params['standardized_structure_params'] = standardized_structure_node.cell_lengths
 
-        standard_pbc = standardized_structure_node.cell_lengths
-
-        multiples = [1, 1, 1]
-        if scale_unit_cell:
-            multiples = []
-            for length in standard_pbc:
-                multiple = np.ceil(supercell_min_parameter / length)
-                multiples.append(int(multiple))
+        if not scale_unit_cell:
+            multiples = [1, 1, 1]
+            new_supercell = standardized_structure_node
+        else:
+            get_supercell_result = get_supercell(**get_supercell_inputs)
+            multiples = get_supercell_result['multiples']
+            new_supercell = get_supercell_result['new_supercell']
 
-        ase_structure = standardized_structure_node.get_ase()
-        ase_supercell = ase_structure * multiples
+        result['supercell'] = new_supercell
 
-        # if there are hubbard data to apply, we re-construct
-        # the supercell site-by-site to keep the correct ordering
-        if is_hubbard_structure:
-            blank_supercell = StructureData(ase=ase_supercell)
-            new_supercell = StructureData()
-            new_supercell.set_cell(blank_supercell.cell)
-            num_extensions = np.product(multiples)
-            supercell_types_order = []
-            # For each supercell extension, loop over each site.
-            # This way, the original pattern-ordering of sites is
-            # preserved.
-            for i in range(0, num_extensions):  # pylint: disable=unused-variable
-                for type_number in types_order:
-                    supercell_types_order.append(type_number)
-
-            for site, type_number in zip(blank_supercell.sites, supercell_types_order):
-                kind_present = type_mapping_dict[str(type_number)]
-                if kind_present.name not in [kind.name for kind in new_supercell.kinds]:
-                    new_supercell.append_kind(kind_present)
-                new_site = Site(kind_name=kind_present.name, position=site.position)
-                new_supercell.append_site(new_site)
-        else:  # otherwise, simply re-construct the supercell with ASE
-            new_supercell = StructureData(ase=ase_supercell)
-
-        if is_hubbard_structure:  # Scale up the hubbard parameters to match and return the HubbardStructureData
-            # we can exploit the fact that `get_hubbard_for_supercell` will return a HubbardStructureData node
-            # with the same hubbard parameters in the case where the input structure and the supercell are the
-            # same (i.e. multiples == [1, 1, 1])
-            hubbard_manip = HubbardUtils(structure)
-            new_hubbard_supercell = hubbard_manip.get_hubbard_for_supercell(new_supercell)
-            new_supercell = new_hubbard_supercell
-            supercell_hubbard_params = new_supercell.hubbard
-            result['supercell'] = new_supercell
-        else:
-            result['supercell'] = new_supercell
         output_params['supercell_factors'] = multiples
         output_params['supercell_num_sites'] = len(new_supercell.sites)
         output_params['supercell_cell_matrix'] = new_supercell.cell
@@ -404,7 +491,7 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
 
         if is_hubbard_structure:
             marked_hubbard_structure = HubbardStructureData.from_structure(marked_structure)
-            marked_hubbard_structure.hubbard = supercell_hubbard_params
+            marked_hubbard_structure.hubbard = get_supercell_result['supercell_hubbard_params']
             result[f'site_{target_site}_{value["symbol"]}'] = marked_hubbard_structure
         else:
             result[f'site_{target_site}_{value["symbol"]}'] = marked_structure
diff --git a/tests/workflows/functions/test_get_xspectra_structures.py b/tests/workflows/functions/test_get_xspectra_structures.py
index e192bbc4..1daf5fea 100644
--- a/tests/workflows/functions/test_get_xspectra_structures.py
+++ b/tests/workflows/functions/test_get_xspectra_structures.py
@@ -36,8 +36,8 @@ def test_base(generate_structure):
     """Test the basic operation of get_xspectra_structures."""
 
     c_si = generate_structure('silicon')
-    spglib_options = Dict({'symprec': 1.0e-3})
-    inputs = {'structure': c_si, 'spglib_options': spglib_options}
+    spglib_settings = Dict({'symprec': 1.0e-3})
+    inputs = {'structure': c_si, 'spglib_settings': spglib_settings}
     result = get_xspectra_structures(**inputs)
     assert len(result) == 4
     out_params = result['output_parameters'].get_dict()
@@ -51,12 +51,12 @@ def test_use_element_types(generate_structure):
 
     c_si = generate_structure('silicon')
     c_si_kinds = generate_structure('silicon-kinds')
-    spglib_options = Dict({'symprec': 1.0e-3})
-    inputs_bare = {'structure': c_si, 'spglib_options': spglib_options}
+    spglib_settings = Dict({'symprec': 1.0e-3})
+    inputs_bare = {'structure': c_si, 'spglib_settings': spglib_settings}
     inputs_kinds = {
         'structure': c_si_kinds,
         'use_element_types': Bool(False),
-        'spglib_options': spglib_options,
+        'spglib_options': spglib_settings,
         'standardize_structure': Bool(False)
     }
 
@@ -65,7 +65,7 @@ def test_use_element_types(generate_structure):
 
     inputs_element_types = {
         'structure': c_si_kinds,
-        'spglib_options': spglib_options,
+        'spglib_options': spglib_settings,
         'standardize_structure': Bool(False),
     }
     result_element_types = get_xspectra_structures(**inputs_element_types)

From 3a47e2a494e8f7868ea0a35e135774b8212580dc Mon Sep 17 00:00:00 2001
From: Peter N O Gillespie <peter.no.gillespie@gmail.com>
Date: Tue, 30 Apr 2024 10:11:37 +0000
Subject: [PATCH 2/4] `get_xspectra_structures`: Changes for PR 1026 (1)

1st set of changes requested for #1026:

* Added test of manual symmetry data input functionality.
* Removed unnecessary checks for keys of Bool nodes in
  `unwrapped_kwargs` as well as extracting of truth values in favour of
using Bool objects directly.
* Removed unnecessary comment in `process_molecular_system`
* Added example to docstring to demonstrate format for
  `equivalent_sites_data` input
---
 .../functions/get_xspectra_structures.py      | 23 ++++++------
 .../functions/test_get_xspectra_structures.py | 36 ++++++++++++++++++-
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
index 0986d0e2..3caa418e 100644
--- a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
+++ b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
@@ -212,6 +212,13 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
                           the structures based on this information. Defaults to True.
         - equivalent_sites_data: a Dict object taking the form of the `equivalent_sites_data` dict normally
                                  normally generated by the CF. Only read if `parse_symmetry = False`.
+                                 Example: {
+                                    'site_0' : {
+                                        'symbol' : 'Si',
+                                        'site_index' : 0,
+                                        'multiplicity' : 8
+                                        }
+                                    }
         - spacegroup_number: an Int object defining the spacegroup number of the input structure, only read
                              if `parse_symmetry = False`.
         - spglib_settings: an optional Dict object containing overrides for the symmetry
@@ -228,9 +235,9 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
     elements_present = [kind.symbol for kind in structure.kinds]
     unwrapped_kwargs = {key: node for key, node in kwargs.items() if isinstance(node, orm.Data)}
 
-    is_molecule_input = False
-    if 'is_molecule_input' in unwrapped_kwargs:
-        is_molecule_input = kwargs['is_molecule_input'].value
+    is_molecule_input = unwrapped_kwargs.get('is_molecule_input', False)
+    use_element_types = unwrapped_kwargs.get('use_element_types', True)
+    parse_symmetry = unwrapped_kwargs.get('parse_symmetry', True)
 
     if 'abs_atom_marker' in unwrapped_kwargs:
         abs_atom_marker = unwrapped_kwargs['abs_atom_marker'].value
@@ -283,16 +290,6 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
     else:
         spglib_kwargs = {}
 
-    if 'use_element_types' in unwrapped_kwargs:
-        use_element_types = unwrapped_kwargs['use_element_types'].value
-    else:
-        use_element_types = True
-
-    if 'parse_symmetry' in unwrapped_kwargs:
-        parse_symmetry = unwrapped_kwargs['parse_symmetry'].value
-    else:
-        parse_symmetry = True
-
     if isinstance(structure, HubbardStructureData):
         is_hubbard_structure = True
         if standardize_structure:
diff --git a/tests/workflows/functions/test_get_xspectra_structures.py b/tests/workflows/functions/test_get_xspectra_structures.py
index 1daf5fea..5a13af14 100644
--- a/tests/workflows/functions/test_get_xspectra_structures.py
+++ b/tests/workflows/functions/test_get_xspectra_structures.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """Tests for the `get_marked_structure` class."""
-from aiida.orm import Bool, Dict
+from aiida.orm import Bool, Dict, Int
 import pytest
 
 from aiida_quantumespresso.utils.hubbard import HubbardStructureData, HubbardUtils
@@ -98,3 +98,37 @@ def test_hubbard(generate_structure):
     assert out_params['supercell_num_sites'] == 54
     assert 'U\tX-1s\t0.0' in hub_card_lines
     assert 'U\tSi-1s\t0.0' in hub_card_lines
+
+
+def test_symmetry_input(generate_structure):
+    """Test the basic operation of get_xspectra_structures."""
+
+    c_si = generate_structure('silicon')
+    sites_data = {'site_0': {'symbol': 'Si', 'site_index': 1, 'multiplicity': 8}}
+    inputs = {
+        'structure': c_si,
+        'equivalent_sites_data': Dict(sites_data),
+        'spacegroup_number': Int(220),
+        'parse_symmetry': Bool(False),
+    }
+
+    # Test also to confirm at leaving out `parse_symmetry` should cause
+    # the CF to ignore the provided symmetry data.
+    inputs_no_parse_symmetry = {
+        'structure': c_si,
+        'equivalent_sites_data': Dict(sites_data),
+        'spacegroup_number': Int(220),
+    }
+
+    result = get_xspectra_structures(**inputs)
+    result_no_parse_symmetry = get_xspectra_structures(**inputs_no_parse_symmetry)
+
+    out_params = result['output_parameters'].get_dict()
+    out_params_no_parse_symmetry = result_no_parse_symmetry['output_parameters'].get_dict()
+
+    assert out_params['spacegroup_number'] == 220
+    assert out_params['symmetry_parsed'] is False
+    assert out_params['equivalent_sites_data'] == sites_data
+    assert out_params_no_parse_symmetry['spacegroup_number'] == 227
+    assert out_params_no_parse_symmetry['symmetry_parsed'] is True
+    assert out_params_no_parse_symmetry['equivalent_sites_data'] != sites_data

From 7079f2d82e45585c60d2772050abd86646bd7b3f Mon Sep 17 00:00:00 2001
From: Peter N O Gillespie <peter.no.gillespie@gmail.com>
Date: Tue, 30 Apr 2024 11:37:16 +0000
Subject: [PATCH 3/4] get_xspectra_structures`: Fix Docstring

Changes the docstring so that the `equivalent_sites_data` example is
in-line, as required by the docs. Also adds a sentence stating which
options are required and that all other options are ignored.
---
 .../workflows/functions/get_xspectra_structures.py     | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
index 3caa418e..23d48cd5 100644
--- a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
+++ b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
@@ -212,13 +212,9 @@ def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-st
                           the structures based on this information. Defaults to True.
         - equivalent_sites_data: a Dict object taking the form of the `equivalent_sites_data` dict normally
                                  normally generated by the CF. Only read if `parse_symmetry = False`.
-                                 Example: {
-                                    'site_0' : {
-                                        'symbol' : 'Si',
-                                        'site_index' : 0,
-                                        'multiplicity' : 8
-                                        }
-                                    }
+                                 Example: {'site_0' : {'symbol' : 'Si', 'site_index' : 0, 'multiplicity' : 8}}
+                                 Note that, for each site, only 'symbol', 'site_index', and 'multiplicity' are
+                                 required, all other options are ignored.
         - spacegroup_number: an Int object defining the spacegroup number of the input structure, only read
                              if `parse_symmetry = False`.
         - spglib_settings: an optional Dict object containing overrides for the symmetry

From fbba65160a15df4399c1d74e025eb0a046201c6c Mon Sep 17 00:00:00 2001
From: Peter N O Gillespie <peter.no.gillespie@gmail.com>
Date: Tue, 30 Apr 2024 12:13:35 +0000
Subject: [PATCH 4/4] `get_xspectra_structures`: Remove Unnecessary Comment

Removes an unnecessary comment from `process_molecule_input` which
should have been removed in a previous commit.
---
 .../workflows/functions/get_xspectra_structures.py               | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
index 23d48cd5..369bfa45 100644
--- a/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
+++ b/src/aiida_quantumespresso/workflows/functions/get_xspectra_structures.py
@@ -20,7 +20,6 @@ def process_molecule_input(structure, **kwargs):  # pylint: disable=too-many-sta
     from pymatgen.symmetry.analyzer import PointGroupAnalyzer
 
     result = {'output_params': {}}
-    # If we are working with a molecule, check for pymatgen_settings
     if 'pymatgen_settings' in kwargs:
         pymatgen_settings_dict = kwargs['pymatgen_settings'].get_dict()
         valid_keys = ['tolerance', 'eigen_tolerance', 'matrix_tolerance']