Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_retrieved_files option for the PP plugin. #1029

Merged
merged 12 commits into from
Nov 20, 2024
5 changes: 3 additions & 2 deletions src/aiida_quantumespresso/calculations/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def define(cls, spec):
spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp')
spec.input('metadata.options.withmpi', valid_type=bool, default=True)
spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False)
spec.input('metadata.options.parse_retrieved_files', valid_type=bool, default=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a docstring as to what this does


spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_data', valid_type=orm.ArrayData)
Expand Down Expand Up @@ -218,10 +219,10 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t
# distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout`
# value as a suffix.
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)]

if self.inputs.metadata.options.keep_plot_file:
calcinfo.retrieve_list.extend(retrieve_tuples)
else:
# If we do not want to parse the retrieved files, temporary retrieval is meaningless
elif self.inputs.metadata.options.parse_retrieved_files:
calcinfo.retrieve_temporary_list.extend(retrieve_tuples)

return calcinfo
58 changes: 29 additions & 29 deletions src/aiida_quantumespresso/parsers/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,35 +117,35 @@ def get_key_from_filename(filename):
matches = re.search(pattern, filename)
return matches.group(1)

for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

# Create output nodes
if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))
if self.node.base.attributes.get('parse_retrieved_files', True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of moving this entire block in a conditional, we can simply exit early. All the code starting from line 67 is only necessary to parse the additional output data. So I propose we simply add

if not self.node.base.attributes.get('parse_retrieved_files', True):
    return self.exit(logs=logs)

after line 65.

for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))

return self.exit(logs=logs)

Expand Down
Loading