Skip to content

Commit

Permalink
add extra checks for parsing comments
Browse files: browse the repository at this point in the history
  • Loading branch information
sevyharris committed Dec 22, 2024
1 parent 562c6de commit 66a6888
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 19 deletions.
63 changes: 44 additions & 19 deletions rmgpy/data/kinetics/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -4429,6 +4429,8 @@ def assign_weights_to_entries(entry_nested_list, weighted_entries, n=1):
re.sub(r"(?<=]) \+ (?!Average)", ",'", # + sign between average and non-average
re.sub(r"(?<!]) \+ (?!Average)", "','", # + sign between non-averages
comment)))))))
if "['" in eval_comment_string and "']" not in eval_comment_string:
eval_comment_string = eval_comment_string.replace("]", "']")

entry_nested_list = eval(eval_comment_string)

Expand Down Expand Up @@ -4489,6 +4491,11 @@ def extract_source_from_comments(self, reaction):
training_entries = None
degeneracy = 1

for line in lines:
if 'Ea raised from' in line:
print('how is this handled?')
break

regex = r"\[(.*)\]" # only hit outermost brackets
for line in lines:
if line.startswith('Matched'):
Expand Down Expand Up @@ -4522,26 +4529,44 @@ def extract_source_from_comments(self, reaction):

# Check whether we're using the old rate rule templates or the new BM tree nodes
if full_comment_string.find('for rate rule') < 0: # New trees

start_tag = 'Estimated from node '
end_tag = 'Multiplied by reaction path degeneracy'

start_loc = full_comment_string.find(start_tag)
end_loc = full_comment_string.find(end_tag)
if start_loc == -1:
raise ValueError('Could not find start of node in comments')
if end_loc == -1:
# check if the nodename is the last token
node_tokens = full_comment_string.split()
if node_tokens[-2] == 'node':
end_loc = None
else:
raise ValueError('Could not find end of node in comments')
matches = []
matches = re.search(r'This reaction matched rate rule \[(.*?)\]', full_comment_string)
if matches:
node = matches[1]
node = node.replace('\\n', '')
node = node.replace('# ', '')
node = node.replace('! ', '')
node = node.replace(' ', '')
node = ''.join(node.split())
if ' ' in node:
print('problem!')
else:
start_tag = 'Estimated from node '
end_tag = 'Multiplied by reaction path degeneracy'

start_loc = full_comment_string.find(start_tag)
end_loc = full_comment_string.find(end_tag)
if start_loc == -1:
raise ValueError('Could not find start of node in comments')
if end_loc == -1:
# check if the nodename is the last token
node_tokens = full_comment_string.split()
if node_tokens[-2] == 'node':
end_loc = None
else:
end_loc = None
print(f'Warning: Could not find end of node in comments {full_comment_string}')
# raise ValueError('Could not find end of node in comments')

node = full_comment_string[start_loc + len(start_tag): end_loc]
node = node.replace('\\n', '')
node = node.replace('# ', '')
node = node.replace('! ', '')
node = node.replace(' ', '')
node = ''.join(node.split())
if ' ' in node:
print('problem here!')

node = full_comment_string[start_loc + len(start_tag): end_loc]
node = node.replace('\\n', '')
node = node.replace('# ', '')
node = node.replace(' ', '')
rules = ''
training_entries = ''
template = ''
Expand Down
5 changes: 5 additions & 0 deletions rmgpy/tools/uncertainty.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,11 @@ def extract_sources_from_model(self):
# Fetch standard deviation if autogenerated tree
if 'node' in source['Rate Rules'][1] and source['Rate Rules'][1]['node'] != '':
node_name = source['Rate Rules'][1]['node']
# while ' ' in node_name:
# node_name = ''.join(node_name.split())
if 'Earaised' in node_name:
node_name = node_name.split('Earaised')[0]

long_desc = self.database.kinetics.families[reaction.family].rules.entries[node_name][0].long_desc
std_dev_matches = re.search(r'Standard Deviation in ln\(k\): ([0-9]*.[0-9]*)', long_desc)
std_dev = -1.0
Expand Down

0 comments on commit 66a6888

Please sign in to comment.