Skip to content

Commit

Permalink
feat: added searching for keywords from abstract as well.
Browse files (browse the repository at this point in the history)
  • Loading branch information
actions-user committed Aug 11, 2024
1 parent a7fbf9c commit 640d9df
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions printing_press.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,38 +60,47 @@ def _collect_feeds(self,):
self.nArticles = len(notes)
return notes

def _clean_notes(self, i, entry):
    """Return a dict holding the article content and link of one feed entry.

    The entry's summary is stripped of its author block, of any remaining
    markup tags and of newlines (each replaced by a single space).  The
    optional ``abstract`` attribute is appended to the summary so that
    keyword searches over ``'text'`` cover both.

    Args:
        i: Position of the entry; not used by this method.
        entry: Feed entry exposing ``summary``, ``title`` and ``link`` and,
            optionally, ``author`` and ``abstract`` as attributes.

    Returns:
        A dict with the keys ``text``, ``link``, ``title``, ``authors``,
        ``summary`` and ``abstract``.  When the entry cannot be processed,
        ``{'text': None}`` is returned instead and a message is appended to
        ``self.errors``.
    """
    # Order matters: drop the author block first, then leftover tags, then
    # newlines.
    summary = entry.summary
    for pattern in (r'Author[^>]+>', r'<[^>]+>', r'\n'):
        summary = sub(pattern, ' ', summary)

    # Include the abstract if available; a present-but-None abstract is
    # treated like a missing one instead of breaking the concatenation.
    abstract = getattr(entry, 'abstract', '') or ''
    full_text = summary + ' ' + abstract

    # Sentinel so that only a genuinely absent author selects the
    # "no author" record; any other bad author value is reported as an error.
    no_author = object()
    try:
        author = getattr(entry, 'author', no_author)
        if author is no_author:
            text = entry.title + '\n' + full_text
            authors = 'No Author found in RSS'
        else:
            text = (entry.title + '\n' + sub(r'\.', '', author)
                    + '\n' + full_text)
            authors = sub(r'<[^>]+>', '', author)
        return {'text': text,
                'link': entry.link,
                'title': entry.title,
                'authors': authors,
                'summary': summary,
                'abstract': abstract,
                }
    except Exception as exc:
        # Best effort: record the failure and hand back an empty note.
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        self.errors.append('Error: {}'.format(type(exc)) + '\n'
                           + str(getattr(entry, 'link', '')))
        return {'text': None}
def _review_note(self, note, words):
    """Check the note for any keywords.

    The note's title is recorded in ``self.titles`` and every keyword is
    looked up in ``note['text']`` through ``self._search_note``.

    Args:
        note: Dict with at least the keys ``'title'`` and ``'text'``.
        words: Sequence of keywords to look for.

    Returns:
        A dict with ``'quality'`` (the sum of the per-keyword search
        results), ``'matching'`` (the keywords whose search result equals
        1) and ``'entry'`` (the note itself).
    """
    self.titles.append(note['title'])

    # One search over note['text'] per keyword; presumably that text holds
    # both summary and abstract -- confirm against how the note is built.
    match_list = [self._search_note(note['text'], word) for word in words]
    match_words = [word for word, hit in zip(words, match_list) if hit == 1]
    return {'quality': sum(match_list),
            'matching': match_words,
            'entry': note}

def _search_note(self,note,word):
Expand Down

0 comments on commit 640d9df

Please sign in to comment.