Skip to content

Commit

Permalink
Merge pull request #111 from bioinfodlsu/text-mining
Browse files Browse the repository at this point in the history
Limit number of text mining results
  • Loading branch information
pbong authored Sep 5, 2023
2 parents be73b66 + f6c6358 commit 4d144f9
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions callbacks/text_mining/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# Column labels of the results DataFrame built in text_mining_query_search.
COLNAMES = ['Gene', 'PMID', 'Title', 'Sentence', 'Score']
# NOTE(review): presumably the minimum similarity score for a match to be
# kept; its use is not visible in this part of the file -- confirm.
SIMILARITY_CUTOFF = 85
# The search loop breaks once this many distinct PMIDs have a match with a
# similarity score of exactly 100. It counts perfect-score PMIDs, not rows of
# the results DataFrame, so the DataFrame may hold more rows than this.
MAX_NUM_RESULTS = 100


def sanitize_text(text):
Expand Down Expand Up @@ -100,6 +101,7 @@ def text_mining_query_search(query_string):

df = pd.DataFrame(columns=COLNAMES)
pubmed_matches = set()
pubmed_matches_100 = set()

with open(Constants.TEXT_MINING_ANNOTATED_ABSTRACTS, 'r', encoding='utf8') as f:
for line in f:
Expand Down Expand Up @@ -176,6 +178,12 @@ def text_mining_query_search(query_string):
df.loc[len(df.index)] = [Entity, PMID,
Title, Sentence, similarity.score]

if similarity.score == 100:
pubmed_matches_100.add(PMID)

if len(pubmed_matches_100) == MAX_NUM_RESULTS:
break

except:
pass

Expand Down

0 comments on commit 4d144f9

Please sign in to comment.