Skip to content

Commit

Permalink
google sentiment evaluater evals
Browse files Browse the repository at this point in the history
  • Loading branch information
jlherzberg committed Oct 16, 2019
1 parent bf77e40 commit de798a9
Showing 1 changed file with 36 additions and 30 deletions.
66 changes: 36 additions & 30 deletions lib/tagnews/senteval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,19 @@ def process_google_result(text):
return mention.sentiment.score


def clean_html_text(html_text):
    """Reduce text to its alphabetic characters, lower-cased.

    Every character for which ``str.isalpha`` is false (digits, whitespace,
    punctuation, angle brackets, ...) is dropped, so separate words run
    together: ``"Police Officer"`` becomes ``"policeofficer"``.

    Markup is not parsed: letters that belong to tag names are kept like
    any other letter.

    Parameters
    ----------
    html_text : str
        Text to normalize.

    Returns
    -------
    str
        The letters of ``html_text`` in their original order, lower-cased.
    """
    letters_only = "".join(filter(str.isalpha, html_text))
    return letters_only.lower()


class SentimentGoogler:
def __init__(self):
    """Store the client returned by ``connect_to_client`` on ``self.client``."""
    self.client = self.connect_to_client()

def run(self, doc_text):
    """Return the first police-related entity found in a document.

    Parameters
    ----------
    doc_text : str
        Text to analyze; handed unchanged to ``self.call_api``.

    Returns
    -------
    object or bool
        The first truthy value that ``self.is_police_entity`` yields for
        an entity of the response, or ``False`` when no entity matches
        (including when the response has no entities at all).
    """
    sentiment_ = self.call_api(doc_text)
    for entity in sentiment_.entities:
        police_entity = self.is_police_entity(entity)
        # Stop at the first hit: continuing the loop would let a later,
        # non-matching entity overwrite an earlier match.
        if police_entity:
            return police_entity
    # Reached with no match or an empty entity list; an explicit return
    # avoids referencing a loop variable that was never bound.
    return False

def connect_to_client(self):
    """Construct the client object used for API requests.

    Returns
    -------
    language.LanguageServiceClient
        A new client created with default arguments.
    """
    # NOTE(review): ``language`` is imported outside this view, presumably
    # from ``google.cloud`` -- confirm against the file's import block.
    return language.LanguageServiceClient()

@staticmethod
def pre_process(html_text):
"""
Parameters
----------
html_text : str
Article text.
Returns
-------
words: str
lower case, just letters
"""
words = "".join(filter(str.isalpha, html_text)).lower()
return words

def call_api(self, doc_text):
"""
Parameters
Expand All @@ -55,16 +41,22 @@ def call_api(self, doc_text):
sentiment : json
google response call
"""
cleaned_doc_text = self.pre_process(doc_text)
document = types.Document(
content=cleaned_doc_text, type=enums.Document.Type.PLAIN_TEXT
content=doc_text, type=enums.Document.Type.PLAIN_TEXT
)
sentiment = self.client.analyze_entity_sentiment(document=document)

return sentiment

@staticmethod
def is_police_entity(sentiment_response):
def is_police_entity(self, entity):
possible_responses = [
"police",
"officer",
"cop",
"officers",
"pigs",
"policeofficer",
]
possible_responses = [
"police",
"officer",
Expand All @@ -73,10 +65,24 @@ def is_police_entity(sentiment_response):
"pigs",
"policeofficer",
]
for entity in sentiment_response.entities:
if clean_html_text(clean_entity) in possible_responses:
if entity in possible_responses:
return entity
for mention in entity.mentions:
if pre_process_text(mention.text.content) in possible_responses:
return entity
for mention in entity.mentions:
if clean_html_text(mention.text.content) in possible_responses:
return entity
return False
return False

def pre_process_text(html_text):
    """Strip a text down to its lower-cased letters.

    Anything that ``str.isalpha`` rejects (digits, whitespace, punctuation)
    is discarded, which also removes the gaps between words:
    ``"Police Officer"`` becomes ``"policeofficer"``.

    Parameters
    ----------
    html_text : str
        Article text.

    Returns
    -------
    words : str
        Lower case, just letters.
    """
    words = "".join(filter(str.isalpha, html_text)).lower()
    return words

0 comments on commit de798a9

Please sign in to comment.