-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Issue #5] Refactored etl.py and minor formatting
- Loading branch information
1 parent
12e83e0
commit e9c1801
Showing
3 changed files
with
156 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,45 @@ | ||
from enum import Enum | ||
|
||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | ||
|
||
|
||
class Sentiment(Enum):
    """General sentiment category assigned to a tweet.

    Every member carries a plain integer value. The values must not be
    followed by a trailing comma: ``NEUTRAL = 0,`` would silently turn the
    value into the one-element tuple ``(0,)``, making it inconsistent with
    the other members and breaking lookups such as ``Sentiment(0)``.
    """

    NEUTRAL = 0
    POSITIVE = 1
    NEGATIVE = -1
|
||
|
||
def get_score_tweet(tweet_text):
    """Get VaderSentiment score of the tweet text.

    Args:
        tweet_text (str): Text for performing sentiment analysis.

    Returns:
        dict: Score in the form
            {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
            refer https://github.com/cjhutto/vaderSentiment.
    """
    # The analyzer is stored as an attribute on this function by a
    # module-level assignment, so the same instance is reused on every call.
    return get_score_tweet.analyzer.polarity_scores(tweet_text)
|
||
|
||
def get_tweet_sentiment(score):
    """Get the general sentiment from the sentiment score.

    Args:
        score (dict): Polarity scores as returned by VaderSentiment; only
            the 'compound' entry is read.

    Returns:
        Sentiment: Sentiment.NEGATIVE when the compound score is <= -0.05,
            Sentiment.POSITIVE when it is >= 0.05, and Sentiment.NEUTRAL
            for anything in between.
    """
    compound = score['compound']

    if compound <= -0.05:
        return Sentiment.NEGATIVE
    if compound >= 0.05:
        return Sentiment.POSITIVE

    return Sentiment.NEUTRAL
|
||
|
||
# Create the analyzer once at import time and attach it to get_score_tweet
# as a function attribute; get_score_tweet reads it back on each call.
get_score_tweet.analyzer = SentimentIntensityAnalyzer() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import socket | ||
|
||
import pymongo | ||
|
||
from sentimental_analysis import Sentiment | ||
|
||
|
||
def get_sentiment_html(sentiment):
    """Build the html snippet that shows a tweet's general sentiment in a
    streamlit markdown element.

    Args:
        sentiment (Sentiment): General sentiment.

    Returns:
        str: A ``<p>`` element containing the sentiment, colored red for
            Negative, green for Positive and yellow for anything else
            (Neutral).
    """
    html_color = (
        'red' if sentiment == Sentiment.NEGATIVE
        else 'green' if sentiment == Sentiment.POSITIVE
        else 'yellow'
    )
    return f'<p style="color:{html_color}">{sentiment}</p>'
|
||
def is_replica_set(mongo_db):
    """Tell whether the given mongoDB client is connected to a replica set.

    The check issues the "replSetGetStatus" admin command and treats a
    pymongo OperationFailure as "not a replica set".

    Args:
        mongo_db (pymongo.MongoClient): MongoDB client comprising
            of database "tweets".

    Returns:
        bool: True if a replicaset else false.
    """
    try:
        mongo_db.admin.command("replSetGetStatus")
    except pymongo.errors.OperationFailure:
        return False
    else:
        return True
|
||
def get_mongodb_replica_set():
    """Connect to mongodb, initiating the replica set when needed.

    Returns:
        pymongo.MongoClient: pymongo client
    """
    client = pymongo.MongoClient(
        host="mongodb",
        port=27017,
        directConnection=True,
    )

    # Nothing to configure when the server already reports a replica set.
    if is_replica_set(client):
        return client

    # Otherwise initiate a single-member replica set on this server.
    replica_set_config = {
        '_id': 'dbrs',
        'members': [
            {'_id': 0, 'host': 'mongodb:27017'},
        ],
    }
    client.admin.command("replSetInitiate", replica_set_config)
    return client
|
||
|
||
def send_query_to_tweet_stream(query):
    """Pass the query to the tweet collector process via python socket
    communication.

    Args:
        query (str): query for filtering tweets in Twitter API tweets
            streaming.

    Returns:
        bool: True when connecting and passing the query was successful,
            False when the collector could not be resolved or reached, or
            the send failed.
    """
    port = 8888  # The port used by the server
    try:
        # Resolve the server's hostname inside the try block: an unknown
        # host raises socket.gaierror, which must also yield False.
        host = socket.gethostbyname('tweet_collector')
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, port))
            s.sendall(query.encode('utf-8'))
    except OSError:
        # OSError is the base class of socket.gaierror,
        # ConnectionRefusedError, timeouts and other socket failures.
        return False
    return True
|