diff --git a/clean.py b/clean.py
index 743f859..a55e61f 100644
--- a/clean.py
+++ b/clean.py
@@ -1,7 +1,18 @@
 import csv
 import json
+import re
+import string
 import sys
 
+# Return tweet_text as ASCII with @mentions, URLs, leftover 'http', the standalone 'RT' marker, punctuation, and extra whitespace removed
+def clean(tweet_text):
+    tweet_text = tweet_text.encode('ascii', 'ignore').replace('\n', ' ')  # drop non-ASCII characters; newlines become spaces
+    tweet_text = re.sub(r"(?:@|https?://)\S+", "", tweet_text)  # strip @mentions and complete URLs up to the next whitespace
+    tweet_text = re.sub(r"\bRT\b", "", tweet_text.replace('http', ''))  # whole-word RT only, so words such as 'START' stay intact
+    tweet_text = tweet_text.translate(string.maketrans("", ""), string.punctuation)  # Python 2 str.translate: delete all punctuation
+    tweet_text = ' '.join(tweet_text.split())  # collapse whitespace runs and trim both ends
+    return tweet_text
+
 # Check for proper command line arguments
 if len(sys.argv) != 2:
     print 'Usage: clean.py <filename.json>'
@@ -25,9 +36,10 @@
     for line in tweets:
         values = json.loads(line)
         if values['tweetOwner']['language'] == 'en':
-            tweetId = str(values['tweetId'])
-            text = values['text'].encode('ascii', 'ignore').replace('\n', ' ')
-            writer.writerow([tweetId, text])
+            tweetId = values['tweetId']  # no str() needed: csv.writer stringifies non-string values itself
+            text = clean(values['text'])
+            if text:  # skip tweets that are empty once cleaned
+                writer.writerow([tweetId, text])
 
 # Print location of cleaned data and close csv file
 print 'Data written to:', csvfile.name