From 8a60d5282bdd5266c14d4174983f73bb1534b834 Mon Sep 17 00:00:00 2001 From: Kevin Wittmer Date: Thu, 7 Apr 2016 00:59:14 -0400 Subject: [PATCH] extract English tweets --- script.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/script.py b/script.py index 3ace292..c7f083a 100644 --- a/script.py +++ b/script.py @@ -1,10 +1,16 @@ import json # Open the data and read each JSON object into a list -data_file = open('sample_data.json', 'r') +data_file = open('collection_brands.11.json', 'r') tweets = data_file.readlines() +data_file.close() -# Load each JSON object and print the tweetId +# Load each JSON object and add English tweets to new list +english_tweets = [] for line in tweets: values = json.loads(line) - print values["tweetId"] + if values['tweetOwner']['language'] == 'en': + english_tweets.append(values) + +# Print the number of English tweets retrieved +print len(english_tweets)