-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathupdate_dataset.py
executable file
·73 lines (58 loc) · 2.04 KB
/
update_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
import yaml
import os
import sys
import twitter
import time
import codecs
from traceback import print_exc
def update_dataset(username):
    """Fetch as many tweets as possible for the given user since the last one we received.

    The credentials are read from ``settings.yml`` in the directory that
    contains this script: the entry whose ``target`` equals *username* (with
    any ``@`` removed) supplies the ``auth`` keyword arguments passed to
    ``twitter.Api``.

    New tweet texts (newlines replaced by spaces, one tweet per line) are
    written to ``users/<username>/tweets`` ahead of any content already in
    that file, and the id of the first status received is stored in
    ``users/<username>/last_tweet_id`` so the next run only asks for newer
    tweets.

    Args:
        username: Twitter account name, with or without ``@`` characters.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        ValueError: if no entry in ``settings.yml`` targets *username*.
        IOError: if ``settings.yml`` cannot be opened.
    """
    username = username.replace('@', '')
    print("Retrieving tweets from @%s" % username)

    topdir = os.path.dirname(os.path.realpath(__file__))

    # NOTE(review): yaml.Loader is the full loader and can construct
    # arbitrary Python objects. That matches the historical behaviour of a
    # bare yaml.load() and is acceptable for a trusted local config file;
    # switch to yaml.safe_load if settings.yml is known to contain only plain
    # YAML. The explicit Loader is also required by newer PyYAML releases.
    with open(os.path.join(topdir, 'settings.yml')) as settings_file:
        settings = yaml.load(settings_file, Loader=yaml.Loader)

    # An empty settings file loads as None; treat it as "no entries".
    auth = None
    for sett in (settings or {}).values():
        if sett['target'].replace('@', '') == username:
            # No break: as before, the last matching entry wins.
            auth = sett['auth']
    if auth is None:
        # Previously this fell through to twitter.Api(**None) and died with
        # an unhelpful TypeError.
        raise ValueError("No entry in settings.yml targets @%s" % username)
    api = twitter.Api(**auth)

    userdir = os.path.join(topdir, 'users', username)
    if not os.path.exists(userdir):
        os.makedirs(userdir)

    lti_path = os.path.join(userdir, 'last_tweet_id')
    try:
        with open(lti_path, 'r') as lti_file:
            # Ignore stray whitespace; an empty file means "no previous run".
            since_id = lti_file.read().strip() or None
    except IOError:
        since_id = None

    page = 1
    statuses = []
    while True:
        try:
            new = api.GetUserTimeline(username, since_id=since_id, count=200,
                                      include_rts=False, page=page)
        except twitter.TwitterError:
            # Retries the same page indefinitely, every 5 seconds, until the
            # API call succeeds.
            print_exc()
            print("Retrying in 5 seconds...")
            time.sleep(5)
            continue
        if not new:
            break
        statuses.extend(new)
        print("Received %d tweets..." % len(statuses))
        page += 1

    if not statuses:
        print("No new tweets.")
        return

    # One tweet per line in the data file, so flatten embedded newlines.
    texts = [s.text.replace("\n", " ") for s in statuses]

    datafile_path = os.path.join(userdir, 'tweets')
    try:
        with codecs.open(datafile_path, encoding='utf-8', mode='r') as datafile:
            # Previously stored tweets go after the newly fetched ones.
            texts.append(datafile.read())
    except IOError:
        # No data file yet: this is the first run for this user.
        pass

    with codecs.open(datafile_path, encoding='utf-8', mode='w') as datafile:
        datafile.write("\n".join(texts))

    # Written last, after the tweets file has been closed and flushed.
    # NOTE(review): assumes the first status returned is the newest one;
    # confirm against the API's ordering.
    with open(lti_path, 'w') as ltf:
        ltf.write(str(statuses[0].id))
if __name__ == '__main__':
    # Command-line entry point: expects the target account as the first argument.
    if len(sys.argv) < 2:
        # Passing a string to sys.exit() writes it to stderr and exits with
        # status 1, so a missing argument is reported as a failure rather
        # than as a successful run.
        sys.exit("Usage: %s @target_account" % sys.argv[0])
    update_dataset(sys.argv[1])