-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpull_recent_days.py
92 lines (84 loc) · 3.88 KB
/
pull_recent_days.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# This script can be run to pull some days of parking data (from earlier
# in the week) through the process_data script and (with the argument
# push_to_CKAN = True in that function call), cause the resulting data
# to be pushed to the CKAN instance specified by modules and settings
# called by process_data.
# In contrast with pull_from_last_hour.py, this script is designed to pull from
# days prior to the current day (to avoid any issues where some parking
# transactions are still ongoing) and to pull transaction data from many days
# all at once but be run daily, rather than pulling in small increments and
# updating frequently like the quasi-live pull_from_last_hour.py.
import sys, pytz
from datetime import datetime, timedelta
import process_data
from read_entire_history import list_of_servers
from pprint import pprint
# This script seems to be malfunctioning.
def main(*args, **kwargs):
    """Run process_data.main over a fixed window of recent, completed days.

    The window starts at process_data.beginning_of_day() of the moment four
    days before now and halts at process_data.beginning_of_day() of the
    moment two days before now, with "now" taken in the US/Eastern timezone.

    Keyword arguments read (all optional):
        raw_only: forwarded to process_data.main (default False).
        test_mode: when truthy, output goes to CSV and nothing is pushed
            to CKAN; otherwise the data is pushed to CKAN and no CSV is
            written (default False).
        mute_alerts: forwarded to process_data.main (default False).

    Positional arguments and any other keyword arguments (for example
    output_to_csv, push_to_CKAN, or server) are accepted but ignored.

    Returns:
        Whatever process_data.main returns (bound to ``success`` here;
        presumably a success indicator -- confirm against process_data).
    """
    raw_only = kwargs.get('raw_only', False)
    test_mode = kwargs.get('test_mode', False)
    mute_alerts = kwargs.get('mute_alerts', False)

    # Test runs write CSV output; normal runs push to CKAN instead.
    output_to_csv = bool(test_mode)
    push_to_CKAN = not test_mode

    pgh = pytz.timezone('US/Eastern')
    # Take a single snapshot of the current time so that both edges of the
    # window are derived from the same instant, even if the script happens
    # to start right around midnight.
    now = datetime.now(pgh)
    slot_start = process_data.beginning_of_day(now - timedelta(days=4))
    halting_time = process_data.beginning_of_day(now - timedelta(days=2))
    # Note that these days are chosen to be within the last 7 days so that
    # the data can be pulled from the API without using the bulk API
    # (and without doing any caching). It might be more efficient to pull
    # all of that data in larger chunks and then use the default
    # timechunk value for the processing; this would require reworking
    # process_data.py.

    script_start = datetime.now()
    print("Started processing at {}.".format(script_start))
    success = process_data.main(
        raw_only=raw_only,
        output_to_csv=output_to_csv,
        push_to_CKAN=push_to_CKAN,
        slot_start=slot_start,
        halting_time=halting_time,
        threshold_for_uploading=1000,
        mute_alerts=mute_alerts,
    )
    print("Started processing at {} and finished at {}.".format(script_start,datetime.now()))
    # Hand the outcome back to the caller instead of discarding it.
    return success
if __name__ == '__main__':
    # Command-line entry point: translate free-form keywords given on the
    # command line into the keyword arguments handed to main().
    if len(sys.argv) <= 1:
        raise ValueError("Please specify some command-line parameters")

    # Defaults that apply when the corresponding keyword is not given.
    output_to_csv = False
    push_to_CKAN = False
    raw_only = True
    test_mode = False
    mute_alerts = False
    server = 'debug'

    # This is a way of parsing command-line arguments that cares less about
    # position and just does its best to identify the user's intent.
    for k, arg in enumerate(sys.argv[1:]):
        if arg in ['scan', 'save', 'csv']:
            output_to_csv = True
        elif arg in ['pull', 'push', 'ckan']:
            push_to_CKAN = True
        elif arg in ['raw', 'raw_only']:
            raw_only = True
        elif arg in ['cooked', 'well-done', 'well_done', 'done']:
            raw_only = False
        elif arg in ['test', 'test_mode']:
            test_mode = True
        elif arg in ['mute', 'mute_alerts']:
            mute_alerts = True
        elif arg in list_of_servers:
            # Store the requested server in the variable that is copied into
            # kwparams below, so the user's choice is not clobbered by the
            # default.
            server = arg
        else:
            print("I have no idea what do with args[{}] = {}.".format(k,arg))

    # NOTE(review): main() in this file only reads raw_only, test_mode and
    # mute_alerts from its keyword arguments, so output_to_csv, push_to_CKAN
    # and server are passed along but do not currently affect the run.
    kwparams = {
        'mute_alerts': mute_alerts,
        'raw_only': raw_only,
        'test_mode': test_mode,
        'server': server,
        'output_to_csv': output_to_csv,
        'push_to_CKAN': push_to_CKAN,
    }
    pprint(kwparams)
    main(**kwparams)