-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBuildIndex.py
394 lines (332 loc) · 13.7 KB
/
BuildIndex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
import requests, json, time, os, sys, ftplib
from datetime import date, datetime, timezone, timedelta
import datetime as dt
import traceback
# Global verbosity flag; flipped to True by main() when "debug" appears in
# sys.argv.  When True, extra diagnostics are printed and the FTP upload
# step at the end of each cycle is skipped.
DEBUG_MODE = False

# Known ticker -> display-name overrides.  scrape_yahoo_name() returns these
# directly instead of scraping Yahoo Finance, so symbols listed here always
# render with a stable, human-curated name.
EXISTING_TICKERS = {
    "HBRI": "HBRI - Hepatitis B Research Index",
    "ALT": "ALT - Altimmune, Inc.",
    "ARWR": "ARWR - Arrowhead Pharmaceuticals, Inc.",
    "ABUS": "ABUS - Arbutus Biopharma Corporation",
    "ASMB": "ASMB - Assembly Biosciences, Inc.",
    "BBI": "BBI - Brickell Biotech, Inc.",
    "DRNA": "DRNA - Dicerna Pharmaceuticals, Inc.",
    "DVAX": "DVAX - Dynavax Technologies Corporation",
    "ENTA": "ENTA - Enanta Pharmaceuticals, Inc.",
    "HEPA": "HEPA - Hepion Pharmaceuticals, Inc.",
    "NTLA": "NTLA - Intellia Therapeutics, Inc.",
    "SBPH": "SBPH - Spring Bank Pharmaceuticals, Inc.",
    "VIR": "VIR - Vir Biotechnology, Inc.",
}
def build_index_data(symbol, current_data):
    """Scrape Yahoo Finance for the latest quote data for *symbol*.

    current_data is the previously stored dict for this symbol; any field
    that fails to scrape is back-filled from it.  Returns the new data
    dict, or None when the quote page itself could not be fetched (the
    caller then keeps its existing data untouched).
    """
    log("About to start scraping for {}".format(symbol))
    change_amt, change_pct, price, name, market_cap = None, None, None, symbol, None
    # Fetch the page once; every scraper below parses this same string.
    try:
        action = "HTML content"
        if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol))
        content = get_html_content(symbol)
    except Exception as e:
        print(e)
        log("Error retrieving {} data for: {}".format(action, symbol), e)
        # Bug fix: previously `content` stayed unbound here and every scrape
        # below died with a NameError.  Bail out instead; the caller treats
        # None as "keep the old snapshot".
        return None
    # Day's change in dollars and as a fraction.
    try:
        action = "change amounts"
        if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol))
        change_amt, change_pct = scrape_yahoo_change(content)
        log("[{}] Retrieved change amount: {}\tchange percent: {}".format(symbol, change_amt, change_pct))
    except Exception as e:
        print(e)
        log("Error retrieving {} data for: {}".format(action, symbol), e)
    # Current share price.
    try:
        action = "price"
        if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol))
        price = scrape_yahoo_price(content)
        log("[{}] Retrieved price: {}".format(symbol, price))
    except Exception as e:
        print(e)
        log("Error retrieving {} data for: {}".format(action, symbol), e)
    # Market capitalization.
    try:
        action = "market cap"
        if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol))
        market_cap = scrape_yahoo_mkt_cap(content)
        log("[{}] Retrieved mkt cap: {}".format(symbol, market_cap))
    except Exception as e:
        print(e)
        log("Error retrieving {} data for: {}".format(action, symbol), e)
    # Company display name (falls back to the raw symbol).
    try:
        action = "name"
        if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol))
        name = scrape_yahoo_name(symbol, content)
        log("[{}] Retrieved name: {}".format(symbol, name))
    except Exception as e:
        print(e)
        log("Error retrieving {} data for: {}".format(action, symbol), e)
    # Assemble the fresh snapshot.
    new_data = {
        "price": price,
        "refresh_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "market_cap": market_cap,
        "name": name,
        "change_from_open": change_amt,
        "change_from_open_percent": change_pct
    }
    # Back-fill any field we failed to scrape from the previous snapshot.
    for key in new_data:
        if new_data[key] is None and current_data.get(key) is not None:
            new_data[key] = current_data[key]
    return new_data
def search_and_discard(str_to_find, str_to_search, keep_all_before=False, additional_spaces=0):
    """Locate *str_to_find* inside *str_to_search* and return the text either
    after (default) or before (keep_all_before=True) the match position,
    shifted forward by *additional_spaces* characters (negative to trim).

    NOTE(review): when the needle is absent, str.find yields -1 and the
    slice silently operates relative to the end of the string — callers
    rely on downstream parsing to fail in that case.
    """
    cut = str_to_search.find(str_to_find) + additional_spaces
    if keep_all_before:
        return str_to_search[:cut]
    return str_to_search[cut:]
def scrape_yahoo_change(content):
    """Parse a Yahoo Finance quote page and return the day's movement as a
    (change amount, change fraction) tuple, e.g. (-0.12, -0.0034).
    """
    marker = 'data-reactid="51"'
    # Narrow down to the quote header, then to the text of the change span.
    section = search_and_discard('quote-header-info', content)
    section = search_and_discard(marker, section, additional_spaces=len(marker) + 1)
    section = search_and_discard('<', section, keep_all_before=True)
    # The span text looks like "-0.12 (-0.34%)".
    parts = section.split(' ')
    percent_text = parts[1].replace('(', '').replace(')', '')
    # Drop the trailing '%' and convert to a fraction.
    return float(parts[0]), float(percent_text[:-1]) / 100
def scrape_yahoo_price(content):
    """Extract the current share price (as a float) from a Yahoo Finance
    quote page.
    """
    tag = 'data-reactid="50"'
    snippet = search_and_discard('quote-header-info', content)
    snippet = search_and_discard(tag, snippet, additional_spaces=len(tag) + 1)
    return float(search_and_discard('<', snippet, keep_all_before=True))
def scrape_yahoo_mkt_cap(content):
    """Extract the market capitalization from a Yahoo Finance quote page and
    return it as a plain number (the "1.23B"-style suffix is expanded).
    """
    suffix_values = {
        'T': 10 ** 12,
        'B': 10 ** 9,
        'M': 10 ** 6,
        'K': 10 ** 3,
    }
    # Walk down the page: main content div -> "Market Cap" row -> its span.
    fragment = search_and_discard('<div id="Main" role="content"', content)
    fragment = search_and_discard('Market Cap', fragment)
    fragment = search_and_discard('<span', fragment)
    fragment = search_and_discard('>', fragment, additional_spaces=1)
    fragment = search_and_discard('<', fragment, keep_all_before=True)
    text = fragment.strip()
    # Last character is the magnitude suffix, the rest is the mantissa.
    return float(text[:-1]) * suffix_values[text[-1]]
def scrape_yahoo_name(symbol, content):
    """Return the company display name for *symbol*.

    Symbols present in EXISTING_TICKERS use the curated name; anything else
    is scraped from the <h1> in the Yahoo Finance quote header.
    """
    if symbol in EXISTING_TICKERS:
        return EXISTING_TICKERS[symbol]
    header = search_and_discard('quote-header-info', content)
    header = search_and_discard('<h1', header)
    header = search_and_discard('>', header, additional_spaces=1)
    return search_and_discard('<', header, keep_all_before=True).strip()
def generate_html(tickers_json, css_file):
    """Render the scrolling-ticker HTML fragment for every symbol.

    tickers_json maps symbol -> data dict (see build_index_data).  Entries
    with a missing price or change percentage are skipped.  css_file is
    kept for interface compatibility but is not referenced by the generated
    markup (the original passed it to .format() on a template with no
    placeholders — a no-op).
    """
    log("Generating HTML")
    down_facing_triangle = "▼"
    up_facing_triangle = "▲"
    side_facing_triangle = "►"
    output = """
<div class="ticker-wrap">
<div class="ticker">"""
    # Add the individual stocks
    for symbol in tickers_json:
        symbol_data = tickers_json[symbol]
        name = symbol_data['name']
        price, change_from_open_percent = symbol_data["price"], symbol_data["change_from_open_percent"]
        # Skip incomplete entries: formatting None with {:.2f}/{:.3%} raises.
        if change_from_open_percent is None or price is None:
            continue
        elif change_from_open_percent < 0:
            # Negative change
            direction = down_facing_triangle
            change_color = "red"
        elif change_from_open_percent > 0:
            # Positive change
            direction = up_facing_triangle
            change_color = "green"
        else:
            # No change
            direction = side_facing_triangle
            change_color = "gray"
        this_ticker = """
<span class="tickerSymbol">{}</span> \n\t\t\
<span class="tickerValue"> ${:.2f}</span> \n\t\t\
<span class="tickerPercent {}">{:.3%}</span> \n\t\t\
<span class="tickerDirection {}">{}</span>\n\t\t
""".format(name, price, change_color, change_from_open_percent, change_color, direction)
        output += "<div class=\"ticker__item\">{}</div>".format(this_ticker)
    output += """ </div>
</div>
"""
    log("HTML created")
    return output
def update_index_data(current_data, idx_symbol):
    """Recompute the aggregate index entry and store it under *idx_symbol*.

    Sums the current market caps of every non-index symbol, derives each
    one's implied opening cap from its daily change fraction, and writes the
    combined figures back into current_data.  The price divisor (30,000,000)
    is arbitrary, chosen only to scale the index to a readable number.
    """
    log("UPDATING THE INDEX DATA... SYMBOL={}".format(idx_symbol))
    total_current = 0
    total_open = 0
    for sym in current_data:
        if sym == idx_symbol:
            continue
        cap = current_data[sym]['market_cap']
        pct = current_data[sym]['change_from_open_percent']
        print("SYM: {} | cur_mkt_cap: {} | change_pct: {}".format(sym, cap, pct))
        if cap is None or pct is None:
            log("Skipping")
            continue
        # Back out the opening cap from today's change fraction.
        total_open += cap - (pct * cap)
        total_current += cap
    # Guard the divisions below against empty/no-data runs.
    if total_current == 0:
        total_current = 1
    if total_open == 0:
        total_open = total_current
    current_data[idx_symbol] = {
        "price": total_current / 30000000,
        "open": total_open,
        "change_from_open": total_current - total_open,
        "change_from_open_percent": (total_current - total_open) / total_open,
        "market_cap": total_current,
        "name": "{} - Hepatitis B Research Index".format(idx_symbol)
    }
    log("HBRI BELOW")
    log(json.dumps(current_data[idx_symbol], indent=2))
def validate_time(now):
    """Return the number of seconds to sleep until the next trading session.

    Sessions are assumed to open at 09:30 on weekdays.  If *now* is on a
    weekend, or after 16:00 on a weekday, the delay runs to 09:30 of the
    next trading day; during a trading day the result is 0.

    Bug fix: datetime.weekday() is 0=Monday .. 6=Sunday.  The original
    tested 6 for Saturday, 7 for Sunday (unreachable) and 5 for Friday,
    so weekend handling never fired correctly.
    """
    delta = dt.timedelta(days=0)
    if now.weekday() == 5:
        # Saturday: next session is Monday.
        delta = dt.timedelta(days=2)
    elif now.weekday() == 6:
        # Sunday: next session is Monday.
        delta = dt.timedelta(days=1)
    elif now.hour > 16:
        if now.weekday() == 4:
            # Friday after the close: skip the weekend.
            delta = dt.timedelta(days=3)
        else:
            # Any other weekday after the close: wait for tomorrow.
            delta = dt.timedelta(days=1)
    future = now + delta
    then = datetime(year=future.year, month=future.month, day=future.day, hour=9, minute=30)
    time_diff = then - now
    # Clamp to zero: during trading hours `then` is already in the past.
    return max(time_diff.days * (24 * 60 * 60) + time_diff.seconds, 0)
def get_html_content(symbol):
    """Download the Yahoo Finance quote page for *symbol* and return its raw
    HTML as a string.

    A timeout is supplied so a hung connection cannot stall the scrape loop
    forever (requests.get blocks indefinitely without one); callers already
    wrap this call in try/except.
    """
    url = 'https://finance.yahoo.com/quote/{}'.format(symbol)
    response = requests.get(url, timeout=30)
    # NOTE: str() of the bytes payload yields a "b'...'" repr string; the
    # scrapers in this file parse that form, so keep it as-is.
    return str(response.content)
def log(msg, err=None):
    """Append *msg* (and optionally *err* plus the active traceback) to
    ./log.txt, timestamped in fixed UTC-4.

    When DEBUG_MODE is set the entry is also echoed to stdout.
    """
    tz = timezone(-timedelta(hours=4))
    # Renamed from `time` to avoid shadowing the imported time module.
    stamp = datetime.now(tz=tz).strftime("%Y-%m-%d %H:%M:%S %Z")
    to_write = '{} > {}\n'.format(stamp, msg)
    if err is not None:
        to_write += '{}\n'.format(err)
        # Bug fix: traceback.print_exc() writes to stderr and returns None,
        # so the file used to record the literal string "None".  format_exc()
        # returns the traceback text so it actually lands in the log.
        to_write += '{}\n'.format(traceback.format_exc())
    if DEBUG_MODE:
        print(to_write)
    # Context manager guarantees the handle is closed even if write() fails.
    with open('./log.txt', 'a+') as f:
        f.write(to_write)
def upload():
    """Push the generated data.html to the web host over FTP.

    Connection details come from the FTP_HOST / FTP_USER / FTP_PASS
    environment variables.
    """
    session = ftplib.FTP(os.environ['FTP_HOST'], os.environ['FTP_USER'], os.environ['FTP_PASS'])
    with open('data.html', 'rb') as payload:
        session.storbinary('STOR public_html/dwd-ticker/data.html', payload)
    session.quit()
def main():
    """Entry point: scrape quote data in an endless loop, rebuild the index,
    regenerate the HTML ticker, and (outside debug mode) upload it.

    Pass "debug" on the command line to enable DEBUG_MODE; pass a filename
    prefix as the first argument to use something other than data.*.
    """
    global DEBUG_MODE
    if "debug" in sys.argv:
        DEBUG_MODE = True
    # File naming variables
    filename_prefix = "data" if len(sys.argv) == 1 or 'debug' in sys.argv else sys.argv[1]
    log("Beginning run using '{}' prefix".format(filename_prefix))
    storage_file = "{}.json".format(filename_prefix)
    css_file = "{}.css".format(filename_prefix)
    html_file = "{}.html".format(filename_prefix)
    input_file = "{}.csv".format(filename_prefix)
    index_symbol = 'HBRI'
    # Infinite loop: sleeps through weekends/after-hours, otherwise refreshes
    # every 20 minutes.
    while True:
        # Sleep until the next valid trading time.
        sec_until_next_trading_day = validate_time(datetime.now())
        print("Sleeping for {} second(s)".format(sec_until_next_trading_day))
        time.sleep(sec_until_next_trading_day)
        # Read in the stock symbols from the provided csv.
        with open(input_file, "r") as f:
            contents = f.read().split(",")
        # Load the previous snapshot so scrape failures can fall back on it.
        # (Bug fix: the original called `f.close` without parentheses and
        # leaked the handle.)
        try:
            with open(storage_file, "r") as f:
                current_data = json.loads(f.read())
        except Exception as e:
            log("Error trying to read in the current data.", e)
            current_data = {}
        # Query all stocks for relevant data.
        for raw_symbol in contents:
            symbol = raw_symbol.strip().replace('\n', '')
            if symbol == index_symbol:
                # The index entry is recomputed from scratch each cycle.
                current_data[symbol] = {}
                continue
            if symbol not in current_data:
                current_data[symbol] = {}
            data = build_index_data(symbol, current_data[symbol])
            if data is not None:
                current_data[symbol] = data
        # Update the overall index value.
        update_index_data(current_data, index_symbol)
        # Persist the data.  Opened only now (bug fix): the original opened
        # it "w+" before scraping, so any crash mid-cycle truncated the
        # previous snapshot.
        if len(current_data) != 0:
            with open(storage_file, "w+") as f:
                f.write(json.dumps(current_data, indent=2))
        # Generate the HTML file.
        with open(html_file, 'w+') as f:
            f.write(generate_html(current_data, css_file))
        if not DEBUG_MODE:
            upload()
        # Sleep for 20 minutes, then repeat.
        time.sleep(20 * 60)


if __name__ == "__main__":
    main()