diff --git a/BuildIndex.py b/BuildIndex.py index f4c96ea..277c2ec 100644 --- a/BuildIndex.py +++ b/BuildIndex.py @@ -1,7 +1,14 @@ -import requests, json, time, os, sys, ftplib -from datetime import date, datetime, timezone, timedelta +import requests +import json +import time +import os +import sys +import ftplib +from datetime import datetime, timezone, timedelta import datetime as dt import traceback +import re +from bs4 import BeautifulSoup DEBUG_MODE = False EXISTING_TICKERS = { @@ -16,7 +23,6 @@ "ENTA": "ENTA - Enanta Pharmaceuticals, Inc.", "HEPA": "HEPA - Hepion Pharmaceuticals, Inc.", "NTLA": "NTLA - Intellia Therapeutics, Inc.", - "SBPH": "SBPH - Spring Bank Pharmaceuticals, Inc.", "VIR": "VIR - Vir Biotechnology, Inc.", } @@ -31,31 +37,24 @@ def build_index_data(symbol, current_data): try: action = "HTML content" if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - content = get_html_content(symbol) + content = get_all_text_on_page(symbol) except Exception as e: print(e) log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve the change in percentage, value + # Retrieve the change in percentage, value, and current price try: action = "change amounts" if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - change_amt, change_pct = scrape_yahoo_change(content) + price_data = scrape_price_and_change(content) + price = price_data[0] + change_amt = price_data[1] + change_pct = price_data[2] / 100 log("[{}] Retrieved change amount: {}\tchange percent: {}".format(symbol, change_amt, change_pct)) except Exception as e: print(e) log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve the Price amount - try: - action = "price" - if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - price = scrape_yahoo_price(content) - log("[{}] Retrieved price: {}".format(symbol, price)) - except Exception as e: - print(e) - log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve market cap try: action = "market cap" @@ -107,28 +106,27 @@ def search_and_discard(str_to_find, str_to_search, keep_all_before=False, additi return str_to_search[i + additional_spaces:] -def scrape_yahoo_change(content): - """ This function will scrape the Yahoo finance page and return a tuple of - (change amount, change percentage) +def get_all_text_on_page(symbol): + """ Retrieve all non-HTML content on page """ - content = search_and_discard('quote-header-info', content) - content = search_and_discard('data-reactid="51"', content, additional_spaces=len('data-reactid="51"') + 1) - content = search_and_discard('<', content, keep_all_before=True) + req = requests.get(f"https://finance.yahoo.com/quote/{symbol}") + soup = BeautifulSoup(req.content, features="html.parser") + return soup.get_text().strip() - split_str = content.split(' ') - split_str[1] = split_str[1].replace('(', '') - split_str[1] = split_str[1].replace(')', '') - return float(split_str[0]), float(split_str[1][:-1]) / 100 - -def scrape_yahoo_price(content): - """ This function will scrape the yahoo finance page for the price +def scrape_price_and_change(content): + """ Retrieve the price information and return a list of: + [ price, valueChange, valueChangePercent ] """ + text = search_and_discard(')As of ', content, keep_all_before=True, additional_spaces=1) + text = search_and_discard('Visitors trend', text, additional_spaces=1) + data = re.findall('\d+\.\d{2,5}[-|+]\d+\.\d{2,5}\W+\d+\.\d{2,5}%\W{1}', text) + data = re.findall('\W{0,1}\d+\.\d+', data[0]) - content = search_and_discard('quote-header-info', content) - content = search_and_discard('data-reactid="50"', content, additional_spaces=len('data-reactid="50"')+1) - content = search_and_discard('<', content, keep_all_before=True) - return float(content) + # Cast all data to float, then return + for i in range(len(data)): + data[i] = float(data[i]) + return data def scrape_yahoo_mkt_cap(content): @@ -142,14 +140,10 @@ def scrape_yahoo_mkt_cap(content): 'K': 1000, } - to_find = '
', content, additional_spaces=1) - content = search_and_discard('<', content, keep_all_before=True) - mkt_cap = float(content.strip()[:-1]) - mkt_cap_multiplier = content.strip()[-1] + data = re.findall('Market Cap\d+\.*\d*[M|B|K]{1}', content) + data = re.findall('\d+\.*\d*[M|B|K]{1}', data[0]) + mkt_cap = float(data[0].strip()[:-1]) + mkt_cap_multiplier = data[0].strip()[-1] return mkt_cap * multipliers[mkt_cap_multiplier] @@ -159,11 +153,7 @@ def scrape_yahoo_name(symbol, content): """ name = symbol if name not in EXISTING_TICKERS.keys(): - content = search_and_discard('quote-header-info', content) - content = search_and_discard('', content, additional_spaces=1) - content = search_and_discard('<', content, keep_all_before=True).strip() - name = content + name = search_and_discard('Stock Price,', content, keep_all_before=True) else: name = EXISTING_TICKERS[symbol] return name @@ -300,7 +290,7 @@ def log(msg, err=None): to_write = '{} > {}\n'.format(time, msg) if err is not None: to_write += '{}\n'.format(err) - to_write += '{}\n'.format(traceback.print_exc()) + if DEBUG_MODE: to_write += '{}\n'.format(traceback.print_exc()) if DEBUG_MODE: print(to_write) f = open('./log.txt', 'a+') @@ -390,5 +380,6 @@ def main(): # Sleep for 20 minutes, then repeat time.sleep(20 * 60) + if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/data.html b/data.html index 8042d03..e94d802 100644 --- a/data.html +++ b/data.html @@ -2,74 +2,74 @@
HBRI - Hepatitis B Research Index   - $936.23   - -1.302%  + $784.37   + -0.144% 
ABUS - Arbutus Biopharma Corporation   - $4.33   - -1.140%  + $3.18   + -0.310% 
ALT - Altimmune, Inc.   - $22.22   - 0.630%  - + $12.50   + -1.420%  +
ARWR - Arrowhead Pharmaceuticals, Inc.   - $88.20   - -0.160%  - + $71.24   + 0.350%  +
ASMB - Assembly Biosciences, Inc.   - $6.29   - -1.260%  + $4.29   + -1.950% 
BBI - Brickell Biotech, Inc.   - $1.61   - -0.620%  - + $0.93   + 4.990%  +
DRNA - Dicerna Pharmaceuticals, Inc.   - $26.01   - -0.690%  + $28.74   + -0.420% 
DVAX - Dynavax Technologies Corporation   - $9.74   - 2.200%  - + $9.53   + -0.100%  +
ENTA - Enanta Pharmaceuticals, Inc.   - $51.32   - -4.660%  + $51.29   + -2.100% 
HEPA - Hepion Pharmaceuticals, Inc.   - $2.97   - 4.580%  + $1.71   + 1.780% 
NTLA - Intellia Therapeutics, Inc.   - $68.28   - -0.930%  - + $77.26   + 3.560%  +
VIR - Vir Biotechnology, Inc.   - $69.71   - -3.140%  + $48.22   + -3.250% 
diff --git a/data.json b/data.json index 306f428..0a1b5b4 100644 --- a/data.json +++ b/data.json @@ -1,106 +1,106 @@ { "HBRI": { - "price": 936.2319, - "open": 28457338376.3, - "change_from_open": -370381376.29999924, - "change_from_open_percent": -0.013015320385987409, - "market_cap": 28086957000.0, + "price": 784.3678, + "open": 23565018472.300003, + "change_from_open": -33984472.30000305, + "change_from_open_percent": -0.0014421576770648754, + "market_cap": 23531034000.0, "name": "HBRI - Hepatitis B Research Index" }, "ABUS": { - "price": 4.33, - "refresh_time": "2021-02-15 14:31:42", - "market_cap": 367657000.0, + "price": 3.18, + "refresh_time": "2021-04-23 12:05:31", + "market_cap": 304705000.0, "name": "ABUS - Arbutus Biopharma Corporation", - "change_from_open": -0.05, - "change_from_open_percent": -0.011399999999999999 + "change_from_open": -0.01, + "change_from_open_percent": -0.0031 }, "ALT": { - "price": 22.22, - "refresh_time": "2021-02-15 14:31:42", - "market_cap": 825315000.0, + "price": 12.5, + "refresh_time": "2021-04-23 12:05:32", + "market_cap": 455224000.0, "name": "ALT - Altimmune, Inc.", - "change_from_open": 0.14, - "change_from_open_percent": 0.0063 + "change_from_open": -0.18, + "change_from_open_percent": -0.014199999999999999 }, "ARWR": { - "price": 88.2, - "refresh_time": "2021-02-15 14:31:43", - "market_cap": 9154000000.0, + "price": 71.24, + "refresh_time": "2021-04-23 12:05:32", + "market_cap": 7190000000.0, "name": "ARWR - Arrowhead Pharmaceuticals, Inc.", - "change_from_open": -0.14, - "change_from_open_percent": -0.0016 + "change_from_open": 0.25, + "change_from_open_percent": 0.0034999999999999996 }, "ASMB": { - "price": 6.29, - "refresh_time": "2021-02-15 14:31:43", - "market_cap": 207710000.0, + "price": 4.285, + "refresh_time": "2021-04-23 12:05:33", + "market_cap": 171653000.0, "name": "ASMB - Assembly Biosciences, Inc.", - "change_from_open": -0.08, - "change_from_open_percent": -0.0126 + "change_from_open": -0.085, + "change_from_open_percent": -0.0195 }, "BBI": { - "price": 1.61, - "refresh_time": "2021-02-15 14:31:43", - "market_cap": 86160000.0, + "price": 0.9314, + "refresh_time": "2021-04-23 12:05:33", + "market_cap": 62339000.0, "name": "BBI - Brickell Biotech, Inc.", - "change_from_open": -0.01, - "change_from_open_percent": -0.0062 + "change_from_open": 0.0443, + "change_from_open_percent": 0.0499 }, "DRNA": { - "price": 26.01, - "refresh_time": "2021-02-15 14:31:44", - "market_cap": 1952000000.0, + "price": 28.74, + "refresh_time": "2021-04-23 12:05:34", + "market_cap": 2202000000.0, "name": "DRNA - Dicerna Pharmaceuticals, Inc.", - "change_from_open": -0.18, - "change_from_open_percent": -0.0069 + "change_from_open": -0.12, + "change_from_open_percent": -0.0042 }, "DVAX": { - "price": 9.74, - "refresh_time": "2021-02-15 14:31:44", - "market_cap": 1073000000.0, + "price": 9.53, + "refresh_time": "2021-04-23 12:05:34", + "market_cap": 1075000000.0, "name": "DVAX - Dynavax Technologies Corporation", - "change_from_open": 0.21, - "change_from_open_percent": 0.022000000000000002 + "change_from_open": -0.01, + "change_from_open_percent": -0.001 }, "ENTA": { - "price": 51.32, - "refresh_time": "2021-02-15 14:31:45", + "price": 51.29, + "refresh_time": "2021-04-23 12:05:35", "market_cap": 1034999999.9999999, "name": "ENTA - Enanta Pharmaceuticals, Inc.", - "change_from_open": -2.51, - "change_from_open_percent": -0.0466 + "change_from_open": -1.1, + "change_from_open_percent": -0.021 }, "HEPA": { - "price": 2.97, - "refresh_time": "2021-02-15 14:31:45", - "market_cap": 95115000.0, + "price": 1.7099, + "refresh_time": "2021-04-23 12:05:35", + "market_cap": 131113000.0, "name": "HEPA - Hepion Pharmaceuticals, Inc.", - "change_from_open": 0.13, - "change_from_open_percent": 0.0458 + "change_from_open": 0.0299, + "change_from_open_percent": 0.0178 }, "NTLA": { - "price": 68.28, - "refresh_time": "2021-02-15 14:31:45", - "market_cap": 4411000000.0, + "price": 77.26, + "refresh_time": "2021-04-23 12:05:36", + "market_cap": 4884000000.0, "name": "NTLA - Intellia Therapeutics, Inc.", - "change_from_open": -0.64, - "change_from_open_percent": -0.009300000000000001 + "change_from_open": 2.66, + "change_from_open_percent": 0.0356 }, "SBPH": { "price": null, - "refresh_time": "2021-02-15 14:31:46", + "refresh_time": "2021-04-23 12:05:36", "market_cap": null, - "name": "SBPH - Spring Bank Pharmaceuticals, Inc.", + "name": "Symbol Lookup from Yahoo FinanceHomeMailNewsFinanceSportsEntertainmentSearchMobileMore...Yahoo FinanceSearchSign inMailSign in to view your mailFinance HomeWatchlistsMy PortfolioScreenersYahoo Finance PlusMarketsNewsPersonal FinanceVideosIndustriesTechContact UsU.S. markets close in 3 hours 55 minutesS&P 5004,177.10+42.12(+1.02%)Dow 3033,990.98+175.08(+0.52%)Nasdaq14,009.77+191.35(+1.38%)Symbols similar to 'sbph'All (8)Stocks (8)Mutual Funds (0)ETFs (0)Indices (0)Futures (0)Currencies (0)SymbolNameLast PriceIndustry / CategoryTypeExchangeSPHSuburban Propane Partners, L.P.14.63UtilitiesStocksNYQSBHSally Beauty Holdings, Inc. (Na20.41Consumer CyclicalStocksNYQTBPHTheravance Biopharma, Inc.21.18HealthcareStocksNGMLBPHLongboard Pharmaceuticals, Inc.10.94HealthcareStocksNGMSCPHscPharmaceuticals Inc.6.84HealthcareStocksNMSSPPHSPENCER PHARMACEUTICAL INC0.00N/AStocksPNKKBPHKYTO TECHNOLOGY AND LIFE SCI IN1.90Financial ServicesStocksPNKSBPHSBPHN/AN/AStocksNCMAlready got your third stimulus check? A bonus amount may be on the wayMoneyWiseEconomist: Government will 'attempt' to make unearned and earned tax rate similarYahoo Finance VideoThe worst mistake you can make on a Zoom job interview: \u2018It\u2019s the first thing that occurs and it never goes well\u2019Yahoo FinanceAdvertise with us\u00a9 2021 Verizon Media. All rights reserved.Data DisclaimerHelpSuggestionsPrivacy DashboardPrivacy (Updated)About Our AdsTerms (Updated)Sitema", "change_from_open": null, "change_from_open_percent": null }, "VIR": { - "price": 69.71, - "refresh_time": "2021-02-15 14:31:46", - "market_cap": 8880000000.0, + "price": 48.22, + "refresh_time": "2021-04-23 12:05:36", + "market_cap": 6020000000.0, "name": "VIR - Vir Biotechnology, Inc.", - "change_from_open": -2.26, - "change_from_open_percent": -0.031400000000000004 + "change_from_open": -1.62, + "change_from_open_percent": -0.0325 } } \ No newline at end of file