diff --git a/BuildIndex.py b/BuildIndex.py index f4c96ea..277c2ec 100644 --- a/BuildIndex.py +++ b/BuildIndex.py @@ -1,7 +1,14 @@ -import requests, json, time, os, sys, ftplib -from datetime import date, datetime, timezone, timedelta +import requests +import json +import time +import os +import sys +import ftplib +from datetime import datetime, timezone, timedelta import datetime as dt import traceback +import re +from bs4 import BeautifulSoup DEBUG_MODE = False EXISTING_TICKERS = { @@ -16,7 +23,6 @@ "ENTA": "ENTA - Enanta Pharmaceuticals, Inc.", "HEPA": "HEPA - Hepion Pharmaceuticals, Inc.", "NTLA": "NTLA - Intellia Therapeutics, Inc.", - "SBPH": "SBPH - Spring Bank Pharmaceuticals, Inc.", "VIR": "VIR - Vir Biotechnology, Inc.", } @@ -31,31 +37,24 @@ def build_index_data(symbol, current_data): try: action = "HTML content" if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - content = get_html_content(symbol) + content = get_all_text_on_page(symbol) except Exception as e: print(e) log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve the change in percentage, value + # Retrieve the change in percentage, value, and current price try: action = "change amounts" if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - change_amt, change_pct = scrape_yahoo_change(content) + price_data = scrape_price_and_change(content) + price = price_data[0] + change_amt = price_data[1] + change_pct = price_data[2] / 100 log("[{}] Retrieved change amount: {}\tchange percent: {}".format(symbol, change_amt, change_pct)) except Exception as e: print(e) log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve the Price amount - try: - action = "price" - if DEBUG_MODE: log("Retrieving {} for {}".format(action, symbol)) - price = scrape_yahoo_price(content) - log("[{}] Retrieved price: {}".format(symbol, price)) - except Exception as e: - print(e) - log("Error retrieving {} data for: {}".format(action, symbol), e) - # Retrieve market cap try: action = "market cap" @@ -107,28 +106,27 @@ def search_and_discard(str_to_find, str_to_search, keep_all_before=False, additi return str_to_search[i + additional_spaces:] -def scrape_yahoo_change(content): - """ This function will scrape the Yahoo finance page and return a tuple of - (change amount, change percentage) +def get_all_text_on_page(symbol): + """ Retrieve all non-HTML content on page """ - content = search_and_discard('quote-header-info', content) - content = search_and_discard('data-reactid="51"', content, additional_spaces=len('data-reactid="51"') + 1) - content = search_and_discard('<', content, keep_all_before=True) + req = requests.get(f"https://finance.yahoo.com/quote/{symbol}") + soup = BeautifulSoup(req.content, features="html.parser") + return soup.get_text().strip() - split_str = content.split(' ') - split_str[1] = split_str[1].replace('(', '') - split_str[1] = split_str[1].replace(')', '') - return float(split_str[0]), float(split_str[1][:-1]) / 100 - -def scrape_yahoo_price(content): - """ This function will scrape the yahoo finance page for the price +def scrape_price_and_change(content): + """ Retrieve the price information and return a list of: + [ price, valueChange, valueChangePercent ] """ + text = search_and_discard(')As of ', content, keep_all_before=True, additional_spaces=1) + text = search_and_discard('Visitors trend', text, additional_spaces=1) + data = re.findall('\d+\.\d{2,5}[-|+]\d+\.\d{2,5}\W+\d+\.\d{2,5}%\W{1}', text) + data = re.findall('\W{0,1}\d+\.\d+', data[0]) - content = search_and_discard('quote-header-info', content) - content = search_and_discard('data-reactid="50"', content, additional_spaces=len('data-reactid="50"')+1) - content = search_and_discard('<', content, keep_all_before=True) - return float(content) + # Cast all data to float, then return + for i in range(len(data)): + data[i] = float(data[i]) + return data def scrape_yahoo_mkt_cap(content): @@ -142,14 +140,10 @@ def scrape_yahoo_mkt_cap(content): 'K': 1000, } - to_find = '