diff --git a/.gitignore b/.gitignore
index b33dd25..e9196d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,6 +60,7 @@ typings/
 
 # Model Training
 __pycache__
 *.csv
+*.pkl
 
 # python venv
@@ -72,3 +73,6 @@ venv
 
 # AWS
 .aws
+
+#ML
+*.pt
\ No newline at end of file
diff --git a/api/app.py b/api/app.py
index 45e819b..9a84576 100644
--- a/api/app.py
+++ b/api/app.py
@@ -9,8 +9,10 @@
 import boto3
 import time
 import hashlib
+import torch
+import numpy as np
 import jwt
 
 # import ssl
 # ssl._create_default_https_context = ssl._create_unverified_context
@@ -1027,5 +1029,31 @@ def sendTempData():
     }
     return jsonify(data)
+
+# ML - API
+
+# Prediction for the next m data points of the given currency.
+# Expects JSON: {"next": m, "data": [...]}, where "data" holds the previous
+# k data points (k is fixed by training; currently k = 5).
+@app.route('/ml/prediction/', methods=['POST'])
+@cross_origin()
+def prediction():
+    k_data = request.get_json()
+    m = int(k_data['next'])
+    window = np.array(k_data['data'], dtype=np.float32)
+    # Model directory here
+    model = torch.load('../prediction/model.pt')
+    model.eval()
+    output = []
+    with torch.no_grad():
+        for _ in range(m):
+            data = torch.from_numpy(window)
+            out = model(data)[0].cpu().float().numpy()
+            # Slide the window: drop the oldest point, append the new prediction
+            window = np.append(window[1:], out)
+            output.append(float(out))
+    return jsonify(output)
+
+
 
 if __name__ == '__main__':
     app.run()
\ No newline at end of file
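A minimal sketch of calling the new prediction endpoint, assuming the API is served locally on port 5000 and a 5-point training window; the host, port, and sample prices are illustrative, not part of this diff:

    import requests

    # Last k = 5 observed prices; ask for the next 3 predictions.
    payload = {"next": 3, "data": [9100.0, 9150.5, 9120.3, 9180.1, 9210.8]}
    resp = requests.post("http://localhost:5000/ml/prediction/", json=payload)
    print(resp.json())  # e.g. a list of 3 predicted values
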
diff --git a/api/requirements.txt b/api/requirements.txt
index a9b2f55..e4a3262 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -22,6 +22,7 @@ Jinja2==2.11.1
 jmespath==0.9.5
 MarkupSafe==1.1.1
 more-itertools==5.0.0
+numpy==1.16.6
 packaging==20.3
 pathlib2==2.3.5
 pluggy==0.13.1
@@ -37,6 +38,7 @@ s3transfer==0.3.3
 scandir==1.10.0
 six==1.14.0
 soupsieve==1.9.5
+torch==1.2.0
 typing==3.7.4.1
 urllib3==1.25.8
 wcwidth==0.1.9
diff --git a/sentiment/crypto_history.py b/sentiment/crypto_history.py
new file mode 100644
index 0000000..a389d74
--- /dev/null
+++ b/sentiment/crypto_history.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Script to gather historical cryptocurrency data from coinmarketcap.com (cmc)"""
+from __future__ import print_function
+
+import json
+import requests
+from bs4 import BeautifulSoup
+import csv
+import sys
+from time import sleep
+
+
+def CoinNames():
+    """Gets IDs of all coins on cmc"""
+    names = []
+    response = requests.get("https://api.coinmarketcap.com/v1/ticker/?limit=0")
+    respJSON = json.loads(response.text)
+    for i in respJSON:
+        names.append(i['id'])
+    return names
+
+def gather(startdate, enddate, names):
+    historicaldata = []
+    counter = 1
+
+    if len(names) == 0:
+        names = CoinNames()
+
+    for coin in names:
+        sleep(10)
+        r = requests.get("https://coinmarketcap.com/currencies/{0}/historical-data/?start={1}&end={2}".format(coin, startdate, enddate))
+        data = r.text
+        soup = BeautifulSoup(data, "html.parser")
+        table = soup.find('table', attrs={"class": "table"})
+
+        # Add table header to list
+        if len(historicaldata) == 0:
+            headers = [header.text for header in table.find_all('th')]
+            headers.insert(0, "Coin")
+
+        for row in table.find_all('tr'):
+            currentrow = [val.text for val in row.find_all('td')]
+            if len(currentrow) != 0:
+                currentrow.insert(0, coin)
+                historicaldata.append(currentrow)
+
+        print("Coin Counter -> " + str(counter), end='\r')
+        counter += 1
+    return headers, historicaldata
+
+def _gather(startdate, enddate):
+    """Scrape data off cmc"""
+
+    if len(sys.argv) == 3:
+        names = CoinNames()
+    else:
+        names = [sys.argv[3]]
+
+    headers, historicaldata = gather(startdate, enddate, names)
+
+    Save(headers, historicaldata)
+
+def Save(headers, rows):
+
+    if len(sys.argv) == 3:
+        FILE_NAME = "HistoricalCoinData.csv"
+    else:
+        FILE_NAME = sys.argv[3] + ".csv"
+
+    with open(FILE_NAME, 'w') as f:
+        writer = csv.writer(f)
+        writer.writerow(headers)
+        writer.writerows(row for row in rows if row)
+    print("Finished!")
+
+if __name__ == "__main__":
+
+    startdate = sys.argv[1]
+    enddate = sys.argv[2]
+
+    _gather(startdate, enddate)
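Example invocations of the scraper, assuming an environment with requests and beautifulsoup4 installed; the dates follow cmc's YYYYMMDD URL format and are illustrative (note the 10-second sleep per coin, so a full run over all coins is slow):

    # All coins between the two dates -> HistoricalCoinData.csv
    python crypto_history.py 20170101 20171231

    # A single coin; the extra argument is the cmc ID and the output name -> bitcoin.csv
    python crypto_history.py 20170101 20171231 bitcoin
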
diff --git a/sentiment/install_locally.py b/sentiment/install_locally.py
new file mode 100644
index 0000000..b297d25
--- /dev/null
+++ b/sentiment/install_locally.py
@@ -0,0 +1,25 @@
+from __future__ import print_function
+import os, site
+
+#package = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages')
+package = os.path.dirname(os.path.abspath(__file__))
+print(package)
+
+pathspec = r"""
+# Generated by Crypto-History's installer (install_locally.py)
+# In the lines below, list the paths where Python should look for
+# supplied modules, one directory per line.
+#
+# If a directory does not exist when Python is started, it will be ignored.
+%s
+""" % package
+
+print("Adding path:", package)
+
+usp = site.getusersitepackages()
+if not os.path.exists(usp):
+    os.makedirs(usp)
+uspfile = os.path.join(usp, 'crypto-history.pth')
+open(uspfile, 'w').write(pathspec)
+print('Wrote to ' + uspfile)
+print("Crypto-history package installed successfully!")
diff --git a/sentiment/main.py b/sentiment/main.py
index 85b5896..006b688 100644
--- a/sentiment/main.py
+++ b/sentiment/main.py
@@ -3,8 +3,12 @@
 from sklearn.model_selection import train_test_split
 import pandas as pd
 import time
+import torch
 
 if __name__ == '__main__':
+
+    #device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    '''
     TwComments = pd.read_csv('tweetsbitcoin.csv',delimiter=",", index_col=None)
     TwComments = TwComments.dropna()
     TwComments=TwComments.drop_duplicates()
@@ -44,7 +48,7 @@
     Price.loc[Price['Price Diff'] < 0, 'Price Diff'] = 0
     Price.loc[Price['Price Diff'] > 0, 'Price Diff'] = 1
 
-    Price1 = Price[['OpTime','Log_Ret','Price Diff']]
+    Price1 = Price[['OpTime','Log_Ret','Price ']]
 
     Price1['Log_Ret']=np.square(Price1[['Log_Ret']])
     Price1['OpTime']= Price1['OpTime'].floordiv(1000)
@@ -58,6 +62,8 @@
     df['tweets'] = df['tweets'].apply(lambda x: tokenizer(x))
 
     tweets_1 = df['tweets']
+    print(tweets_1)
+    exit()
     final_tweets = []
     for t in tweets_1:
         t.insert(0, '')
@@ -73,12 +79,15 @@
     padded_word_indices = pad_sequences(word_indices)
 
     X_train, X_test, y_train, y_test = train_test_split(padded_word_indices, df['Price Diff'].values, test_size=0.1, shuffle=True)
-    X_train = Variable(torch.from_numpy(X_train).long())
-    y_train = Variable(torch.from_numpy(y_train).float())
+    X_train = Variable(torch.from_numpy(X_train).long().to(device))
+    y_train = Variable(torch.from_numpy(y_train).float().to(device))
+    '''
 
-    model = SimpleClassifier(100, 32)
-    print(model.parameters())
+    model = SimpleClassifier1(100, 32)
+    #model.save('sentiment.pt')
+    #print(model.parameters())
 
     mseLoss = nn.MSELoss()
     optimizer = optim.Adam(model.parameters())
-    model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)
+    torch.save(model, 'sentiment.pt')
+    #model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)
diff --git a/sentiment/model.py b/sentiment/model.py
index e038820..5ac3492 100644
--- a/sentiment/model.py
+++ b/sentiment/model.py
@@ -5,9 +5,9 @@ from torch.autograd import Variable
 
-class SimpleClassifier(nn.Module):
+class SimpleClassifier1(nn.Module):
     def __init__(self, input_size, hidden_size):
-        super(SimpleClassifier, self).__init__()
+        super(SimpleClassifier1, self).__init__()
         self.hidden_size = hidden_size
         self.embedding = nn.Embedding(300000, hidden_size)
         self.gru = nn.GRU(input_size = hidden_size, hidden_size = hidden_size)
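main.py now pickles the whole (currently untrained) module with torch.save(model, 'sentiment.pt'), so loading it elsewhere requires the SimpleClassifier1 class to be importable under the module path it was saved with. A minimal sketch of the consuming side, assuming sentiment/model.py is on sys.path:

    import torch
    from model import SimpleClassifier1  # class must be importable for unpickling

    model = torch.load('sentiment.pt')
    model.eval()  # disable training-time behaviour before inference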