Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the code for pulling bitcoin prices and a template for the ML API (prediction) #16

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ typings/
# Model Training
__pycache__
*.csv
*.pkl

# python
venv
Expand All @@ -72,3 +73,6 @@ venv

# AWS
.aws

# ML
*.pt
30 changes: 30 additions & 0 deletions api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
import boto3
import time
import hashlib

import jwt
import numpy as np
import torch
# Variable is in torch.autograd, not the torch top-level namespace
# (it is also a no-op wrapper since torch 0.4 and kept only for old call sites).
from torch.autograd import Variable

# import ssl
# ssl._create_default_https_context = ssl._create_unverified_context

Expand Down Expand Up @@ -1027,5 +1033,29 @@ def sendTempData():
}
return jsonify(data)


# ML - API

#Prediction for the next k-datapoints
@app.route('/ml/prediction/<k_data>')
@cross_origin()
def prediction(k_data):
    """Roll the trained model forward and predict the next m data points.

    ``k_data`` is a URL path segment containing a JSON object:
      - ``data``: list of the previous k observed values (k is fixed by how
        the model was trained; currently k = 5) -- TODO confirm element shape
        matches what the saved model expects.
      - ``next``: how many future points to predict.

    Returns a JSON list of the m predicted values.
    """
    import json  # local import: top-of-file imports are outside this view

    # The route parameter arrives as a *string*; it must be parsed before
    # it can be indexed (the previous code indexed the raw string).
    payload = json.loads(k_data)
    m = int(payload['next'])
    # Keep the window as a plain list: ndarray has no append(), and a list
    # makes the sliding-window update below cheap and obvious.
    window = [float(v) for v in payload['data']]

    # Model directory here
    model = torch.load('../prediction/model.pt')
    model.eval()

    output = []
    with torch.no_grad():
        for _ in range(m):
            # NOTE(review): assumes the model takes a 1-D float tensor of
            # length k -- confirm against the training code.
            data = torch.from_numpy(np.asarray(window, dtype=np.float32))
            out = float(model(data)[0].cpu())
            output.append(out)
            # Slide the window by exactly one: append the prediction and
            # drop only the oldest element ([1:-1] dropped two per step).
            window.append(out)
            window = window[1:]
    # jsonify cannot serialize ndarrays; output is a list of plain floats.
    return jsonify(output)


# Run the Flask development server when executed directly
# (production deployments should use a WSGI server instead).
if __name__ == '__main__':
    app.run()
2 changes: 2 additions & 0 deletions api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Jinja2==2.11.1
jmespath==0.9.5
MarkupSafe==1.1.1
more-itertools==5.0.0
numpy==1.16.6
packaging==20.3
pathlib2==2.3.5
pluggy==0.13.1
Expand All @@ -37,6 +38,7 @@ s3transfer==0.3.3
scandir==1.10.0
six==1.14.0
soupsieve==1.9.5
torch==1.2.0
typing==3.7.4.1
urllib3==1.25.8
wcwidth==0.1.9
Expand Down
83 changes: 83 additions & 0 deletions sentiment/crypto_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Script to gather historical cryptocurrency data from coinmarketcap.com (cmc) """

import json
import requests
from bs4 import BeautifulSoup
import csv
import sys
from time import sleep


def CoinNames():
    """Gets ID's of all coins on cmc"""

    response = requests.get("https://api.coinmarketcap.com/v1/ticker/?limit=0")
    listing = json.loads(response.text)
    # One ticker entry per coin; its 'id' field is the cmc identifier.
    return [entry['id'] for entry in listing]

def gather(startdate, enddate, names):
    """Scrape the historical-data table for each coin in *names* from cmc.

    startdate/enddate are date strings in the format cmc's URL expects.
    An empty *names* list means "all coins" (fetched via CoinNames()).
    Returns (headers, historicaldata): the table header row (with "Coin"
    prepended) and one list per data row, each prefixed with the coin id.
    """
    historicaldata = []
    # Initialise headers so we never hit UnboundLocalError on return when
    # no table is successfully parsed (e.g. all requests fail).
    headers = []

    if len(names) == 0:
        names = CoinNames()

    for counter, coin in enumerate(names, start=1):
        sleep(10)  # throttle so we do not hammer coinmarketcap
        r = requests.get("https://coinmarketcap.com/currencies/{0}/historical-data/?start={1}&end={2}".format(coin, startdate, enddate))
        soup = BeautifulSoup(r.text, "html.parser")
        table = soup.find('table', attrs={ "class" : "table"})
        if table is None:
            # Page layout changed or coin page missing: skip this coin
            # instead of crashing with AttributeError on .find_all.
            continue

        # Add table header to list (once, from the first parsed table)
        if not headers:
            headers = [header.text for header in table.find_all('th')]
            headers.insert(0, "Coin")

        for row in table.find_all('tr'):
            currentrow = [val.text for val in row.find_all('td')]
            if(len(currentrow) != 0):
                currentrow.insert(0, coin)
                historicaldata.append(currentrow)

        print("Coin Counter -> " + str(counter), end='\r')
    return headers, historicaldata

def _gather(startdate, enddate):
    """ Scrape data off cmc"""

    # Exactly "script start end" on the command line means all coins;
    # a fourth argument names one specific coin id.
    coin_names = CoinNames() if len(sys.argv) == 3 else [sys.argv[3]]

    headers, rows = gather(startdate, enddate, coin_names)
    Save(headers, rows)

def Save(headers, rows):
    """Write *headers* then the non-empty *rows* to a CSV file.

    The file name is "HistoricalCoinData.csv" for a whole-market scrape
    (three command-line args) or "<coin-id>.csv" when a single coin was
    requested as the fourth argument.
    """
    if(len(sys.argv) == 3):
        FILE_NAME = "HistoricalCoinData.csv"
    else:
        FILE_NAME = sys.argv[3] + ".csv"

    # newline='' is required by the csv module; without it every record is
    # followed by a blank line on Windows (\r\r\n line endings).
    with open(FILE_NAME, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(row for row in rows if row)
    print("Finished!")

if __name__ == "__main__":

    # Fail with a usage message instead of a raw IndexError when the
    # required start/end dates are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: crypto_history.py <startdate> <enddate> [coin-id]")

    startdate = sys.argv[1]
    enddate = sys.argv[2]

    _gather(startdate, enddate)

25 changes: 25 additions & 0 deletions sentiment/install_locally.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import print_function
import os, site

#package = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages')
package = os.path.dirname(os.path.abspath(__file__))
print(package)

pathspec = r"""
# Generated by Crypto-History's installer (install_locally.py)
# In the lines below, list the paths where Python should look for
# supplied modules, one directory per line.
#
# If a directory does not exist when Python is started, it will be ignored.
%s
""" % package

print("Adding path:", package)

usp = site.getusersitepackages()
if not os.path.exists(usp):
os.makedirs(usp)
uspfile = os.path.join(usp, 'crypto-history.pth')
open(uspfile, 'w').write(pathspec)
print('Wrote to ' + uspfile)
print("Crypto-history package installed successfully!")
21 changes: 15 additions & 6 deletions sentiment/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
from sklearn.model_selection import train_test_split
import pandas as pd
import time
import torch

if __name__ == '__main__':

#device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
'''
TwComments = pd.read_csv('tweetsbitcoin.csv',delimiter=",", index_col=None)
TwComments = TwComments.dropna()
TwComments=TwComments.drop_duplicates()
Expand Down Expand Up @@ -44,7 +48,7 @@
Price.loc[Price['Price Diff'] < 0, 'Price Diff'] = 0
Price.loc[Price['Price Diff'] > 0, 'Price Diff'] = 1

Price1 = Price[['OpTime','Log_Ret','Price Diff']]
Price1 = Price[['OpTime','Log_Ret','Price ']]
Price1['Log_Ret']=np.square(Price1[['Log_Ret']])
Price1['OpTime']= Price1['OpTime'].floordiv(1000)

Expand All @@ -58,6 +62,8 @@
df['tweets'] = df['tweets'].apply(lambda x: tokenizer(x))

tweets_1 = df['tweets']
print(tweets_1)
exit()
final_tweets = []
for t in tweets_1:
t.insert(0, '<SOS>')
Expand All @@ -73,12 +79,15 @@

padded_word_indices = pad_sequences(word_indices)
X_train, X_test, y_train, y_test = train_test_split(padded_word_indices, df['Price Diff'].values, test_size=0.1, shuffle=True)
X_train = Variable(torch.from_numpy(X_train).long())
y_train = Variable(torch.from_numpy(y_train).float())
X_train = Variable(torch.from_numpy(X_train).long().device())
y_train = Variable(torch.from_numpy(y_train).float().device())
'''

model = SimpleClassifier(100, 32)
print(model.parameters())
model = SimpleClassifier1(100, 32)
#model.save('sentiment.pt')
#print(model.parameters())
mseLoss = nn.MSELoss()
optimizer = optim.Adam(model.parameters())
model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)
torch.save(model, 'sentiment.pt')
#model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)

4 changes: 2 additions & 2 deletions sentiment/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from torch.autograd import Variable


class SimpleClassifier(nn.Module):
class SimpleClassifier1(nn.Module):
def __init__(self, input_size, hidden_size):
super(SimpleClassifier, self).__init__()
super(SimpleClassifier1, self).__init__()
self.hidden_size = hidden_size
self.embedding = nn.Embedding(300000, hidden_size)
self.gru = nn.GRU(input_size = hidden_size, hidden_size = hidden_size)
Expand Down