Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the code for pulling bitcoin prices and a template for the ML API (prediction) #16

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ typings/
# Model Training
__pycache__
*.csv
*.pkl

# python
venv
Expand All @@ -72,3 +73,6 @@ venv

# AWS
.aws

# ML
*.pt
30 changes: 30 additions & 0 deletions api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
import boto3
import time
import hashlib

import jwt
import numpy as np
import torch
# Variable is in torch.autograd, not the torch top-level namespace
# (it is also a no-op wrapper since torch 0.4 and kept only for old call sites).
from torch.autograd import Variable

# import ssl
# ssl._create_default_https_context = ssl._create_unverified_context

Expand Down Expand Up @@ -1027,5 +1033,29 @@ def sendTempData():
}
return jsonify(data)


# ML - API

#Prediction for the next k-datapoints
@app.route('/ml/prediction/<k_data>')
@cross_origin()
def prediction(k_data):
    """Roll the trained model forward and predict the next m data points.

    ``k_data`` is a URL path segment containing a JSON object:
      - ``data``: list of the previous k observed values (k is fixed by how
        the model was trained; currently k = 5) -- TODO confirm element shape
        matches what the saved model expects.
      - ``next``: how many future points to predict.

    Returns a JSON list of the m predicted values.
    """
    import json  # local import: top-of-file imports are outside this view

    # The route parameter arrives as a *string*; it must be parsed before
    # it can be indexed (the previous code indexed the raw string).
    payload = json.loads(k_data)
    m = int(payload['next'])
    # Keep the window as a plain list: ndarray has no append(), and a list
    # makes the sliding-window update below cheap and obvious.
    window = [float(v) for v in payload['data']]

    # Model directory here
    model = torch.load('../prediction/model.pt')
    model.eval()

    output = []
    with torch.no_grad():
        for _ in range(m):
            # NOTE(review): assumes the model takes a 1-D float tensor of
            # length k -- confirm against the training code.
            data = torch.from_numpy(np.asarray(window, dtype=np.float32))
            out = float(model(data)[0].cpu())
            output.append(out)
            # Slide the window by exactly one: append the prediction and
            # drop only the oldest element ([1:-1] dropped two per step).
            window.append(out)
            window = window[1:]
    # jsonify cannot serialize ndarrays; output is a list of plain floats.
    return jsonify(output)


# Run the Flask development server when executed directly
# (production deployments should use a WSGI server instead).
if __name__ == '__main__':
    app.run()
2 changes: 2 additions & 0 deletions api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Jinja2==2.11.1
jmespath==0.9.5
MarkupSafe==1.1.1
more-itertools==5.0.0
numpy==1.16.6
packaging==20.3
pathlib2==2.3.5
pluggy==0.13.1
Expand All @@ -37,6 +38,7 @@ s3transfer==0.3.3
scandir==1.10.0
six==1.14.0
soupsieve==1.9.5
torch==1.2.0
typing==3.7.4.1
urllib3==1.25.8
wcwidth==0.1.9
Expand Down
83 changes: 83 additions & 0 deletions sentiment/crypto_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Script to gather historical cryptocurrency data from coinmarketcap.com (cmc) """

import json
import requests
from bs4 import BeautifulSoup
import csv
import sys
from time import sleep


def CoinNames():
    """Gets ID's of all coins on cmc"""

    response = requests.get("https://api.coinmarketcap.com/v1/ticker/?limit=0")
    listing = json.loads(response.text)
    # One ticker entry per coin; its 'id' field is the cmc identifier.
    return [entry['id'] for entry in listing]

def gather(startdate, enddate, names):
    """Scrape the historical-data table for each coin in *names* from cmc.

    startdate/enddate are date strings in the format cmc's URL expects.
    An empty *names* list means "all coins" (fetched via CoinNames()).
    Returns (headers, historicaldata): the table header row (with "Coin"
    prepended) and one list per data row, each prefixed with the coin id.
    """
    historicaldata = []
    # Initialise headers so we never hit UnboundLocalError on return when
    # no table is successfully parsed (e.g. all requests fail).
    headers = []

    if len(names) == 0:
        names = CoinNames()

    for counter, coin in enumerate(names, start=1):
        sleep(10)  # throttle so we do not hammer coinmarketcap
        r = requests.get("https://coinmarketcap.com/currencies/{0}/historical-data/?start={1}&end={2}".format(coin, startdate, enddate))
        soup = BeautifulSoup(r.text, "html.parser")
        table = soup.find('table', attrs={ "class" : "table"})
        if table is None:
            # Page layout changed or coin page missing: skip this coin
            # instead of crashing with AttributeError on .find_all.
            continue

        # Add table header to list (once, from the first parsed table)
        if not headers:
            headers = [header.text for header in table.find_all('th')]
            headers.insert(0, "Coin")

        for row in table.find_all('tr'):
            currentrow = [val.text for val in row.find_all('td')]
            if(len(currentrow) != 0):
                currentrow.insert(0, coin)
                historicaldata.append(currentrow)

        print("Coin Counter -> " + str(counter), end='\r')
    return headers, historicaldata

def _gather(startdate, enddate):
    """ Scrape data off cmc"""

    # Exactly "script start end" on the command line means all coins;
    # a fourth argument names one specific coin id.
    coin_names = CoinNames() if len(sys.argv) == 3 else [sys.argv[3]]

    headers, rows = gather(startdate, enddate, coin_names)
    Save(headers, rows)

def Save(headers, rows):
    """Write *headers* then the non-empty *rows* to a CSV file.

    The file name is "HistoricalCoinData.csv" for a whole-market scrape
    (three command-line args) or "<coin-id>.csv" when a single coin was
    requested as the fourth argument.
    """
    if(len(sys.argv) == 3):
        FILE_NAME = "HistoricalCoinData.csv"
    else:
        FILE_NAME = sys.argv[3] + ".csv"

    # newline='' is required by the csv module; without it every record is
    # followed by a blank line on Windows (\r\r\n line endings).
    with open(FILE_NAME, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(row for row in rows if row)
    print("Finished!")

if __name__ == "__main__":

    # Fail with a usage message instead of a raw IndexError when the
    # required start/end dates are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: crypto_history.py <startdate> <enddate> [coin-id]")

    startdate = sys.argv[1]
    enddate = sys.argv[2]

    _gather(startdate, enddate)

25 changes: 25 additions & 0 deletions sentiment/install_locally.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import print_function
import os, site

#package = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages')
package = os.path.dirname(os.path.abspath(__file__))
print(package)

pathspec = r"""
# Generated by Crypto-History's installer (install_locally.py)
# In the lines below, list the paths where Python should look for
# supplied modules, one directory per line.
#
# If a directory does not exist when Python is started, it will be ignored.
%s
""" % package

print("Adding path:", package)

usp = site.getusersitepackages()
if not os.path.exists(usp):
os.makedirs(usp)
uspfile = os.path.join(usp, 'crypto-history.pth')
open(uspfile, 'w').write(pathspec)
print('Wrote to ' + uspfile)
print("Crypto-history package installed successfully!")
21 changes: 15 additions & 6 deletions sentiment/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
from sklearn.model_selection import train_test_split
import pandas as pd
import time
import torch

if __name__ == '__main__':

#device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
'''
TwComments = pd.read_csv('tweetsbitcoin.csv',delimiter=",", index_col=None)
TwComments = TwComments.dropna()
TwComments=TwComments.drop_duplicates()
Expand Down Expand Up @@ -44,7 +48,7 @@
Price.loc[Price['Price Diff'] < 0, 'Price Diff'] = 0
Price.loc[Price['Price Diff'] > 0, 'Price Diff'] = 1

Price1 = Price[['OpTime','Log_Ret','Price Diff']]
Price1 = Price[['OpTime','Log_Ret','Price ']]
Price1['Log_Ret']=np.square(Price1[['Log_Ret']])
Price1['OpTime']= Price1['OpTime'].floordiv(1000)

Expand All @@ -58,6 +62,8 @@
df['tweets'] = df['tweets'].apply(lambda x: tokenizer(x))

tweets_1 = df['tweets']
print(tweets_1)
exit()
final_tweets = []
for t in tweets_1:
t.insert(0, '<SOS>')
Expand All @@ -73,12 +79,15 @@

padded_word_indices = pad_sequences(word_indices)
X_train, X_test, y_train, y_test = train_test_split(padded_word_indices, df['Price Diff'].values, test_size=0.1, shuffle=True)
X_train = Variable(torch.from_numpy(X_train).long())
y_train = Variable(torch.from_numpy(y_train).float())
X_train = Variable(torch.from_numpy(X_train).long().device())
y_train = Variable(torch.from_numpy(y_train).float().device())
'''

model = SimpleClassifier(100, 32)
print(model.parameters())
model = SimpleClassifier1(100, 32)
#model.save('sentiment.pt')
#print(model.parameters())
mseLoss = nn.MSELoss()
optimizer = optim.Adam(model.parameters())
model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)
torch.save(model, 'sentiment.pt')
#model = train(model, X_train, y_train, mseLoss, optimizer, batch_size = 16, n_epochs = 1)

4 changes: 2 additions & 2 deletions sentiment/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from torch.autograd import Variable


class SimpleClassifier(nn.Module):
class SimpleClassifier1(nn.Module):
def __init__(self, input_size, hidden_size):
super(SimpleClassifier, self).__init__()
super(SimpleClassifier1, self).__init__()
self.hidden_size = hidden_size
self.embedding = nn.Embedding(300000, hidden_size)
self.gru = nn.GRU(input_size = hidden_size, hidden_size = hidden_size)
Expand Down