-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrbf_etoimos.py
324 lines (270 loc) · 12.8 KB
/
rbf_etoimos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# from https://github.com/mohabmes/StockNN/blob/master/RBF/RBF-stock.py
####### NOTES:
### Also check the previous version of this code, which used input_dim=1,
### before swapping in this file's code for the 30-step prediction.
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras.initializers import Initializer
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from rbflayer import RBFLayer, InitCentersRandom
from keras.models import load_model
from err import error_count, calc_diff
from visual import plot
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import time
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from column 0 of a 2-D array.

    Each sample is a window of ``look_back`` consecutive values; its
    target is the value immediately following the window.

    NOTE(review): the loop bound keeps the upstream ``- 1``, so the last
    possible window/target pair is skipped — preserved deliberately.
    """
    n_samples = len(dataset) - look_back - 1
    windows = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    targets = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(windows), np.array(targets)
def addtoLastRow(dataset, valuetoAdd):
    """Advance the final window of *dataset* by one step.

    Takes the last (1, look_back) row, drops its oldest value, appends
    ``valuetoAdd[0][0]`` at the end, and returns the shifted row with
    shape (1, look_back).
    """
    last_window = dataset[-1]
    shifted = [last_window[0][j] for j in range(1, last_window.shape[1])]
    shifted.append(valuetoAdd[0][0])
    new_row = np.array(shifted)
    return new_row.reshape(1, new_row.shape[0])
# for real_predictions: append all the lookback window vectors into one 1-D numpy array
def append_values(dataset):
    """Flatten a stack of overlapping (1, look_back) windows to 1-D.

    Emits the entire first window, then only the newest (last) element
    of every subsequent window — consecutive windows overlap by all but
    one value, so this reconstructs the underlying series.
    """
    first = dataset[0]
    series = [first[0][j] for j in range(first.shape[1])]
    for k in range(1, dataset.shape[0]):
        series.append(dataset[k][-1][-1])
    return np.array(series)
# to plot the predicted against the real values
def create_real_for_plot(dataset, predicted_stock_price):
    """Build the "real" series aligned with a prediction vector.

    Collects the oldest value of each window (skipping window 0), then
    appends *predicted_stock_price* so the result has the same length as
    the prediction series it is plotted against.
    """
    series = [dataset[k][-1][0] for k in range(1, dataset.shape[0])]
    series.append(predicted_stock_price)
    return np.array(series)
# to collapse the predicted values into a single 1-D numpy vector
def create_pred_for_plot(dataset):
    """Squeeze a stack of (1, 1)-shaped predictions into a 1-D vector."""
    return np.array([dataset[k][0][0] for k in range(dataset.shape[0])])
def predict_prices(dates, prices, x):
    """Fit linear and RBF SVR models on (dates, prices) and predict at *x*.

    Parameters
    ----------
    dates : sequence
        Sample positions; reshaped internally to a column vector.
    prices : sequence
        Target values aligned with *dates*.
    x : 2-D array-like
        Sample(s) to predict for (passed straight to ``SVR.predict``).

    Returns
    -------
    tuple
        ``(rbf_prediction, linear_prediction)`` — one scalar each.

    Side effects: prints fit timings and predictions, and shows a
    matplotlib scatter/line figure comparing the two models.
    """
    dates = np.reshape(dates, (len(dates), 1))
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    start_time = time.time()
    svr_lin.fit(dates, prices)
    print("--- Linear Fit Complete in %s seconds ---" % (time.time() - start_time))
    print("\n")
    # The Polynomial fitting did not complete within a reasonable time, therefore commenting it out.
    # svr_poly.fit(dates,prices)
    # print("Polynomial Fit Complete")
    start_time = time.time()
    svr_rbf.fit(dates, prices)
    print("--- RBF Fit Complete in %s seconds ---" % (time.time() - start_time))
    print("\n")
    # BUG FIX: the original line ended with a stray trailing comma, which
    # wrapped the scalar in a 1-tuple and corrupted the printed/returned value.
    rbf_prediction = svr_rbf.predict(x)[0]
    linear_prediction = svr_lin.predict(x)[0]
    print("RBF Prediction is : ", rbf_prediction)
    print("\n")
    print("Linear Prediction is : ", linear_prediction)
    plt.scatter(dates, prices, color='black', label='Data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates, svr_lin.predict(dates), color='blue', label='Linear model')
    # plt.plot(dates,svr_poly.predict(dates), color = 'red', label = 'Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    return rbf_prediction, linear_prediction
############ Data Preprocessing ############
# Importing the dataset. The same CSV is read twice: `ds` is used for raw
# numeric slicing, `df` only for date parsing and inspection prints.
ds = pd.read_csv('Stock_Price_Training_Data.csv')
df = pd.read_csv('Stock_Price_Training_Data.csv', dayfirst=True)
print(df.head())
print('\n Data Types:')
print(df.dtypes)
dates = df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
print("to_datetime is \n", dates)
# Column 4 (presumably the closing price — confirm against the CSV header)
# is duplicated into two identical columns so the MinMaxScaler below fits a
# two-column layout: column 0 feeds X, column 1 feeds y.
dataset = ds.iloc[:, [4, 4]].values
all_entries = int(len(dataset))
print("all entries", all_entries)
# Unscaled X/y (each target is the next row's value); both are overwritten
# by their scaled versions a few lines down, so these only feed the print.
X = ds.iloc[:all_entries - 1, 4].values
y = ds.iloc[1:all_entries, 4].values
print(X)
# Feature Scaling to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_scaled = scaler.fit_transform(dataset)
X = dataset_scaled[:all_entries - 1, 0]
y = dataset_scaled[1:all_entries, 1]
print("X ALL", X, "\n y", y, "\n with size x = ", X.shape, " and y = ", y.shape)
# Window length: each training sample holds the 55 previous scaled prices.
lookback = 55
print("X[lookback]", X[lookback-1])
X_lookback, ylookback = create_dataset(dataset_scaled, lookback)
print("LOOKBACK", X_lookback, "\n y", ylookback, "\n with size x = ", X_lookback.shape, " and y = ", ylookback.shape)
# Splitting the dataset into the Training set and Test set; shuffle=False
# keeps the chronological order of the time series intact.
X_train, X_test, y_train, y_test = train_test_split(X_lookback, ylookback, test_size=0.2, random_state=0, shuffle=False)
# X_train, X_test, y_train, y_test = train_test_split(X, y, teist_size = 0.2, random_state = 0)
# Sizes of dataset, train_ds, test_ds
X_all = X_lookback
y_all = ylookback
dataset_sz = X.shape[0]
all_sz = X_all.shape[0]
train_sz = X_train.shape[0]
test_sz = X_test.shape[0]
print("train_sz: ", train_sz, "\ntest_sz: ", test_sz, "\nall_sz: ", all_sz, "\ndataset_sz: ", dataset_sz)
print("X_train \n", X_train)
# X_train = np.reshape(X_train, (train_sz, 1))
# y_train = np.reshape(y_train, (train_sz, 1))
print("shape 1 Xtrain ", X_train.shape, "X_trrain 941th element ", X_train[940])
# Reshape to (samples, 1, lookback) — the layout expected by the RBF
# layer's input_shape=(1, lookback) below.
X_all = np.reshape(X_all, (X_all.shape[0], 1, X_all.shape[1]))
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
print("AFTER RESHAPE train_sz: ", X_train.shape, "\ntest_sz: ", X_test.shape, "\nall_sz: ", X_all.shape)
print("X_train after transform \n", X_train)
print("reshaped shape 1 Xtrain ", X_train.shape, "X_trrain 941 element ", X_train[940])
print("y_train with size ", y_train.shape)
############ Building the RBF ############
# Initialising the RBF network
regressor = Sequential()
# Adding the input layer and the first layer and Drop out Regularization.
# NOTE(review): centers are initialised from the first training sample only
# (X_train[0]) — verify against rbflayer.InitCentersRandom whether the full
# X_train was intended.
regressor.add(
    RBFLayer(60, input_dim=lookback, initializer=InitCentersRandom(X_train[0]), betas=2.0, input_shape=(1, lookback)))
regressor.add(Dropout(.2))
# Adding the output layer: a single linear unit for the regression target
regressor.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
# Compiling the RBF
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.summary()
# Fitting the RBF to the Training set (shuffle=False preserves time order)
regressor.fit(X_train, y_train, batch_size=1, epochs=5, shuffle=False)
############ Save & load Trained Model ############
# Save Trained Model
regressor.save('TICKER-RBF.h5')
# deletes the existing model
# del regressor
# load Trained Model
# regressor = load_model('TICKER-RBF', custom_objects={'RBFLayer':RBFLayer})
############ Predict & Test the Model ############
real_stock_price = np.array(X_test)
inputs = real_stock_price
predicted_stock_price = regressor.predict(inputs)
# rebuild the Structure
dataset_test_total = pd.DataFrame()
print("real_stock_prices ", real_stock_price, "size ", real_stock_price.shape, "\ninputs ", inputs, "size ",
      inputs.shape, "\n predicted_stock_price ", predicted_stock_price, "size ", predicted_stock_price.shape, )
real_stock_price_oneVector = append_values(real_stock_price)
print("real_stock_prices_oneVector ", real_stock_price_oneVector, "size ", real_stock_price_oneVector.shape, "\n")
real_stock_price_mod = create_real_for_plot(real_stock_price, ylookback[-1])
print("real mod shape ", real_stock_price_mod.shape)
dataset_test_total['real'] = real_stock_price_mod
# The last value of predicted_stock_price does not exist in the real set
# (it is a one-step-ahead prediction beyond the set), so to make the two
# columns the same length an extra value is appended to the real series.
# NOTE(review): the code actually appends ylookback[-1] above, not the last
# predicted value — confirm which was intended.
print("LAST PRED ", predicted_stock_price[-1])
predicted_stock_price_mod = create_pred_for_plot(predicted_stock_price)
print("pred_stock_prices_oneVector ", predicted_stock_price_mod, "size ", predicted_stock_price_mod.shape, "\n")
dataset_test_total['predicted'] = predicted_stock_price_mod
print("Dataset_test_total ", dataset_test_total)
# real data price VS. predicted price — map both columns back to the
# original price scale via the fitted two-column scaler
predicted_stock_price = scaler.inverse_transform(dataset_test_total)
# count of Wrong predicted value after applying treshold
err_cnt = error_count(predicted_stock_price[:, 0], predicted_stock_price[:, 1], toler_treshold=5.0)
# Calc difference between real data price and predicted price
diff_rate = calc_diff(predicted_stock_price[:, 0], predicted_stock_price[:, 1])
# show the inputs and predicted outputs
for i in range(len(predicted_stock_price[:, 1])):
    print("X=%s, Predicted=%s" % (predicted_stock_price[i, 1], predicted_stock_price[i, 0]))
print("Error count: ", err_cnt, "\n diff rate: ", diff_rate, "\n")
## Visualising the results
plot(predicted=predicted_stock_price[:, 1])
plot(real=predicted_stock_price[:, 0])
plot(predicted=predicted_stock_price[:, 1], real=predicted_stock_price[:, 0])
# MSE on the test split (inverse-scaled values)
mse = mean_squared_error(predicted_stock_price[:, 0], predicted_stock_price[:, 1])
print("MSE: ", mse)
############ Visualizing the results ############
print("#############################################################")
# Before this change, X_all and y_all here were X and y (the un-windowed series).
inputs = np.array(X_all)
all_real_price = np.array(ylookback)
print("all real price ", all_real_price,all_real_price.shape)
all_predicted_price = regressor.predict(X_all)
all_predicted_price_mod = create_pred_for_plot(all_predicted_price)
print("all stock prediction ", all_predicted_price, all_predicted_price.shape)
print("all inputs ", inputs)
size_of_in = inputs.size
print("input shape ", size_of_in)
list_of_predictions = []
#inputs = all_predicted_price
print("Inputs ", inputs, inputs.shape)
############## Predict the 30 prices in the future with #lookback input########
## inputs is a window of 55 giving you the 55 + 1 prediction
for k in range(30):
    # Predict over the whole (growing) input stack; only the newest
    # prediction (the last row) is used to extend the series.
    thirty_predicted_price = regressor.predict(inputs)
    print(k, " predicted ", thirty_predicted_price[-1])
    #inputs = np.append(inputs, thirty_predicted_price[-1])
    #inputs = np.delete(inputs, 0 , 0)
    #print("============Inputs ", inputs, inputs.shape)
    #inputs = inputs[1:size_of_in]
    # Build the next window: drop the oldest value of the last window,
    # append the new prediction, and stack it onto the input windows.
    row_to_be_added = addtoLastRow(inputs, thirty_predicted_price[-1])
    row_to_be_added = np.reshape(row_to_be_added,(row_to_be_added.shape[0], 1, row_to_be_added.shape[1]))
    inputs = np.vstack((inputs, row_to_be_added))
    list_of_predictions.append(thirty_predicted_price[-1])
# thirty_predicted_price = np.reshape(thirty_predicted_price, (thirty_predicted_price.shape[0], 1))
print("THIRTY DIMENSIONS ", thirty_predicted_price.shape)
# NOTE(review): the hard-coded slice 1172:1202 presumably targets the 30
# appended future steps for this dataset's size — verify for other data.
print("predicted 30 ", thirty_predicted_price[0][0][1172:1202])
dataset_pred_real = pd.DataFrame()
dataset_pred_real['real'] = all_real_price
dataset_pred_real['predicted'] = all_predicted_price_mod
print("dataset_pred_real dataframe ", dataset_pred_real)
# real test data price VS. predicted price
all_prices = scaler.inverse_transform(dataset_pred_real)
inputs_1d = append_values(inputs)
# The prediction series is duplicated into both columns only so the
# two-column scaler can inverse-transform it.
dataset_pred_thirty = pd.DataFrame()
dataset_pred_thirty['predicted'] = inputs_1d
dataset_pred_thirty['still_predicted'] = inputs_1d
print("prediction dataframe ", dataset_pred_thirty)
all_thirty_predictions = scaler.inverse_transform(dataset_pred_thirty)
# predicted_price = predict_prices(dates,inputs,31)
## Visualising the results
print("predicted inversed ", all_thirty_predictions[:, 1])
plot(predicted=all_thirty_predictions[:, 1])
plot(real=all_prices[:, 0])
plot(predicted=all_prices[:, 1], real=all_prices[:, 0])
# MSE over the full windowed dataset (real vs. one-step predictions)
mse = mean_squared_error(all_prices[:, 0], all_prices[:, 1])
print("MSE: ", mse)