-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathTimeSeriesFutures.py
203 lines (153 loc) · 7.43 KB
/
TimeSeriesFutures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!pip install yfinance
# --- Colab environment setup ----------------------------------------------
# Mounts Google Drive, copies the historical futures CSV into the local
# working directory, and loads it into a DataFrame.  The '!' lines are
# IPython shell magics: this file only runs inside a Jupyter/Colab notebook,
# not as a plain Python script.
from google.colab import drive
import yfinance as yf  # imported but not used below — presumably for ad-hoc downloads; verify
import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib.pyplot as plt # data visualization
drive.mount('/content/drive')
!ls "/content/drive/My Drive/LSTM Futures"
!cp "/content/drive/My Drive/LSTM Futures/reversed_micro_emini_nasdaq_historical.csv" "reversed_micro_emini_nasdaq_historical.csv"
# Load the copied CSV and show its size plus the first rows.
df=pd.read_csv("reversed_micro_emini_nasdaq_historical.csv")
print('Number of rows and columns:', df.shape)
df.head(5)
# NOTE(review): leftover Kaggle-kernel boilerplate.  '/kaggle/input' does not
# exist on Colab, so the walk below prints nothing there; the repeated
# imports are harmless no-ops.  Kept as-is to preserve the notebook's flow.
import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib.pyplot as plt # data visualization
import os
# List every file under the (Kaggle-specific) input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Preview the raw frame, then isolate the one series we actually model.
df.head()
"""
 Open High Low Close Adj_Close Volume
Date
2017-01-02 998.617004 1031.390015 996.702026 1021.750000 1021.750000 222184992
2017-01-03 1021.599976 1044.079956 1021.599976 1043.839966 1043.839966 185168000
2017-01-04 1044.400024 1159.420044 1044.400024 1154.729980 1154.729980 344945984
2017-01-05 1156.729980 1191.099976 910.416992 1013.380005 1013.380005 510199008
2017-01-06 1014.239990 1046.810059 883.943970 902.200989 902.200989 351876000
"""
# Single-column DataFrame holding the closing price; summary statistics.
df_close = df['Close'].to_frame()
df_close.describe()
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

# Plot the raw closing-price series over the full history.
bold_axis = dict(weight='bold', fontsize=14)
bold_tick = dict(weight='bold', fontsize=12)

plt.figure(figsize=(8, 6))
plt.plot(df_close, color='g')
plt.title('Micro eMini NASDAQ Closing Price', weight='bold', fontsize=16)
plt.xlabel('Time', **bold_axis)
plt.ylabel('USD ($)', **bold_axis)
plt.xticks(rotation=45, **bold_tick)
plt.yticks(**bold_tick)
plt.grid(color='y', linewidth=0.5)
# Autocorrelation of the closing price for lags 0..50, with Ljung-Box Q-stats
# and 95% confidence intervals.
from statsmodels.tsa import stattools

# FIX: the 'unbiased' keyword was deprecated in statsmodels 0.12 and removed
# in 0.13; 'adjusted' is the current name for the same denominator option.
acf_djia, confint_djia, qstat_djia, pvalues_djia = stattools.acf(
    df_close,
    adjusted=True,
    nlags=50,
    qstat=True,
    fft=True,
    alpha=0.05,
)
plt.figure(figsize=(7, 5))
plt.plot(pd.Series(acf_djia), color='r', linewidth=2)
plt.title('Autocorrelation of Micro eMini NASDAQ Closing Price', weight='bold', fontsize=16)
plt.xlabel('Lag', weight='bold', fontsize=14)
plt.ylabel('Value', weight='bold', fontsize=14)
plt.xticks(weight='bold', fontsize=12, rotation=45)
plt.yticks(weight='bold', fontsize=12)
plt.grid(color = 'y', linewidth = 0.5)
def create_regressor_attributes(df, attribute, list_of_prev_t_instants):
    """Build a lagged-feature frame for time-series regression.

    For each column name in `attribute`, appends one column per lag in
    `list_of_prev_t_instants`, named '<attr>_(t-<lag>)', holding that
    column's value <lag> rows earlier.  The first max(lag) rows are dropped
    because they have no complete history.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; its (e.g. datetime) index is preserved on the result.
    attribute : list[str]
        Column names to generate lags for (the parameter name is singular
        for backward compatibility, but it is a list).
    list_of_prev_t_instants : list[int]
        Positive lag offsets, in rows.

    Returns
    -------
    pd.DataFrame
        Copy of df[max_lag:] with the lag columns appended.  Unlike the
        original version, neither `df` nor the lag list is mutated.
    """
    # Sort a copy so the caller's list is left untouched (the original
    # called .sort() in place).
    lags = sorted(list_of_prev_t_instants)
    start, end = lags[-1], len(df)

    # Work on a positional slice copy; the original added a 'datetime'
    # column to the caller's frame as a side effect.
    original_index = df.index[start:end]
    df_copy = df.iloc[start:end].copy()
    df_copy.reset_index(drop=True, inplace=True)

    for attr in attribute:  # renamed loop var: the original shadowed its own parameter
        lag_cols = []
        for prev_t in lags:
            shifted = df[attr].iloc[start - prev_t:end - prev_t].reset_index(drop=True)
            shifted.name = '{}_(t-{})'.format(attr, prev_t)
            lag_cols.append(shifted)
        df_copy = pd.concat([df_copy] + lag_cols, sort=False, axis=1)

    # Restore the original row labels under the same index name the old
    # implementation produced via set_index(['datetime']).
    df_copy.index = original_index
    df_copy.index.name = 'datetime'
    return df_copy
# Generate lag features t-1 .. t-15 for the closing price.
list_of_attributes = ['Close']
list_of_prev_t_instants = list(range(1, 16))
list_of_prev_t_instants
df_new = create_regressor_attributes(df_close, list_of_attributes, list_of_prev_t_instants)
df_new.head()
df_new.shape
# --- Build the feed-forward regression network -----------------------------
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import SGD            # unused below; kept for notebook experimentation
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model         # unused below; kept for notebook experimentation
from tensorflow.keras.callbacks import ModelCheckpoint # unused below; kept for notebook experimentation

# FIX: Input's `shape` must be a tuple — `shape=(16)` is just the int 16.
# Also derive the width from df_new instead of hard-coding 16, so the model
# stays in sync with the lag configuration above (15 lags + Close = 16).
n_features = df_new.shape[1]
input_layer = Input(shape=(n_features,), dtype='float32')
# NOTE(review): two stacked 'linear' Dense layers collapse to a single linear
# map; a nonlinearity (e.g. 'relu') is probably intended — confirm before
# changing, since it alters the trained model.
dense1 = Dense(60, activation='linear')(input_layer)
dense2 = Dense(60, activation='linear')(dense1)
dropout_layer = Dropout(0.2)(dense2)
output_layer = Dense(1, activation='linear')(dropout_layer)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
from tensorflow.keras.utils import plot_model
plot_model(model)
# --- Chronological train / validation / test split -------------------------
# Last 5% of rows -> test set; the 5% of the remainder before that -> valid;
# no shuffling, so temporal order is preserved.
test_set_size = 0.05
valid_set_size= 0.05
df_copy = df_new.reset_index(drop=True)
# Using last 16 days
df_test = df_copy.iloc[ int(np.floor(len(df_copy)*(1-test_set_size))) : ]
df_train_plus_valid = df_copy.iloc[ : int(np.floor(len(df_copy)*(1-test_set_size))) ]
df_train = df_train_plus_valid.iloc[ : int(np.floor(len(df_train_plus_valid)*(1-valid_set_size))) ]
df_valid = df_train_plus_valid.iloc[ int(np.floor(len(df_train_plus_valid)*(1-valid_set_size))) : ]
# NOTE(review): `iloc[:, 0:]` keeps column 0 ('Close' at time t) in X, and
# that same column is y — the model is handed the target as a feature (data
# leakage), which inflates the test R².  Fixing it requires both dropping
# column 0 here AND shrinking the network's input width, so it is flagged
# rather than changed in place.
X_train, y_train = df_train.iloc[:, 0:], df_train.iloc[:, 0]
X_valid, y_valid = df_valid.iloc[:, 0:], df_valid.iloc[:, 0]
X_test, y_test = df_test.iloc[:, 0:], df_test.iloc[:, 0]
print('Shape of training inputs, training target:', X_train.shape, y_train.shape)
print('Shape of validation inputs, validation target:', X_valid.shape, y_valid.shape)
print('Shape of test inputs, test target:', X_test.shape, y_test.shape)
print(X_test)
# --- Scale features and target into (0.01, 0.99) ---------------------------
from sklearn.preprocessing import MinMaxScaler

Target_scaler = MinMaxScaler(feature_range=(0.01, 0.99))
Feature_scaler = MinMaxScaler(feature_range=(0.01, 0.99))

# FIX: fit the scalers on the TRAINING split only, then reuse the training
# statistics for validation and test.  The original called fit_transform on
# every split, re-fitting each scaler on its own split's min/max — that
# leaks each split's range into its own scaling and leaves the final scaler
# state fitted on the test set.
X_train_scaled = Feature_scaler.fit_transform(np.array(X_train))
X_valid_scaled = Feature_scaler.transform(np.array(X_valid))
X_test_scaled = Feature_scaler.transform(np.array(X_test))
y_train_scaled = Target_scaler.fit_transform(np.array(y_train).reshape(-1, 1))
y_valid_scaled = Target_scaler.transform(np.array(y_valid).reshape(-1, 1))
y_test_scaled = Target_scaler.transform(np.array(y_test).reshape(-1, 1))
# --- Training and validation ----------------------------------------------
# 30 epochs of mini-batch SGD (batch size 5); training batches are shuffled,
# which is fine here because each row already carries its own lag history.
model.fit(x=X_train_scaled, y=y_train_scaled, batch_size=5, epochs=30, verbose=1, validation_data=(X_valid_scaled, y_valid_scaled), shuffle=True)
# Making predictions on the test set
y_pred = model.predict(X_test_scaled)
# The network was trained on targets scaled into (0.01, 0.99), so
# the predictions also lie in that range. We need to scale them back in the other direction
y_pred_rescaled = Target_scaler.inverse_transform(y_pred)
print(y_pred_rescaled)
# Goodness of fit on the held-out test set, in original price units.
from sklearn.metrics import r2_score

y_test_rescaled = Target_scaler.inverse_transform(y_test_scaled)
r2 = r2_score(y_test_rescaled, y_pred_rescaled)
print('R-squared score for the test set:', round(r2, 4))
# --- Actual vs. predicted closing prices over the test set -----------------
y_actual = pd.DataFrame(y_test_rescaled, columns=['Actual Close Price'])
y_hat = pd.DataFrame(y_pred_rescaled, columns=['Predicted Close Price'])
plt.figure(figsize=(11, 6))
plt.plot(y_actual, linestyle='solid', color='r')
plt.plot(y_hat, linestyle='dashed', color='b')
plt.legend(['Actual','Predicted'], loc='best', prop={'size': 14})
plt.title('Micro eMini NASDAQ Futures Closing Prices', weight='bold', fontsize=16)
plt.ylabel('USD ($)', weight='bold', fontsize=14)
plt.xlabel('Test Set Day no.', weight='bold', fontsize=14)
plt.xticks(weight='bold', fontsize=12, rotation=45)
plt.yticks(weight='bold', fontsize=12)
# FIX: linewidth takes a float — the original passed the string '0.5',
# inconsistent with the 0.5 used by the other plots in this file.
plt.grid(color='y', linewidth=0.5)
plt.show()