reference.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import matplotlib.pyplot as plt
# Load the dataset from CSV file
df = pd.read_csv('data/Structured/all_data.csv')
df = df.dropna(axis=1, how='all')
df.drop(['Summa gas/Diesel'], axis=1, inplace=True)
# Fill remaining gaps forward, then backward
df = df.ffill()
df = df.bfill()
# Drop columns that contain only a single unique value
for e in df.columns:
    if df[e].nunique() == 1:
        df = df.drop(e, axis=1)
# Move the SE1 price series so it sits first once the Date column is dropped
first_column = df.pop('SE1')
df.insert(1, 'SE1', first_column)
df.drop(['Date'], axis=1, inplace=True)
# Keep only SE1 (the target) and the first remaining feature column
df = df.iloc[:, 0:2]
print(df.head(10))
n_features = df.shape[1]
# Normalize the features using MinMaxScaler
scaler = MinMaxScaler()
df[df.columns[1:]] = scaler.fit_transform(df[df.columns[1:]])
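# Note: only the second column is scaled here; column 0 (SE1), which is what the
# model actually consumes below, stays in its original units. Fitting the scaler
# on the full frame before the train/test split also lets test-set statistics
# influence the scaling; a stricter setup would fit on the training slice only.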
# Define the sequence length for the LSTM model
sequence_length = 10
# Split the dataset into training and testing sets
train_size = int(len(df) * 0.7)
train_df = df[:train_size]
test_df = df[train_size:]
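# The split is chronological (no shuffling), so the model is evaluated on the
# most recent 30% of the series.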
# Create sequences of input and output for the LSTM model
def create_sequences(df, sequence_length):
    """Build sliding windows over column 0: each X row holds `sequence_length`
    consecutive values and the matching y is the value that follows the window."""
    X = []
    y = []
    for i in range(len(df) - sequence_length):
        X.append(df.iloc[i:i+sequence_length, 0])
        y.append(df.iloc[i+sequence_length, 0])
    return np.array(X), np.array(y)
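# For example, with sequence_length = 10 the first training sample uses rows 0-9
# of the SE1 column as input and row 10 as its target.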
X_train, y_train = create_sequences(train_df, sequence_length)
X_test, y_test = create_sequences(test_df, sequence_length)
print(X_train.shape)
# Reshape to (samples, timesteps, features) as expected by the Keras LSTM layer
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Visual sanity check: the first input window and the point it should predict
plt.plot(X_train[0, :, 0], label='input window')
plt.plot(sequence_length, y_train[0], 'ro', label='target')
plt.legend()
plt.show()
# Define the model architecture
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(sequence_length, 1)),
    tf.keras.layers.Dense(1)
])
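# A single 64-unit LSTM encodes the 10-step window into one vector, and the
# Dense(1) head maps it to the next SE1 value (a plain regression output).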
# Compile the model with mean squared error loss and Adam optimizer
model.compile(optimizer='adam', loss='mse')
# Train the model on the training set
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)
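# Optional (not in the original script): a validation split plus early stopping
# would guard against overfitting; a minimal sketch of what that could look like:
#   early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,
#                                                 restore_best_weights=True)
#   model.fit(X_train, y_train, epochs=50, batch_size=32,
#             validation_split=0.1, callbacks=[early_stop], verbose=1)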
# Evaluate the model on the test set
loss = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', loss)
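# The loss above is MSE in SE1's original units (the target column was never
# scaled). RMSE is often easier to interpret; a quick conversion:
rmse = np.sqrt(loss)
print('Test RMSE:', rmse)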
# Make predictions on new data (assumes new_data.csv has the same two-column
# layout as df, i.e. SE1 followed by one feature column)
new_data = pd.read_csv('new_data.csv', index_col=0)
new_data[new_data.columns[1:]] = scaler.transform(new_data[new_data.columns[1:]])
# Build windows from column 0 (the SE1 series), matching how the model was trained
X_new = np.array([new_data.iloc[i:i+sequence_length, 0].values
                  for i in range(len(new_data) - sequence_length)])
X_new = np.reshape(X_new, (X_new.shape[0], X_new.shape[1], 1))
y_pred = model.predict(X_new)
# Print the predicted prices
print(y_pred)
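# Optional visual check (not part of the original script): compare the model's
# test-set predictions against the actual SE1 values, reusing the variables
# defined above.
y_test_pred = model.predict(X_test)
plt.plot(y_test, label='actual SE1')
plt.plot(y_test_pred[:, 0], label='predicted SE1')
plt.legend()
plt.show()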