Commit d95eddc8 authored by ukuiq's avatar ukuiq
Browse files

added LSTM models

parent 9b2a20c5
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from scipy.signal import lfilter
```
%% Cell type:code id: tags:
``` python
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy
from keras.datasets import imdb
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from tensorflow.keras import activations
from keras.layers import Activation, Dense
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.layers import Dense, Softmax, Dropout, BatchNormalization
```
%% Cell type:code id: tags:
``` python
# Directories that hold the preprocessed CSV exports, one directory per cluster.
path_0 = '../data/preprocessed/clusters/0/'
path_1 = '../data/preprocessed/clusters/1/'
path_2 = '../data/preprocessed/clusters/2/'

# Loaded frames per cluster. Files whose name starts with 'g_' are collected
# in the matching *_grouped list, every other CSV in the plain list.
dfs_c_0, dfs_c_1, dfs_c_2 = [], [], []
dfs_c_0_grouped, dfs_c_1_grouped, dfs_c_2_grouped = [], [], []

# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# frames (and therefore of everything derived from them) stable between runs.
csv_files_0 = sorted(csv for csv in os.listdir(path_0) if csv.endswith('.csv'))
csv_files_1 = sorted(csv for csv in os.listdir(path_1) if csv.endswith('.csv'))
csv_files_2 = sorted(csv for csv in os.listdir(path_2) if csv.endswith('.csv'))

# One loop for all clusters instead of three copy-pasted ones.
for cluster_path, cluster_files, plain_dfs, grouped_dfs in (
    (path_0, csv_files_0, dfs_c_0, dfs_c_0_grouped),
    (path_1, csv_files_1, dfs_c_1, dfs_c_1_grouped),
    (path_2, csv_files_2, dfs_c_2, dfs_c_2_grouped),
):
    for file in cluster_files:
        # import DataFrame
        df = pd.read_csv(os.path.join(cluster_path, file))
        if file.startswith('g_'):
            grouped_dfs.append(df)
        else:
            plain_dfs.append(df)
```
%% Cell type:code id: tags:
``` python
def create_windows_smoothed(dfs, length, batch_size, stride):
    """Build sliding-window LSTM samples from the 'inter_pol' column.

    Each DataFrame in ``dfs`` is windowed on its own with a Keras
    ``TimeseriesGenerator`` (the series is passed as both data and targets),
    so no window spans two frames.

    Args:
        dfs: iterable of DataFrames that each contain an 'inter_pol' column.
        length: number of consecutive values in one input window.
        batch_size: batch size handed to ``TimeseriesGenerator``. It only
            controls how the generator chunks its output; the batches are
            flattened again before returning.
        stride: passed through as the generator's ``stride``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, length, 1)`` and
        ``y`` has shape ``(n_samples, 1)``. Both contain zero samples when the
        generators produced no windows (e.g. ``dfs`` is empty).
    """
    features = []
    targets = []
    # Height as feature and target
    for df in dfs:
        height = df['inter_pol'].to_numpy().tolist()
        # apply TimeSeriesGenerator
        ts_generator = TimeseriesGenerator(height, height, length=length,
                                           batch_size=batch_size, stride=stride)
        for j in range(len(ts_generator)):
            # Fetch each batch once; indexing the generator rebuilds the batch.
            batch_x, batch_y = ts_generator[j]
            # reshape data for neural network: -1 keeps whatever number of
            # windows the batch holds, so batch_size != 1 (and a shorter final
            # batch) works instead of failing on a fixed (length, 1) reshape.
            features.append(np.reshape(batch_x, (-1, length, 1)))
            targets.append(np.reshape(batch_y, (-1, 1)))
    if not features:
        # np.concatenate rejects an empty list; return correctly shaped empties.
        return np.empty((0, length, 1)), np.empty((0, 1))
    X = np.concatenate(features)
    y = np.concatenate(targets)
    return X, y
```
%% Cell type:code id: tags:
``` python
# Window the cluster-0 frames into LSTM samples:
# 40 values per window, generator batch size 1, stride 20.
X, y = create_windows_smoothed(dfs_c_0, length=40, batch_size=1, stride=20)

# Two-stage random split: first hold out 10 % as the *_val_2 set, then divide
# the remaining samples 80/20 into the *_train_2 and *_test_2 sets.
X_tr_2, X_val_2, y_tr_2, y_val_2 = train_test_split(X, y, test_size=0.1)
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_tr_2, y_tr_2, test_size=0.2
)
```
%% Cell type:code id: tags:
``` python
# Report the shape of every split, in the order train, test, validation
# (inputs first, targets second for each pair).
for split in (X_train_2, y_train_2, X_test_2, y_test_2, X_val_2, y_val_2):
    print(split.shape)
```
%%%% Output: stream
(3563, 40, 1)
(3563, 1)
(891, 40, 1)
(891, 1)
(495, 40, 1)
(495, 1)
%% Cell type:code id: tags:
``` python
# build lstm: two stacked 200-unit LSTM layers on (40, 1) windows, followed by
# a single-unit Dense output with ReLU activation.
model01 = Sequential([
    LSTM(200, return_sequences=True, input_shape=(40, 1)),
    LSTM(200),
    Dense(1, activation='relu'),
])

# Adam with learning rate 0.001; gradient clipping (clipvalue / clipnorm) was
# tried in earlier versions of this cell and is not enabled.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model01.compile(
    optimizer=adam,
    loss='mse',
    metrics=['mean_absolute_error'],
)
```
%% Cell type:code id: tags:
``` python
# Train for 20 epochs in shuffled mini-batches of 64. The *_test_2 split is
# what Keras evaluates after every epoch (validation_data).
model01.fit(
    X_train_2,
    y_train_2,
    validation_data=(X_test_2, y_test_2),
    epochs=20,
    batch_size=64,
    shuffle=True,
    verbose=1,
)
```
%% Cell type:code id: tags:
``` python
from keras.models import load_model
# Rebind model01 to the model stored in 'c0_lstm.h5'. From here on the notebook
# uses the loaded model, not the one built and fitted in the cells above.
model01 = load_model('c0_lstm.h5')
```
%% Cell type:code id: tags:
``` python
# Print the layer-by-layer architecture and parameter counts of model01.
model01.summary()
```
%%%% Output: stream
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_20 (LSTM) (None, 40, 200) 161600
_________________________________________________________________
lstm_21 (LSTM) (None, 200) 320800
_________________________________________________________________
dense_11 (Dense) (None, 1) 201
=================================================================
Total params: 482,601
Trainable params: 482,601
Non-trainable params: 0
_________________________________________________________________
%% Cell type:code id: tags:
``` python
# Seed window for the recursive forecast below: the first sample of the
# *_val_2 split (one window of 40 values, shape (40, 1)).
initial_set = X_val_2[0]
# Bare expression so the notebook displays the seed values.
initial_set
```
%%%% Output: execute_result
array([[84.4 ],
[83.93333333],
[83.53333333],
[83.33333333],
[83.13333333],
[82.73333333],
[82.26666667],
[81.73333333],
[81.13333333],
[80.53333333],
[79.93333333],
[79.33333333],
[78.93333333],
[78.4 ],
[77.86666667],
[77.33333333],
[76.8 ],
[76.33333333],
[75.8 ],
[75.2 ],
[74.6 ],
[74.93333333],
[75.33333333],
[75.53333333],
[75.73333333],
[74.93333333],
[75.06666667],
[75.06666667],
[74.86666667],
[73.93333333],
[73.13333333],
[72.46666667],
[71.86666667],
[71.06666667],
[70.26666667],
[69.66666667],
[69.06666667],
[68.53333333],
[67.93333333],
[67.33333333]])
%% Cell type:code id: tags:
``` python
# Recursive 500-step forecast: predict one value from the current 40-value
# window, record it, then slide the window forward by dropping its oldest value
# and appending the prediction. Note that initial_set is rebound on every step.
result2 = []
for i in range(500):
    x_input = np.reshape(initial_set, (1, 40, 1))
    yhat = model01.predict(x_input, verbose=0)
    result2.append(yhat[0, 0])
    # Flatten both pieces, discard the oldest entry, put the prediction last.
    initial_set = np.concatenate((np.ravel(initial_set)[1:], np.ravel(yhat)))
```
%% Cell type:code id: tags:
``` python
# Plot the 500 recursively predicted values stored in result2.
plt.figure(figsize=(30,8))
# Fix the y-axis to 0..200 before plotting.
plt.ylim((0,200))
# NOTE(review): the title says "grouped data", but the windows in this notebook
# are built from dfs_c_0 (not dfs_c_0_grouped) and the model comes from
# 'c0_lstm.h5' - confirm the title matches the model being plotted.
plt.title("LSTM trained on smoothed, grouped data")
plt.xticks(fontsize=8, rotation=90)
plt.yticks(fontsize=10, fontweight='bold')
plt.plot(result2)
plt.legend(['Predicted values'], loc='upper left')
plt.show()
```
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
``` python
# Synthetic seed window: 40 entries, all equal to 140.
initial_set2 = np.full(40, 140)
# Second name for the same seed array; the forecast loop uses it to reset
# its window.
initial = initial_set2
# Bare expression so the notebook displays the seed values.
initial_set2
```
%%%% Output: execute_result
array([140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140])
%% Cell type:code id: tags:
``` python
# Recursive 500-step forecast starting from the constant seed window. After
# every step the window slides forward by one value; when the window's mean
# gradient is above -1 and its newest value has dropped below 20, the window
# is reset to the original seed (`initial`).
result3 = []
for i in range(500):
    x_input = np.reshape(initial_set2, (1, 40, 1))
    yhat = model01.predict(x_input, verbose=0)
    result3.append(yhat[0, 0])
    # Drop the oldest value and put the prediction at the end of the window.
    initial_set2 = np.concatenate((np.ravel(initial_set2)[1:], np.ravel(yhat)))
    gradients = np.gradient(initial_set2)
    mean_gradient = sum(gradients) / len(gradients)
    # The window always holds 40 values, so [-1] is the newest one.
    if mean_gradient > -1 and initial_set2[-1] < 20:
        initial_set2 = initial
```
%% Cell type:code id: tags:
``` python
# Plot the 500 predicted values stored in result3 (forecast with seed reset).
plt.figure(figsize=(30,8))
# Fix the y-axis to 0..200 before plotting.
plt.ylim((0,200))
# NOTE(review): the title says "grouped data", but the windows in this notebook
# are built from dfs_c_0 (not dfs_c_0_grouped) and the model comes from
# 'c0_lstm.h5' - confirm the title matches the model being plotted.
plt.title("LSTM trained on smoothed, grouped data")
# One x tick every 60 steps across the 500-step horizon.
plt.xticks(np.arange(0, 500, step=60), fontsize=8, rotation=90, )
plt.yticks(fontsize=10, fontweight='bold')
plt.plot(result3)
plt.legend(['Predicted values'], loc='upper left')
plt.show()
```
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
``` python
# Display the list of cluster-0 DataFrames (non-'g_' files) for inspection.
dfs_c_0
```
%%%% Output: execute_result
[ Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016E5 2020-05-09 01:49:33.615246 96
1 1 70B3D500700016E5 2020-05-09 02:49:37.365573 96
2 2 70B3D500700016E5 2020-05-09 03:49:33.130492 96
3 3 70B3D500700016E5 2020-05-09 04:49:32.877295 96
4 4 70B3D500700016E5 2020-05-09 06:49:32.415491 96
... ... ... ... ...
7228 7230 70B3D500700016E5 2021-05-08 16:14:47.805898 126
7229 7231 70B3D500700016E5 2021-05-08 17:14:47.563265 126
7230 7232 70B3D500700016E5 2021-05-08 18:14:47.288859 126
7231 7233 70B3D500700016E5 2021-05-08 19:14:48.629415 124
7232 7234 70B3D500700016E5 2021-05-08 22:14:48.112348 132
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 12.0 1 1.600000 NaN NaN 89.266667
1 13.0 1 3.200000 NaN NaN 89.266667
2 11.0 2 4.800000 NaN NaN 89.266667
3 10.0 1 6.400000 NaN NaN 89.266667
4 8.0 2 8.000000 NaN NaN 89.266667
... ... ... ... ... ... ...
7228 27.0 1 108.300000 118.466667 96.0 118.466667
7229 24.0 1 108.566667 119.466667 96.0 119.466667
7230 21.0 0 108.833333 120.400000 96.0 120.400000
7231 16.0 1 109.066667 121.333333 96.0 121.333333
7232 10.0 1 109.666667 122.466667 96.0 122.466667
[7233 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F1 2020-05-09 00:34:23.845544 84
1 1 70B3D500700016F1 2020-05-09 01:34:23.699359 84
2 2 70B3D500700016F1 2020-05-09 02:34:23.547372 84
3 3 70B3D500700016F1 2020-05-09 03:34:23.395129 84
4 4 70B3D500700016F1 2020-05-09 05:34:23.110478 84
... ... ... ... ...
7850 7851 70B3D500700016F1 2021-05-08 16:12:58.267171 106
7851 7852 70B3D500700016F1 2021-05-08 17:12:58.097596 106
7852 7853 70B3D500700016F1 2021-05-08 18:12:57.891567 104
7853 7854 70B3D500700016F1 2021-05-08 20:12:59.235848 104
7854 7855 70B3D500700016F1 2021-05-08 22:12:58.943461 102
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 1.400000 NaN NaN 86.000000
1 15.0 1 2.800000 NaN NaN 86.000000
2 13.0 1 4.200000 NaN NaN 86.000000
3 14.0 0 5.600000 NaN NaN 86.000000
4 15.0 0 7.000000 NaN NaN 86.000000
... ... ... ... ... ... ...
7850 31.0 0 121.133333 112.466667 106.0 112.466667
7851 29.0 0 121.566667 111.933333 106.0 111.933333
7852 25.0 0 121.966667 111.333333 104.0 111.333333
7853 18.0 0 121.333333 110.733333 104.0 110.733333
7854 11.0 0 120.666667 110.066667 102.0 110.066667
[7855 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F4 2020-05-09 00:30:46.645843 60
1 1 70B3D500700016F4 2020-05-09 01:30:46.466883 60
2 2 70B3D500700016F4 2020-05-09 02:30:46.286815 60
3 3 70B3D500700016F4 2020-05-09 03:30:46.110369 60
4 4 70B3D500700016F4 2020-05-09 04:30:45.925179 60
... ... ... ... ...
3527 3530 70B3D500700016F4 2021-05-08 18:04:20.859170 62
3528 3531 70B3D500700016F4 2021-05-08 19:04:22.253953 60
3529 3532 70B3D500700016F4 2021-05-08 20:04:22.088932 60
3530 3533 70B3D500700016F4 2021-05-08 21:04:21.967920 62
3531 3534 70B3D500700016F4 2021-05-08 22:04:21.781690 60
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 1.000000 NaN NaN 68.533333
1 13.0 0 2.000000 NaN NaN 68.533333
2 12.0 0 3.000000 NaN NaN 68.533333
3 11.0 0 4.000000 NaN NaN 68.533333
4 11.0 0 5.000000 NaN NaN 68.533333
... ... ... ... ... ... ...
3527 23.0 0 78.366667 72.400000 60.0 72.400000
3528 18.0 0 78.533333 70.000000 60.0 70.000000
3529 17.0 0 78.766667 67.533333 60.0 67.533333
3530 14.0 0 79.033333 65.000000 60.0 65.000000
3531 9.0 0 79.233333 62.600000 60.0 62.600000
[3532 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F6 2020-05-09 00:55:40.099451 26
1 1 70B3D500700016F6 2020-05-09 02:55:39.674995 26
2 2 70B3D500700016F6 2020-05-09 03:55:39.484016 26
3 3 70B3D500700016F6 2020-05-09 04:55:39.274142 26
4 4 70B3D500700016F6 2020-05-09 05:55:39.079391 26
... ... ... ... ...
7785 7785 70B3D500700016F6 2021-05-08 16:26:17.336651 64
7786 7786 70B3D500700016F6 2021-05-08 17:26:17.131030 122
7787 7787 70B3D500700016F6 2021-05-08 18:26:16.959249 116
7788 7788 70B3D500700016F6 2021-05-08 19:26:24.328798 116
7789 7789 70B3D500700016F6 2021-05-08 20:26:18.141285 118
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 20.0 0 0.433333 NaN NaN 73.266667
1 19.0 1 0.866667 NaN NaN 73.266667
2 16.0 1 1.300000 NaN NaN 73.266667
3 17.0 0 1.733333 NaN NaN 73.266667
4 14.0 1 2.166667 NaN NaN 73.266667
... ... ... ... ... ... ...
7785 28.0 0 111.900000 111.066667 62.0 111.066667
7786 27.0 1 111.966667 111.600000 62.0 111.600000
7787 26.0 0 112.000000 111.933333 62.0 111.933333
7788 22.0 1 112.866667 112.333333 62.0 112.333333
7789 20.0 0 113.766667 112.866667 62.0 112.866667
[7790 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001724 2020-05-09 01:07:46.505893 52
1 1 70B3D50070001724 2020-05-09 02:07:46.372718 52
2 2 70B3D50070001724 2020-05-09 03:07:46.259972 52
3 3 70B3D50070001724 2020-05-09 04:07:46.128727 52
4 4 70B3D50070001724 2020-05-09 05:07:46.004955 52
... ... ... ... ...
7799 7799 70B3D50070001724 2021-05-08 17:49:42.078325 120
7800 7800 70B3D50070001724 2021-05-08 18:49:42.019627 122
7801 7801 70B3D50070001724 2021-05-08 19:49:43.468573 116
7802 7802 70B3D50070001724 2021-05-08 20:49:43.335279 120
7803 7803 70B3D50070001724 2021-05-08 21:49:43.207884 120
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 0.866667 NaN NaN 103.066667
1 15.0 0 1.733333 NaN NaN 103.066667
2 14.0 0 2.600000 NaN NaN 103.066667
3 15.0 0 3.466667 NaN NaN 103.066667
4 15.0 0 4.333333 NaN NaN 103.066667
... ... ... ... ... ... ...
7799 23.0 1 114.866667 124.133333 92.0 124.133333
7800 19.0 1 115.100000 124.800000 92.0 124.800000
7801 18.0 1 115.133333 125.133333 92.0 125.133333
7802 18.0 1 115.333333 125.800000 92.0 125.800000
7803 14.0 0 115.533333 126.466667 92.0 126.466667
[7804 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001726 2020-05-09 00:30:37.051543 66
1 1 70B3D50070001726 2020-05-09 01:30:36.843577 66
2 2 70B3D50070001726 2020-05-09 02:30:36.668140 66
3 3 70B3D50070001726 2020-05-09 04:30:36.260866 70
4 4 70B3D50070001726 2020-05-09 06:30:35.842373 66
... ... ... ... ...
7063 7063 70B3D50070001726 2021-05-08 17:01:34.990787 122
7064 7064 70B3D50070001726 2021-05-08 18:01:34.785983 120
7065 7065 70B3D50070001726 2021-05-08 19:01:34.583706 120
7066 7066 70B3D50070001726 2021-05-08 20:01:35.968216 120
7067 7067 70B3D50070001726 2021-05-08 21:01:35.760639 118
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 18.0 2 1.100000 NaN NaN 104.266667
1 15.0 3 2.200000 NaN NaN 104.266667
2 13.0 3 3.300000 NaN NaN 104.266667
3 13.0 2 4.466667 NaN NaN 104.266667
4 12.0 2 5.566667 NaN NaN 104.266667
... ... ... ... ... ... ...
7063 20.0 3 119.400000 123.866667 108.0 123.866667
7064 18.0 3 119.433333 124.000000 108.0 124.000000
7065 19.0 3 119.400000 124.133333 108.0 124.133333
7066 16.0 3 119.466667 124.266667 108.0 124.266667
7067 13.0 3 119.500000 124.333333 108.0 124.333333
[7068 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001739 2020-05-09 01:55:25.148156 12
1 1 70B3D50070001739 2020-05-09 02:55:24.935643 12
2 2 70B3D50070001739 2020-05-09 03:55:24.722681 12
3 3 70B3D50070001739 2020-05-09 04:55:24.513137 12
4 4 70B3D50070001739 2020-05-09 05:55:24.383964 12
... ... ... ... ...
7832 7832 70B3D50070001739 2021-05-08 16:25:31.350083 106
7833 7833 70B3D50070001739 2021-05-08 17:25:31.123096 102
7834 7834 70B3D50070001739 2021-05-08 18:25:30.886051 106
7835 7835 70B3D50070001739 2021-05-08 19:25:32.295031 100
7836 7836 70B3D50070001739 2021-05-08 20:25:32.042148 100
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 11.0 0 0.200000 NaN NaN 88.733333
1 10.0 0 0.400000 NaN NaN 88.733333
2 11.0 0 0.600000 NaN NaN 88.733333
3 10.0 0 0.800000 NaN NaN 88.733333
4 9.0 0 1.000000 NaN NaN 88.733333
... ... ... ... ... ... ...
7832 20.0 0 92.200000 103.000000 68.0 103.000000
7833 19.0 0 92.233333 104.133333 68.0 104.133333
7834 16.0 0 92.400000 105.400000 68.0 105.400000
7835 16.0 0 92.466667 106.466667 68.0 106.466667
7836 12.0 0 92.700000 107.533333 68.0 107.533333
[7837 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D5007000173E 2020-05-09 01:01:07.216028 96
1 1 70B3D5007000173E 2020-05-09 02:01:07.264309 98
2 2 70B3D5007000173E 2020-05-09 03:01:18.913175 98
3 3 70B3D5007000173E 2020-05-09 04:01:07.636712 98
4 4 70B3D5007000173E 2020-05-09 05:01:07.504293 98
... ... ... ... ...
7569 7569 70B3D5007000173E 2021-05-08 13:40:58.079424 80
7570 7570 70B3D5007000173E 2021-05-08 16:40:57.574948 72
7571 7571 70B3D5007000173E 2021-05-08 17:40:57.466938 72
7572 7572 70B3D5007000173E 2021-05-08 18:40:57.309826 72
7573 7573 70B3D5007000173E 2021-05-08 20:40:58.643590 70
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 25.0 3 1.600000 NaN NaN 82.933333
1 24.0 3 3.233333 NaN NaN 82.933333
2 22.0 2 4.866667 NaN NaN 82.933333
3 22.0 2 6.500000 NaN NaN 82.933333
4 20.0 2 8.133333 NaN NaN 82.933333
... ... ... ... ... ... ...
7569 30.0 2 105.566667 103.400000 80.0 103.400000
7570 30.0 1 104.866667 102.533333 72.0 102.533333
7571 32.0 1 104.166667 101.666667 72.0 101.666667
7572 30.0 0 103.433333 100.800000 72.0 100.800000
7573 25.0 0 102.666667 99.866667 70.0 99.866667
[7574 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001747 2020-05-09 00:48:26.913839 62
1 1 70B3D50070001747 2020-05-09 01:48:26.279385 62
2 2 70B3D50070001747 2020-05-09 02:48:26.086640 62
3 3 70B3D50070001747 2020-05-09 03:48:25.894323 62
4 4 70B3D50070001747 2020-05-09 04:48:25.699732 62
... ... ... ... ...
7462 7462 70B3D50070001747 2021-05-08 16:20:48.596188 136
7463 7463 70B3D50070001747 2021-05-08 17:20:48.388594 136
7464 7464 70B3D50070001747 2021-05-08 18:20:48.244483 136
7465 7465 70B3D50070001747 2021-05-08 19:20:49.592471 136
7466 7466 70B3D50070001747 2021-05-08 22:20:49.213155 136
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 21.0 2 1.033333 NaN NaN 54.533333
1 20.0 1 2.066667 NaN NaN 54.533333
2 19.0 1 3.100000 NaN NaN 54.533333
3 18.0 1 4.133333 NaN NaN 54.533333
4 19.0 1 5.166667 NaN NaN 54.533333
... ... ... ... ... ... ...
7462 31.0 1 98.100000 116.600000 72.0 116.600000
7463 29.0 2 99.033333 118.466667 72.0 118.466667
7464