Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
uflgi
bda-analytics-challenge-template
Commits
d95eddc8
Commit
d95eddc8
authored
Jul 19, 2021
by
ukuiq
Browse files
added LSTM models
parent
9b2a20c5
Changes
8
Show whitespace changes
Inline
Side-by-side
notebooks/Cluster0_LSTM.ipynb
0 → 100644
View file @
d95eddc8
%% Cell type:code id: tags:
```
python
import
pandas
as
pd
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
sklearn.model_selection
import
train_test_split
import
os
from
scipy.signal
import
lfilter
```
%% Cell type:code id: tags:
```
python
from
keras.models
import
Sequential
from
keras.layers
import
Dense
from
keras.layers
import
LSTM
from
sklearn.preprocessing
import
MinMaxScaler
from
sklearn.metrics
import
mean_squared_error
import
numpy
from
keras.datasets
import
imdb
from
keras.layers.embeddings
import
Embedding
from
keras.preprocessing
import
sequence
from
tensorflow.keras
import
activations
from
keras.layers
import
Activation
,
Dense
from
keras.preprocessing.sequence
import
TimeseriesGenerator
from
sklearn.model_selection
import
train_test_split
import
tensorflow
as
tf
from
keras.layers
import
Dense
,
Softmax
,
Dropout
,
BatchNormalization
```
%% Cell type:code id: tags:
```
python
def _load_cluster_frames(cluster_dir):
    """Read every CSV file in ``cluster_dir`` into a DataFrame.

    Args:
        cluster_dir: Directory that contains the CSV files of one cluster.

    Returns:
        A tuple ``(csv_names, plain, grouped)``:
        ``csv_names`` is the sorted list of ``*.csv`` file names found,
        ``grouped`` holds the frames whose file name starts with ``'g_'``
        and ``plain`` holds all remaining frames, both in ``csv_names`` order.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the frame
    # order (and everything derived from it) stable between runs and machines.
    csv_names = sorted(
        name for name in os.listdir(cluster_dir) if name.endswith('.csv')
    )
    plain = []
    grouped = []
    for name in csv_names:
        frame = pd.read_csv(os.path.join(cluster_dir, name))
        if name.startswith('g_'):
            grouped.append(frame)
        else:
            plain.append(frame)
    return csv_names, plain, grouped


# One directory per cluster of preprocessed data.
path_0 = '../data/preprocessed/clusters/0/'
path_1 = '../data/preprocessed/clusters/1/'
path_2 = '../data/preprocessed/clusters/2/'

# For each cluster: the CSV file names, the frames from files without the
# 'g_' prefix, and the frames from files with the 'g_' prefix.
csv_files_0, dfs_c_0, dfs_c_0_grouped = _load_cluster_frames(path_0)
csv_files_1, dfs_c_1, dfs_c_1_grouped = _load_cluster_frames(path_1)
csv_files_2, dfs_c_2, dfs_c_2_grouped = _load_cluster_frames(path_2)
```
%% Cell type:code id: tags:
```
python
def create_windows_smoothed(dfs, length, batch_size, stride):
    """Build sliding-window samples from the ``inter_pol`` column.

    Within each frame, every window of ``length`` consecutive ``inter_pol``
    values is paired with the value that directly follows it. The first
    window starts at row 0 and each next window starts ``stride`` rows later.
    Windows never span two frames.

    Args:
        dfs: Iterable of DataFrames (or any mapping) whose ``'inter_pol'``
            entry is a 1-D sequence of numbers.
        length: Number of consecutive values per window (>= 1).
        batch_size: Accepted for backward compatibility and otherwise
            ignored. The previous implementation batched windows through a
            Keras ``TimeseriesGenerator`` and could only reshape batches that
            held exactly one window; the flattened set of samples is the
            same for every batch size.
        stride: Number of rows between the starts of two successive
            windows (>= 1).

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, length, 1)`` and ``y``
        has shape ``(n_samples, 1)``. With no samples at all the arrays have
        shapes ``(0, length, 1)`` and ``(0, 1)``.

    Raises:
        ValueError: If ``length`` or ``stride`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")
    if stride < 1:
        raise ValueError("stride must be at least 1")

    windows = []
    targets = []
    for df in dfs:
        series = np.asarray(df['inter_pol'])
        # A target at index `row` needs `length` values in front of it, so
        # the first usable target is at index `length`. A frame with
        # `length` rows or fewer therefore contributes no samples.
        for row in range(length, len(series), stride):
            windows.append(series[row - length:row])
            targets.append(series[row])

    if not windows:
        return np.empty((0, length, 1)), np.empty((0, 1))

    # Add the trailing feature axis expected by the LSTM input layer.
    X = np.stack(windows)[..., np.newaxis]
    y = np.asarray(targets).reshape(-1, 1)
    return X, y
```
%% Cell type:code id: tags:
```
python
# Create the LSTM input from the frames in dfs_c_0: windows of 40 values,
# batch size 1, a new window every 20 rows.
X, y = create_windows_smoothed(dfs_c_0, 40, 1, 20)

# Split the samples in two steps. Both splits are seeded so that the three
# subsets -- and the forecast seed taken from X_val_2[0] further down -- are
# the same on every run.
# Step 1: set 10% of all samples aside as X_val_2 / y_val_2.
X_tr_2, X_val_2, y_tr_2, y_val_2 = train_test_split(
    X, y, test_size=0.1, random_state=42
)
# Step 2: split the remaining 90% into 80% training and 20% test samples.
# NOTE: X_test_2 / y_test_2 are what model01.fit receives as validation_data.
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_tr_2, y_tr_2, test_size=0.2, random_state=42
)
```
%% Cell type:code id: tags:
```
python
# Report the shape of every subset: train, test, then validation,
# features before targets.
for subset in (X_train_2, y_train_2, X_test_2, y_test_2, X_val_2, y_val_2):
    print(subset.shape)
```
%%%% Output: stream
(3563, 40, 1)
(3563, 1)
(891, 40, 1)
(891, 1)
(495, 40, 1)
(495, 1)
%% Cell type:code id: tags:
```
python
# Build the LSTM: two stacked 200-unit LSTM layers over windows of shape
# (40, 1), followed by a single ReLU output unit.
model01 = Sequential([
    # return_sequences=True hands the full sequence to the second LSTM.
    LSTM(200, return_sequences=True, input_shape=(40, 1)),
    LSTM(200),
    Dense(1, activation='relu'),
])

# Adam without gradient clipping (clipvalue=0.5 / clipnorm=1 are left disabled).
adam = tf.keras.optimizers.Adam(learning_rate=0.001)

# Optimise mean squared error and report mean absolute error alongside it.
model01.compile(
    loss='mse',
    optimizer=adam,
    metrics=['mean_absolute_error'],
)
```
%% Cell type:code id: tags:
```
python
# Train for 20 epochs in shuffled mini-batches of 64 samples; the
# X_test_2 / y_test_2 subset is evaluated after every epoch.
model01.fit(
    x=X_train_2,
    y=y_train_2,
    validation_data=(X_test_2, y_test_2),
    batch_size=64,
    epochs=20,
    shuffle=True,
    verbose=1,
)
```
%% Cell type:code id: tags:
```
python
from keras.models import load_model

# Rebind model01 to the model stored in 'c0_lstm.h5' (path relative to the
# working directory). Any model01 built or fitted earlier in the notebook is
# no longer referenced after this line.
model01 = load_model('c0_lstm.h5')
```
%% Cell type:code id: tags:
```
python
# Print the layer-by-layer overview (output shapes, parameter counts) of model01.
model01.summary()
```
%%%% Output: stream
Model: "sequential_11"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_20 (LSTM) (None, 40, 200) 161600
_________________________________________________________________
lstm_21 (LSTM) (None, 200) 320800
_________________________________________________________________
dense_11 (Dense) (None, 1) 201
=================================================================
Total params: 482,601
Trainable params: 482,601
Non-trainable params: 0
_________________________________________________________________
%% Cell type:code id: tags:
```
python
# Seed window for the recursive forecast: the first sample of the held-out
# X_val_2 subset, shape (40, 1).
initial_set = X_val_2[0]
# Bare expression so the notebook displays the seed values.
initial_set
```
%%%% Output: execute_result
array([[84.4 ],
[83.93333333],
[83.53333333],
[83.33333333],
[83.13333333],
[82.73333333],
[82.26666667],
[81.73333333],
[81.13333333],
[80.53333333],
[79.93333333],
[79.33333333],
[78.93333333],
[78.4 ],
[77.86666667],
[77.33333333],
[76.8 ],
[76.33333333],
[75.8 ],
[75.2 ],
[74.6 ],
[74.93333333],
[75.33333333],
[75.53333333],
[75.73333333],
[74.93333333],
[75.06666667],
[75.06666667],
[74.86666667],
[73.93333333],
[73.13333333],
[72.46666667],
[71.86666667],
[71.06666667],
[70.26666667],
[69.66666667],
[69.06666667],
[68.53333333],
[67.93333333],
[67.33333333]])
%% Cell type:code id: tags:
```
python
# Recursive forecast: predict one value, feed it back into the window and
# repeat 500 times. result2 collects the predicted values in order.
result2 = []
for step in range(500):
    # The model expects a batch of one window: (1, 40, 1).
    x_input = initial_set.reshape((1, 40, 1))
    yhat = model01.predict(x_input, verbose=0)
    result2.append(yhat[0][0])
    # Slide the window by one: drop the oldest value, append the prediction.
    # np.append flattens its inputs, so the window is 1-D from here on.
    initial_set = np.append(initial_set[1:], yhat)
```
%% Cell type:code id: tags:
```
python
# Plot the 500 recursively predicted values collected in result2.
plt.figure(figsize=(30, 8))
# Fix the y-axis to the range 0..200.
plt.ylim((0, 200))
# NOTE(review): the title says "grouped data", but the windows in this
# notebook are built from dfs_c_0, not dfs_c_0_grouped, and the plotted model
# is loaded from 'c0_lstm.h5' -- confirm which data that model was trained on.
plt.title("LSTM trained on smoothed, grouped data")
plt.xticks(fontsize=8, rotation=90)
plt.yticks(fontsize=10, fontweight='bold')
plt.plot(result2)
plt.legend(['Predicted values'], loc='upper left')
plt.show()
```
%%%% Output: display_data

%% Cell type:code id: tags:
```
python
# Synthetic seed window: 40 values, all equal to 140.
initial_set2 = np.array([140] * 40)
# Second reference to the seed array; the forecast loop rebinds initial_set2
# to new arrays, so `initial` keeps pointing at the untouched seed.
initial = initial_set2
# Bare expression so the notebook displays the seed values.
initial_set2
```
%%%% Output: execute_result
array([140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
140])
%% Cell type:code id: tags:
```
python
# Recursive forecast starting from the constant seed window, with a reset:
# whenever the window has flattened out at a low value, forecasting restarts
# from the seed stored in `initial`. result3 collects the 500 predictions.
result3 = []
for step in range(500):
    # The model expects a batch of one window: (1, 40, 1).
    x_input = initial_set2.reshape((1, 40, 1))
    yhat = model01.predict(x_input, verbose=0)
    result3.append(yhat[0][0])
    # Slide the window by one: drop the oldest value, append the prediction.
    initial_set2 = np.append(initial_set2[1:], yhat)

    # Average step-to-step change across the current window.
    gradients = np.gradient(initial_set2)
    mean_slope = sum(gradients) / len(gradients)
    newest_value = initial_set2[39]
    # Reset once the newest value is below 20 and the window is, on average,
    # no longer falling by more than 1 per step.
    if mean_slope > -1 and newest_value < 20:
        initial_set2 = initial
```
%% Cell type:code id: tags:
```
python
# Plot the 500 predicted values collected in result3.
plt.figure(figsize=(30, 8))
# Fix the y-axis to the range 0..200.
plt.ylim((0, 200))
# NOTE(review): the title says "grouped data", but the windows in this
# notebook are built from dfs_c_0, not dfs_c_0_grouped, and the plotted model
# is loaded from 'c0_lstm.h5' -- confirm which data that model was trained on.
plt.title("LSTM trained on smoothed, grouped data")
# One x tick every 60 prediction steps.
plt.xticks(np.arange(0, 500, step=60), fontsize=8, rotation=90,)
plt.yticks(fontsize=10, fontweight='bold')
plt.plot(result3)
plt.legend(['Predicted values'], loc='upper left')
plt.show()
```
%%%% Output: display_data

%% Cell type:code id: tags:
```
python
# Bare expression so the notebook displays the list of cluster-0 frames.
dfs_c_0
```
%%%% Output: execute_result
[ Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016E5 2020-05-09 01:49:33.615246 96
1 1 70B3D500700016E5 2020-05-09 02:49:37.365573 96
2 2 70B3D500700016E5 2020-05-09 03:49:33.130492 96
3 3 70B3D500700016E5 2020-05-09 04:49:32.877295 96
4 4 70B3D500700016E5 2020-05-09 06:49:32.415491 96
... ... ... ... ...
7228 7230 70B3D500700016E5 2021-05-08 16:14:47.805898 126
7229 7231 70B3D500700016E5 2021-05-08 17:14:47.563265 126
7230 7232 70B3D500700016E5 2021-05-08 18:14:47.288859 126
7231 7233 70B3D500700016E5 2021-05-08 19:14:48.629415 124
7232 7234 70B3D500700016E5 2021-05-08 22:14:48.112348 132
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 12.0 1 1.600000 NaN NaN 89.266667
1 13.0 1 3.200000 NaN NaN 89.266667
2 11.0 2 4.800000 NaN NaN 89.266667
3 10.0 1 6.400000 NaN NaN 89.266667
4 8.0 2 8.000000 NaN NaN 89.266667
... ... ... ... ... ... ...
7228 27.0 1 108.300000 118.466667 96.0 118.466667
7229 24.0 1 108.566667 119.466667 96.0 119.466667
7230 21.0 0 108.833333 120.400000 96.0 120.400000
7231 16.0 1 109.066667 121.333333 96.0 121.333333
7232 10.0 1 109.666667 122.466667 96.0 122.466667
[7233 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F1 2020-05-09 00:34:23.845544 84
1 1 70B3D500700016F1 2020-05-09 01:34:23.699359 84
2 2 70B3D500700016F1 2020-05-09 02:34:23.547372 84
3 3 70B3D500700016F1 2020-05-09 03:34:23.395129 84
4 4 70B3D500700016F1 2020-05-09 05:34:23.110478 84
... ... ... ... ...
7850 7851 70B3D500700016F1 2021-05-08 16:12:58.267171 106
7851 7852 70B3D500700016F1 2021-05-08 17:12:58.097596 106
7852 7853 70B3D500700016F1 2021-05-08 18:12:57.891567 104
7853 7854 70B3D500700016F1 2021-05-08 20:12:59.235848 104
7854 7855 70B3D500700016F1 2021-05-08 22:12:58.943461 102
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 1.400000 NaN NaN 86.000000
1 15.0 1 2.800000 NaN NaN 86.000000
2 13.0 1 4.200000 NaN NaN 86.000000
3 14.0 0 5.600000 NaN NaN 86.000000
4 15.0 0 7.000000 NaN NaN 86.000000
... ... ... ... ... ... ...
7850 31.0 0 121.133333 112.466667 106.0 112.466667
7851 29.0 0 121.566667 111.933333 106.0 111.933333
7852 25.0 0 121.966667 111.333333 104.0 111.333333
7853 18.0 0 121.333333 110.733333 104.0 110.733333
7854 11.0 0 120.666667 110.066667 102.0 110.066667
[7855 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F4 2020-05-09 00:30:46.645843 60
1 1 70B3D500700016F4 2020-05-09 01:30:46.466883 60
2 2 70B3D500700016F4 2020-05-09 02:30:46.286815 60
3 3 70B3D500700016F4 2020-05-09 03:30:46.110369 60
4 4 70B3D500700016F4 2020-05-09 04:30:45.925179 60
... ... ... ... ...
3527 3530 70B3D500700016F4 2021-05-08 18:04:20.859170 62
3528 3531 70B3D500700016F4 2021-05-08 19:04:22.253953 60
3529 3532 70B3D500700016F4 2021-05-08 20:04:22.088932 60
3530 3533 70B3D500700016F4 2021-05-08 21:04:21.967920 62
3531 3534 70B3D500700016F4 2021-05-08 22:04:21.781690 60
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 1.000000 NaN NaN 68.533333
1 13.0 0 2.000000 NaN NaN 68.533333
2 12.0 0 3.000000 NaN NaN 68.533333
3 11.0 0 4.000000 NaN NaN 68.533333
4 11.0 0 5.000000 NaN NaN 68.533333
... ... ... ... ... ... ...
3527 23.0 0 78.366667 72.400000 60.0 72.400000
3528 18.0 0 78.533333 70.000000 60.0 70.000000
3529 17.0 0 78.766667 67.533333 60.0 67.533333
3530 14.0 0 79.033333 65.000000 60.0 65.000000
3531 9.0 0 79.233333 62.600000 60.0 62.600000
[3532 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D500700016F6 2020-05-09 00:55:40.099451 26
1 1 70B3D500700016F6 2020-05-09 02:55:39.674995 26
2 2 70B3D500700016F6 2020-05-09 03:55:39.484016 26
3 3 70B3D500700016F6 2020-05-09 04:55:39.274142 26
4 4 70B3D500700016F6 2020-05-09 05:55:39.079391 26
... ... ... ... ...
7785 7785 70B3D500700016F6 2021-05-08 16:26:17.336651 64
7786 7786 70B3D500700016F6 2021-05-08 17:26:17.131030 122
7787 7787 70B3D500700016F6 2021-05-08 18:26:16.959249 116
7788 7788 70B3D500700016F6 2021-05-08 19:26:24.328798 116
7789 7789 70B3D500700016F6 2021-05-08 20:26:18.141285 118
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 20.0 0 0.433333 NaN NaN 73.266667
1 19.0 1 0.866667 NaN NaN 73.266667
2 16.0 1 1.300000 NaN NaN 73.266667
3 17.0 0 1.733333 NaN NaN 73.266667
4 14.0 1 2.166667 NaN NaN 73.266667
... ... ... ... ... ... ...
7785 28.0 0 111.900000 111.066667 62.0 111.066667
7786 27.0 1 111.966667 111.600000 62.0 111.600000
7787 26.0 0 112.000000 111.933333 62.0 111.933333
7788 22.0 1 112.866667 112.333333 62.0 112.333333
7789 20.0 0 113.766667 112.866667 62.0 112.866667
[7790 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001724 2020-05-09 01:07:46.505893 52
1 1 70B3D50070001724 2020-05-09 02:07:46.372718 52
2 2 70B3D50070001724 2020-05-09 03:07:46.259972 52
3 3 70B3D50070001724 2020-05-09 04:07:46.128727 52
4 4 70B3D50070001724 2020-05-09 05:07:46.004955 52
... ... ... ... ...
7799 7799 70B3D50070001724 2021-05-08 17:49:42.078325 120
7800 7800 70B3D50070001724 2021-05-08 18:49:42.019627 122
7801 7801 70B3D50070001724 2021-05-08 19:49:43.468573 116
7802 7802 70B3D50070001724 2021-05-08 20:49:43.335279 120
7803 7803 70B3D50070001724 2021-05-08 21:49:43.207884 120
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 15.0 0 0.866667 NaN NaN 103.066667
1 15.0 0 1.733333 NaN NaN 103.066667
2 14.0 0 2.600000 NaN NaN 103.066667
3 15.0 0 3.466667 NaN NaN 103.066667
4 15.0 0 4.333333 NaN NaN 103.066667
... ... ... ... ... ... ...
7799 23.0 1 114.866667 124.133333 92.0 124.133333
7800 19.0 1 115.100000 124.800000 92.0 124.800000
7801 18.0 1 115.133333 125.133333 92.0 125.133333
7802 18.0 1 115.333333 125.800000 92.0 125.800000
7803 14.0 0 115.533333 126.466667 92.0 126.466667
[7804 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001726 2020-05-09 00:30:37.051543 66
1 1 70B3D50070001726 2020-05-09 01:30:36.843577 66
2 2 70B3D50070001726 2020-05-09 02:30:36.668140 66
3 3 70B3D50070001726 2020-05-09 04:30:36.260866 70
4 4 70B3D50070001726 2020-05-09 06:30:35.842373 66
... ... ... ... ...
7063 7063 70B3D50070001726 2021-05-08 17:01:34.990787 122
7064 7064 70B3D50070001726 2021-05-08 18:01:34.785983 120
7065 7065 70B3D50070001726 2021-05-08 19:01:34.583706 120
7066 7066 70B3D50070001726 2021-05-08 20:01:35.968216 120
7067 7067 70B3D50070001726 2021-05-08 21:01:35.760639 118
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 18.0 2 1.100000 NaN NaN 104.266667
1 15.0 3 2.200000 NaN NaN 104.266667
2 13.0 3 3.300000 NaN NaN 104.266667
3 13.0 2 4.466667 NaN NaN 104.266667
4 12.0 2 5.566667 NaN NaN 104.266667
... ... ... ... ... ... ...
7063 20.0 3 119.400000 123.866667 108.0 123.866667
7064 18.0 3 119.433333 124.000000 108.0 124.000000
7065 19.0 3 119.400000 124.133333 108.0 124.133333
7066 16.0 3 119.466667 124.266667 108.0 124.266667
7067 13.0 3 119.500000 124.333333 108.0 124.333333
[7068 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001739 2020-05-09 01:55:25.148156 12
1 1 70B3D50070001739 2020-05-09 02:55:24.935643 12
2 2 70B3D50070001739 2020-05-09 03:55:24.722681 12
3 3 70B3D50070001739 2020-05-09 04:55:24.513137 12
4 4 70B3D50070001739 2020-05-09 05:55:24.383964 12
... ... ... ... ...
7832 7832 70B3D50070001739 2021-05-08 16:25:31.350083 106
7833 7833 70B3D50070001739 2021-05-08 17:25:31.123096 102
7834 7834 70B3D50070001739 2021-05-08 18:25:30.886051 106
7835 7835 70B3D50070001739 2021-05-08 19:25:32.295031 100
7836 7836 70B3D50070001739 2021-05-08 20:25:32.042148 100
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 11.0 0 0.200000 NaN NaN 88.733333
1 10.0 0 0.400000 NaN NaN 88.733333
2 11.0 0 0.600000 NaN NaN 88.733333
3 10.0 0 0.800000 NaN NaN 88.733333
4 9.0 0 1.000000 NaN NaN 88.733333
... ... ... ... ... ... ...
7832 20.0 0 92.200000 103.000000 68.0 103.000000
7833 19.0 0 92.233333 104.133333 68.0 104.133333
7834 16.0 0 92.400000 105.400000 68.0 105.400000
7835 16.0 0 92.466667 106.466667 68.0 106.466667
7836 12.0 0 92.700000 107.533333 68.0 107.533333
[7837 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D5007000173E 2020-05-09 01:01:07.216028 96
1 1 70B3D5007000173E 2020-05-09 02:01:07.264309 98
2 2 70B3D5007000173E 2020-05-09 03:01:18.913175 98
3 3 70B3D5007000173E 2020-05-09 04:01:07.636712 98
4 4 70B3D5007000173E 2020-05-09 05:01:07.504293 98
... ... ... ... ...
7569 7569 70B3D5007000173E 2021-05-08 13:40:58.079424 80
7570 7570 70B3D5007000173E 2021-05-08 16:40:57.574948 72
7571 7571 70B3D5007000173E 2021-05-08 17:40:57.466938 72
7572 7572 70B3D5007000173E 2021-05-08 18:40:57.309826 72
7573 7573 70B3D5007000173E 2021-05-08 20:40:58.643590 70
Temperature Tilt lfilter mov_avg min_avg inter_pol
0 25.0 3 1.600000 NaN NaN 82.933333
1 24.0 3 3.233333 NaN NaN 82.933333
2 22.0 2 4.866667 NaN NaN 82.933333
3 22.0 2 6.500000 NaN NaN 82.933333
4 20.0 2 8.133333 NaN NaN 82.933333
... ... ... ... ... ... ...
7569 30.0 2 105.566667 103.400000 80.0 103.400000
7570 30.0 1 104.866667 102.533333 72.0 102.533333
7571 32.0 1 104.166667 101.666667 72.0 101.666667
7572 30.0 0 103.433333 100.800000 72.0 100.800000
7573 25.0 0 102.666667 99.866667 70.0 99.866667
[7574 rows x 10 columns],
Unnamed: 0 device_id time_stamp Height \
0 0 70B3D50070001747 2020-05-09 00:48:26.913839 62
1 1 70B3D50070001747 2020-05-09 01:48:26.279385 62
2 2 70B3D50070001747 2020-05-09 02:48:26.086640 62
3 3 70B3D50070001747 2020-05-09 03:48:25.894323 62
4 4 70B3D50070001747 2020-05-09 04:48:25.699732 62
... ... ... ... ...
7462 7462 70B3D50070001747 2021-05-08 16:20:48.596188 136
7463 7463 70B3D50070001747 2021-05-08 17:20:48.388594 136
7464 7464 70B3D50070001747 2021-05-08 18:20:48.244483 136
7465 7465 70B3D50070001747 2021-05-08 19:20:49.592471 136
7466 7466 70B3D50070001747 2021-05-08 22:20:49.213155 136
Temperature Tilt lfilter mov_avg min_avg inter_pol