uwwdp / WindTurbinePrediction / Commits

Commit baef828a, authored Jun 02, 2021 by anastasiaslobodyanik
Merge branch 'master' of git.scc.kit.edu:oliver.wirth/windturbineprediction
Parents: 57231100, 47f73752
Changes: 1

Neural.ipynb
%% Cell type:code id: tags:
``` python
from pathlib import Path
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import random_split
from torch.nn.utils.rnn import pack_sequence, pad_sequence
from torchvision import transforms

# Reproducibility and display settings
np.random.seed(42)
pd.set_option("display.max_columns", None)
sns.set_theme()
```
%% Cell type:markdown id: tags:
## Load data
%% Cell type:code id: tags:
``` python
# Data paths
data_path = Path('data')
train_path = data_path / 'train'
test_path = data_path / 'test'
runs_path = Path('runs')

# Load labels
train_labels = pd.read_csv(f'{train_path}_label.csv')
test_labels = pd.read_csv(f'{test_path}_label.csv')

# Merge train and test labels
all_labels = train_labels.append(test_labels, ignore_index=True)
all_labels = all_labels.dropna()
all_labels.ret = all_labels.ret.astype(int)
```
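%% Cell type:markdown id: tags:
A quick sanity check of the merged label table can catch path or schema problems early. The cell below is a minimal sketch, assuming the layout above (a `file_name` column and an integer `ret` target); it only prints summary information.
%% Cell type:code id: tags:
``` python
# Sanity check (sketch): shape, first rows and class balance of the merged labels
print(all_labels.shape)
print(all_labels.head())
print(all_labels.ret.value_counts(normalize=True))
```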
%% Cell type:code id: tags:
``` python
# Custom dataset per region
class RegionDataset(Dataset):

    def __init__(self, region_path, labels, transform=None):
        super().__init__()
        self.region = region_path.name
        self.transform = transform
        self.dfs = []
        for csv_path in region_path.iterdir():
            df = pd.read_csv(csv_path)
            df = df.dropna()
            df = df[(df.T != 0).any()]
            label = labels[labels.file_name == csv_path.name].ret.values
            if df.shape[0] > 0 and len(label) == 1:
                self.dfs.append((df, label[0]))

    def __len__(self):
        return len(self.dfs)

    def __getitem__(self, idx):
        df, label = self.dfs[idx]
        if self.transform:
            df = self.transform(df)
        return df, label
```
%% Cell type:code id: tags:
``` python
# Preprocessing transformations
# TODO: PCA, truncated SVD, MinMaxScaling, scale over whole dataset, feature selection
transform = transforms.Compose([
    StandardScaler().fit_transform,
    torch.FloatTensor
])

# Load train and test set
trainset = RegionDataset(train_path / '004', all_labels, transform=transform)
testset = RegionDataset(test_path / 'dummy', all_labels, transform=transform)
```
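%% Cell type:markdown id: tags:
To verify the transform pipeline, it helps to pull a single sample: `StandardScaler().fit_transform` standardizes each file on its own, and `torch.FloatTensor` converts the result. A minimal check, assuming the region folders above exist and contain at least one usable CSV:
%% Cell type:code id: tags:
``` python
# Quick check (sketch): one transformed sample should be a FloatTensor of shape (seq_len, n_features)
sample, label = trainset[0]
print(type(sample), sample.shape, sample.dtype)
print('label:', label)
print('train/test sizes:', len(trainset), len(testset))
```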
%% Cell type:code id: tags:
``` python
# Split into train and validation set
holdout = .2
n_val = int(len(trainset) * holdout)
n_train = len(trainset) - n_val
trainset, valset = random_split(trainset, [n_train, n_val])

# Pack variable sized sequences for RNN
def collate_pack(batch):
    samples, labels = zip(*batch)
    samples = pack_sequence(samples, enforce_sorted=False)
    labels = torch.tensor(labels)
    return samples, labels

# Crop sequences to same length for CNN
# TODO: masking
def collate_crop(batch):
    samples, labels = zip(*batch)
    length = min(x.size(0) for x in samples)
    samples = torch.stack([x[-length:] for x in samples])
    labels = torch.tensor(labels)
    return samples, labels

# Pad sequences to same length
# TODO: masking?
def collate_pad(batch):
    samples, labels = zip(*batch)
    samples = pad_sequence([x.flip((0,)) for x in samples], batch_first=True).flip((1,))
    labels = torch.tensor(labels)
    return samples, labels

# Create data loaders
batch_size = 8
collate_fn = collate_crop
trainloader = DataLoader(trainset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)
valloader = DataLoader(valset, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)
```
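%% Cell type:markdown id: tags:
The three collate functions handle variable-length sequences differently: packing for RNNs, cropping to the shortest sequence, and front-padding to the longest. The toy example below illustrates the resulting shapes; the feature dimension (75) and the sequence lengths are illustrative only, not taken from the data.
%% Cell type:code id: tags:
``` python
# Toy demo (sketch) of the collate strategies on variable-length sequences
toy_batch = [(torch.randn(10, 75), 0), (torch.randn(7, 75), 1), (torch.randn(12, 75), 0)]

packed, labels = collate_pack(toy_batch)
print(type(packed).__name__, labels)   # PackedSequence, tensor([0, 1, 0])

cropped, _ = collate_crop(toy_batch)
print(cropped.shape)                   # (3, 7, 75) - cropped to the shortest sequence

padded, _ = collate_pad(toy_batch)
print(padded.shape)                    # (3, 12, 75) - front-padded to the longest sequence
```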
%% Cell type:markdown id: tags:
## Models
%% Cell type:code id: tags:
``` python
# NN hyperparameters
input_size = 75
lstm_size = 32
hidden_size = 16
output_size = 2
dropout = .5

# LSTM-based NN
class LSTMRNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.rec = nn.LSTM(
            input_size=input_size,
            hidden_size=lstm_size,
            batch_first=True
        )
        self.clf = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(lstm_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        _, (x, _) = self.rec(x)
        x = x.squeeze(dim=0)
        x = self.clf(x)
        return x
```
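%% Cell type:markdown id: tags:
A dummy forward pass is a cheap way to confirm that the classifier head receives the last LSTM hidden state. A sketch with an arbitrary batch size and sequence length (75 matches `input_size` above):
%% Cell type:code id: tags:
``` python
# Shape check (sketch): (batch, seq_len, features) in, (batch, output_size) out
lstm_model = LSTMRNN()
dummy = torch.randn(4, 20, 75)
print(lstm_model(dummy).shape)   # expected: torch.Size([4, 2])
```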
%% Cell type:code id: tags:
``` python
# NN hyperparameters
input_size = 75
rec_size = 32
hidden_size = 16
output_size = 2
dropout = .5

# GRU-based NN
class GRURNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.rec = nn.GRU(
            input_size=input_size,
            hidden_size=rec_size,
            batch_first=True
        )
        self.clf = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(rec_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        _, x = self.rec(x)
        x = x.squeeze(dim=0)
        x = self.clf(x)
        return x
```
%% Cell type:code id: tags:
``` python
# NN hyperparameters
input_size = 75
conv1_size = 64
conv1_kernel = 7
conv2_size = 64
conv2_kernel = 7
conv3_size = 64
conv3_kernel = 7
conv_stride = 2
pool_size = 16
hidden_size = 32
output_size = 2
dropout = .5

# 1D convolutional NN
class CNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(input_size, conv1_size, kernel_size=conv1_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(conv1_size, conv2_size, kernel_size=conv2_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Conv1d(conv2_size, conv3_size, kernel_size=conv3_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.AdaptiveMaxPool1d(pool_size)
        )
        self.fc = nn.Sequential(
            nn.Linear(conv3_size * pool_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
```
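%% Cell type:markdown id: tags:
With kernel size 7 and stride 2 in all three layers, the time axis must be long enough (roughly 43 steps or more) for the last convolution to produce any output; `AdaptiveMaxPool1d` then fixes the length at `pool_size` regardless of the remaining steps. A shape check with an arbitrary dummy batch:
%% Cell type:code id: tags:
``` python
# Shape check (sketch): the strided convolutions shrink the time axis, adaptive pooling fixes it
cnn_model = CNN()
dummy = torch.randn(4, 100, 75)   # (batch, seq_len, features)
print(cnn_model(dummy).shape)     # expected: torch.Size([4, 2])
```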
%% Cell type:code id: tags:
``` python
# NN hyperparameters
input_size = 75
conv1_size = 64
conv1_kernel = 7
conv2_size = 64
conv2_kernel = 7
conv3_size = 64
conv3_kernel = 7
conv_stride = 2
pool_size = 16
hidden_size = 32
output_size = 2
dropout = .5

# Separable convolution layer
class SepConv1d(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        self.depthwise_conv = nn.Conv1d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels)
        self.pointwise_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        return x

# CNN with separable 1D convolutions
class SepCNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            SepConv1d(input_size, conv1_size, kernel_size=conv1_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            SepConv1d(conv1_size, conv2_size, kernel_size=conv2_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            SepConv1d(conv2_size, conv3_size, kernel_size=conv3_kernel, stride=conv_stride),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.AdaptiveMaxPool1d(pool_size)
        )
        self.fc = nn.Sequential(
            nn.Linear(conv3_size * pool_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
```
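%% Cell type:markdown id: tags:
Depthwise-separable convolutions factor each Conv1d into a per-channel depthwise step and a 1x1 pointwise step, which cuts the parameter count considerably for the same layer sizes. A rough comparison (sketch):
%% Cell type:code id: tags:
``` python
# Compare parameter counts of the plain and the separable CNN
n_cnn = sum(p.numel() for p in CNN().parameters())
n_sep = sum(p.numel() for p in SepCNN().parameters())
print(f'CNN: {n_cnn} parameters, SepCNN: {n_sep} parameters')
```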
%% Cell type:code id: tags:
``` python
# NN hyperparameters
input_size = 75
conv1_size = 64
conv1_kernel = 7
conv2_size = 64
conv2_kernel = 7
conv3_size = 64
conv3_kernel = 7
conv_stride = 2
pool_size = 16
hidden_size = 32
output_size = 2
dropout = .4

# CNN with separable 1D convolutions and batch normalization
class BNSepCNN(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            SepConv1d(input_size, conv1_size, kernel_size=conv1_kernel, stride=conv_stride),
            nn.BatchNorm1d(conv1_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            SepConv1d(conv1_size, conv2_size, kernel_size=conv2_kernel, stride=conv_stride),
            nn.BatchNorm1d(conv2_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            SepConv1d(conv2_size, conv3_size, kernel_size=conv3_kernel, stride=conv_stride),
            nn.BatchNorm1d(conv3_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.AdaptiveMaxPool1d(pool_size)
        )
        self.fc = nn.Sequential(
            nn.Linear(conv3_size * pool_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
```
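%% Cell type:markdown id: tags:
Because this variant contains `BatchNorm1d` (batch statistics in training mode, running statistics in eval mode) as well as dropout, the train/eval mode switch matters for reproducible predictions. A quick eval-mode shape check (sketch, with an arbitrary dummy sequence):
%% Cell type:code id: tags:
``` python
# Shape check (sketch): run a single dummy sequence through the model in eval mode
bn_model = BNSepCNN().eval()
with torch.no_grad():
    print(bn_model(torch.randn(1, 100, 75)).shape)   # expected: torch.Size([1, 2])
```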
%% Cell type:markdown id: tags:
## Training parameters
%% Cell type:code id: tags:
``` python
# Learning rate, starting epoch and max epochs
lr = 1e-3
epoch = 0
epochs = 500

# Init model and optimizer
model = BNSepCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# LR scheduler
scheduler_warmup = 30
break_on_min_lr = True
min_lr = 1e-5
factor = .5
patience = 10
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor, patience=patience, verbose=True)

# Loss/accuracy logs
loss_stats = pd.DataFrame(columns=['train', 'val'])
acc_stats = pd.DataFrame(columns=['train', 'val'])

# CUDA
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(DEVICE)

# Model info
def prod(xs):
    return 1 if len(xs) == 0 else xs[0] * prod(xs[1:])

n_parameters = sum(prod(p.size()) for p in model.parameters())
print(model)
print(f'The model \'{model.__class__.__name__}\' has {n_parameters} parameters')
```
%%%% Output: stream
SepCNN(
  (conv): Sequential(
    (0): SepConv1d(
      (depthwise_conv): Conv1d(75, 75, kernel_size=(9,), stride=(3,), groups=75)
      (pointwise_conv): Conv1d(75, 50, kernel_size=(1,), stride=(1,))
    )
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): SepConv1d(
      (depthwise_conv): Conv1d(50, 50, kernel_size=(9,), stride=(3,), groups=50)
      (pointwise_conv): Conv1d(50, 50, kernel_size=(1,), stride=(1,))
    )
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): SepConv1d(
      (depthwise_conv): Conv1d(50, 50, kernel_size=(9,), stride=(3,), groups=50)
      (pointwise_conv): Conv1d(50, 50, kernel_size=(1,), stride=(1,))
    )
    (7): ReLU()
    (8): Dropout(p=0.5, inplace=False)
    (9): AdaptiveMaxPool1d(output_size=16)
  )
  (fc): Sequential(
    (0): Linear(in_features=800, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=32, out_features=2, bias=True)
  )
)
The model 'SepCNN' has 36348 parameters
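%% Cell type:markdown id: tags:
As a side note, the recursive `prod(p.size())` above computes the same quantity that `Tensor.numel()` already provides; a small cross-check (sketch):
%% Cell type:code id: tags:
``` python
# Cross-check (sketch): prod(p.size()) should agree with p.numel()
assert n_parameters == sum(p.numel() for p in model.parameters())
print('trainable parameters:', sum(p.numel() for p in model.parameters() if p.requires_grad))
```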
%% Cell type:code id: tags:
``` python
# Create run directory and store the model/training configuration
run_path = runs_path / str(datetime.datetime.now())
run_path.mkdir()
with open(run_path / 'model_architecture.txt', 'w') as f:
    f.write(f"""{model}

{optimizer}

batch_size: {batch_size}
factor: {factor}
patience: {patience}
scheduler_warmup: {scheduler_warmup}
""")
```
%% Cell type:code id: tags:
``` python
# Forward pass for a single batch
def forward_pass(model, samples, labels, criterion, optimizer=None):
    out = model(samples)
    pred = out.argmax(dim=1)
    loss = criterion(out, labels)
    if optimizer:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss = loss.item() * labels.size(0)
    correct = (pred == labels).sum().item()
    return loss, correct

# Forward pass for whole dataset
def train_loop(model, loader, criterion, optimizer=None):
    model.train(optimizer is not None)
    running_loss = 0
    running_correct = 0
    for samples, labels in loader:
        samples, labels = samples.to(DEVICE), labels.to(DEVICE)
        loss, correct = forward_pass(model, samples, labels, criterion, optimizer)
        running_loss += loss
        running_correct += correct
    return running_loss, running_correct
```
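%% Cell type:markdown id: tags:
The same `train_loop` can later be reused to evaluate the held-out test set after training: calling it without an optimizer switches the model to eval mode but still builds autograd graphs, so wrapping the call in `torch.no_grad()` is cheaper. A minimal sketch, assuming the test labels loaded above cover the test region:
%% Cell type:code id: tags:
``` python
# Evaluation sketch: reuse train_loop without an optimizer for the test set
with torch.no_grad():
    loss, correct = train_loop(model, testloader, criterion)
print(f'test loss = {loss / len(testset):.4f}, acc = {correct / len(testset):.4f}')
```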
%% Cell type:markdown id: tags:
## Train model
%% Cell type:code id: tags:
``` python
# Train model
for epoch in range(epoch, epoch + epochs):
    print(f'=== Epoch {(epoch + 1):3} ===')

    # Train loop
    loss, correct = train_loop(model, trainloader, criterion, optimizer)
    train_loss = loss / len(trainset)
    train_acc = correct / len(trainset)
    print(f'  training loss = {train_loss:.4f}, acc = {train_acc:.4f}')

    # Validation loop
    loss, correct = train_loop(model, valloader, criterion)
    val_loss = loss / len(valset)
    val_acc = correct / len(valset)
    print(f'validation loss = {val_loss:.4f}, acc = {val_acc:.4f}')

    # Statistics
    loss_stats = loss_stats.append({'train': train_loss, 'val': val_loss}, ignore_index=True)
    acc_stats = acc_stats.append({'train': train_acc, 'val': val_acc}, ignore_index=True)

    # Save best model
    if loss_stats['val'].idxmin() == len(loss_stats) - 1:
        torch.save(model, run_path / 'model_best.pt')

    # Schedule learning rate after warmup period
    if epoch >= scheduler_warmup:
        scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    if break_on_min_lr and current_lr < min_lr:
        break

torch.save(model, run_path / 'model_last.pt')
epoch += 1
```
%%%% Output: stream
=== Epoch 1 ===
training loss = 0.6933, acc = 0.5166
validation loss = 0.6907, acc = 0.5359
=== Epoch 2 ===
training loss = 0.6918, acc = 0.5249
validation loss = 0.6911, acc = 0.5359
=== Epoch 13 ===
training loss = 0.6919, acc = 0.5235
validation loss = 0.6909, acc = 0.5359
=== Epoch 14 ===
training loss = 0.6912, acc = 0.5166
validation loss = 0.6923, acc = 0.5359
=== Epoch 15 ===
training loss = 0.6953, acc = 0.5055
validation loss = 0.6929, acc = 0.5359
=== Epoch 16 ===
training loss = 0.6912, acc = 0.5207
validation loss = 0.6935, acc = 0.5359
=== Epoch 17 ===
training loss = 0.6888, acc = 0.5235
validation loss = 0.6970, acc = 0.5414
=== Epoch 18 ===
training loss = 0.6929, acc = 0.5055
validation loss = 0.6964, acc = 0.5359
=== Epoch 19 ===
training loss = 0.6873, acc = 0.5152
validation loss = 0.6969, acc = 0.5414
=== Epoch 20 ===
training loss = 0.6818, acc = 0.5249
validation loss = 0.7036, acc = 0.5193
=== Epoch 21 ===
training loss = 0.6886, acc = 0.5110
validation loss = 0.7048, acc = 0.5304
=== Epoch 22 ===
training loss = 0.6884, acc = 0.5290
validation loss = 0.7018, acc = 0.5193
=== Epoch 23 ===
training loss = 0.6883, acc = 0.5428
validation loss = 0.6977, acc = 0.5414
=== Epoch 24 ===
training loss = 0.6837, acc = 0.5428
validation loss = 0.7006, acc = 0.5193
=== Epoch 25 ===
training loss = 0.6909, acc = 0.5428
validation loss = 0.6975, acc = 0.5359
=== Epoch 26 ===
training loss = 0.6853, acc = 0.5580
validation loss = 0.6990, acc = 0.4475
=== Epoch 27 ===
training loss = 0.6819, acc = 0.5622
validation loss = 0.7040, acc = 0.4033
=== Epoch 28 ===
training loss = 0.6833, acc = 0.5912
validation loss = 0.7016, acc = 0.5083
=== Epoch 29 ===
training loss = 0.6851, acc = 0.5773
validation loss = 0.6988, acc = 0.4475
=== Epoch 30 ===
training loss = 0.6748, acc = 0.6215
validation loss = 0.6995, acc = 0.5304
=== Epoch 31 ===
training loss = 0.6741, acc = 0.6367
validation loss = 0.7004, acc = 0.4420
=== Epoch 32 ===
training loss = 0.6668, acc = 0.6271
validation loss = 0.6939, acc = 0.5580
=== Epoch 33 ===
training loss = 0.6565, acc = 0.6119
validation loss = 0.6879, acc = 0.5359
=== Epoch 34 ===
training loss = 0.6428, acc = 0.6160
validation loss = 0.6832, acc = 0.5856
=== Epoch 35 ===
training loss = 0.6429, acc = 0.6326
validation loss = 0.6830, acc = 0.5083
=== Epoch 36 ===
training loss = 0.6471, acc = 0.6298
validation loss = 0.6603, acc = 0.5967
=== Epoch 37 ===
training loss = 0.6337, acc = 0.6354
validation loss = 0.6556, acc = 0.6188
=== Epoch 38 ===
training loss = 0.6192, acc = 0.6561
validation loss = 0.6393, acc = 0.6298
=== Epoch 39 ===
training loss = 0.6056, acc = 0.6685
validation loss = 0.6140, acc = 0.6685
=== Epoch 40 ===