Commit 52494095 authored by PauTheu's avatar PauTheu
Browse files

rename

parent 48015cb6
......@@ -27,4 +27,12 @@ The best validation result can be observed for CNNs with depth-wise separable co
To tackle the concept drift occurring in the test set, we combine several neural models into one ensemble.
Each of the models is trained on a single region of wind turbines.
The models' predictions are combined by using a weighted majority vote.
More specifically, the models' output probabilities are acquired via softmax and averaged to form the ensemble's output.
## `Others.ipynb`
Data: uses the ten best features identified in the data_exploration.ipynb file.
### Approaches:
1. LightGBM: leaf-wise (best-first) tree growth instead of level-wise tree growth. Low accuracy.
2. Easy NN: 3 hidden layers, batch size 64, 50 epochs, binary classification, ReLU, Adam. Low accuracy.
3. TPOT: searches for the best classification pipeline. Low accuracy.
%% Cell type:code id:b23a2e7d-3719-4a6a-8142-8904143d239e tags:
``` python
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score
# Parameter optimization
from skopt.space import Integer, Real, Categorical, Identity
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_convergence
# Model
from sklearn import svm
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pickle as pkl
```
%% Cell type:code id:14ed4e25-21ca-4d3c-9f6e-2e2c06894a0d tags:
``` python
# Load the precomputed ten-best-feature training split and drop rows with
# missing values in place.
# Fix: the original `pkl.load(open(...))` never closed the file handle; the
# context manager guarantees it is released.
# NOTE(review): pickle.load is only safe on trusted local files.
with open("train_ten_best_features.pkl", "rb") as f:
    df_train = pkl.load(f)
df_train.dropna(inplace=True)
```
%% Cell type:code id:af4e895b-9fb4-4251-bf00-710564d7addc tags:
``` python
# Load the precomputed ten-best-feature test split and drop rows with
# missing values in place.
# Fix: the original `pkl.load(open(...))` never closed the file handle; the
# context manager guarantees it is released.
# NOTE(review): pickle.load is only safe on trusted local files.
with open("test_ten_best_features.pkl", "rb") as f:
    df_test = pkl.load(f)
df_test.dropna(inplace=True)
```
%% Cell type:code id:0fcf1140-299b-44fb-b81b-a999453f833a tags:
``` python
# Separate the binary target column ("label") from the feature columns in
# both the train and the test split.
target_col = "label"
y_train = df_train[target_col]
y_test = df_test[target_col]
X_train = df_train.drop(columns=[target_col])
X_test = df_test.drop(columns=[target_col])
```
%% Cell type:markdown id:e67dda33-9761-435f-84fa-d03652fb4fa9 tags:
### LightGBM
%% Cell type:code id:23dccf02-779d-4210-9723-1dbca8d44c08 tags:
``` python
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import json
from sklearn.metrics import accuracy_score
```
%% Cell type:code id:f66fc2b3-2666-46ef-80d2-d0d260b8a963 tags:
``` python
df_train.all().corr(df_test.all())
```
%%%% Output: execute_result
0.7745966692414835
%% Cell type:code id:47d34961-594d-47bf-8fed-7c2bd15e2e4f tags:
``` python
X_train.shape
```
%%%% Output: execute_result
(33264, 11)
%% Cell type:code id:4efa2b64-9bda-4f0e-858a-815915b44732 tags:
``` python
# Train a LightGBM model via the scikit-learn API and report test RMSE.
# Fix: removed the lgb.Dataset objects (lgb_train / lgb_eval) the original
# built for the native lgb.train API — they were never used anywhere, since
# fitting goes through LGBMRegressor below.
# NOTE(review): the labels are binary, yet a *regressor* is fitted here
# (consistent with the notebook's "low acc" remark); LGBMClassifier may be
# the intended estimator — confirm.
print('Starting training...')
gbm = lgb.LGBMRegressor(num_leaves=50,
                        learning_rate=0.01,
                        n_estimators=30,
                        boosting_type="dart")
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1')
print('Starting predicting...')
# best_iteration_ is only set when early stopping is used; it is None here,
# so predict() falls back to using all trained iterations.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# Evaluate: RMSE between true labels and continuous predictions.
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
# Per-feature split-importance counts.
print('Feature importances:', list(gbm.feature_importances_))
```
%% Cell type:code id:356b6771-39fe-42ee-90f5-ec5b63b40bee tags:
``` python
# Hyper-parameter grid search over LightGBM settings with 3-fold CV.
# Fix: the original param_grid was missing the comma after the 'num_leaves'
# entry, which made this cell a SyntaxError (likely the "## yikes" below).
estimator = lgb.LGBMRegressor(num_leaves=31)
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [20, 30, 40, 200],
    'num_leaves': [20, 31, 50],
    # NOTE(review): boosting_type 'rf' requires bagging_fraction/bagging_freq
    # to be set and will error with defaults — confirm whether 'rf' should stay.
    'boosting_type': ['gbdt', 'dart', 'goss', 'rf'],
}
gbm = GridSearchCV(estimator, param_grid, cv=3)
gbm.fit(X_train, y_train)
print('Best parameters found by grid search are:', gbm.best_params_)
```
%% Cell type:code id:afe897d8-3050-4f9a-9b0c-9dac793baa3b tags:
``` python
## yikes
```
%% Cell type:markdown id:fbf4bd8c-fbd0-4cc7-9aa3-c98969c9a55c tags:
### EASY NN
%% Cell type:code id:cb955265-8cf0-430a-88ab-4708876a8147 tags:
``` python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
```
%% Cell type:code id:f90bf39c-5781-4b27-98e2-dcd183f925ef tags:
``` python
# Standardize features to zero mean / unit variance. The scaler is fitted on
# the training split only, then the same transform is applied to the test
# split (avoids test-set leakage). Note this overwrites the DataFrames with
# plain NumPy arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
```
%% Cell type:code id:f88fdd4c-d42b-4981-9729-a949c6780507 tags:
``` python
# Training hyper-parameters for the feed-forward binary classifier below.
EPOCHS = 50  # full passes over the training set
BATCH_SIZE = 64  # mini-batch size for the train DataLoader
LEARNING_RATE = 0.001  # Adam step size
```
%% Cell type:code id:122bf29f-83bb-45ae-b78b-4808959fa58f tags:
``` python
class trainData(Dataset):
    """Torch Dataset pairing feature rows with their labels."""

    def __init__(self, X_data, y_data):
        # Keep references to the feature and label tensors as provided.
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        # Yield the (features, label) pair at the requested row.
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        # Dataset length equals the number of feature rows.
        return len(self.X_data)
# Wrap the scaled training features and labels as float tensors.
# NOTE(review): y_train is a pandas Series at this point; relying on
# torch.FloatTensor(Series) is implicit conversion — consider passing
# y_train.values. Confirm against the torch version in use.
train_data = trainData(torch.FloatTensor(X_train),
                       torch.FloatTensor(y_train))
```
%% Cell type:code id:c073a012-37fb-4424-ba66-fe344d00e34e tags:
``` python
class testData(Dataset):
    """Torch Dataset of feature rows only (no labels) for inference."""

    def __init__(self, X_data):
        # Keep a reference to the feature tensor as provided.
        self.X_data = X_data

    def __getitem__(self, index):
        # Yield the feature row at the requested index.
        return self.X_data[index]

    def __len__(self):
        # Dataset length equals the number of feature rows.
        return len(self.X_data)
test_data = testData(torch.FloatTensor(X_test))
```
%% Cell type:code id:6d8bdcf5-e60c-4379-8024-a13a3ebf4b75 tags:
``` python
# Shuffle training batches each epoch; keep the test batch size at 1 so the
# evaluation loop below collects one prediction per iteration.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)
```
%% Cell type:code id:1c8a9f8d-f758-4a08-9350-b38db859e1b4 tags:
``` python
class binaryClassification(nn.Module):
    """Three-hidden-layer MLP emitting one raw logit per sample.

    The output is un-squashed: it is meant to be paired with
    BCEWithLogitsLoss during training and passed through sigmoid at
    inference time.
    """

    def __init__(self):
        super(binaryClassification, self).__init__()
        # 11 input features -> three 64-unit hidden layers -> 1 output logit.
        self.layer_1 = nn.Linear(11, 64)
        self.layer_2 = nn.Linear(64, 64)
        self.layer_3 = nn.Linear(64, 64)
        self.layer_out = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
        self.batchnorm3 = nn.BatchNorm1d(64)

    def forward(self, inputs):
        # Each hidden stage is: linear -> ReLU -> batch norm.
        hidden = self.batchnorm1(self.relu(self.layer_1(inputs)))
        hidden = self.batchnorm2(self.relu(self.layer_2(hidden)))
        hidden = self.batchnorm3(self.relu(self.layer_3(hidden)))
        # Dropout is applied only just before the output projection.
        return self.layer_out(self.dropout(hidden))
```
%% Cell type:code id:3896791a-41eb-4787-a0c3-93bd21282c19 tags:
``` python
# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda:0" if gpu_available else "cpu")
print(device)
```
%% Cell type:code id:7680c582-2d92-484c-bbd4-f22889f63647 tags:
``` python
# Instantiate the MLP, move it to the selected device, and set up the loss
# and optimizer for training.
model = binaryClassification()
model.to(device)
print(model)
criterion = nn.BCEWithLogitsLoss()  # expects raw logits; applies sigmoid internally
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
```
%% Cell type:code id:6746f086-fa63-4c1c-a49e-310d7e2ec4b7 tags:
``` python
def binary_acc(y_pred, y_test):
    """Return batch accuracy as a rounded percentage (0-100 tensor).

    `y_pred` holds raw logits; `y_test` holds 0/1 targets of the same shape.
    """
    # Logits -> probabilities -> hard 0/1 predictions.
    predicted_labels = torch.round(torch.sigmoid(y_pred))
    n_correct = (predicted_labels == y_test).sum().float()
    # Fraction correct, expressed as a whole-number percentage.
    return torch.round(100 * n_correct / y_test.shape[0])
```
%% Cell type:code id:93f8739a-7dfa-4e34-9f66-81e942942d34 tags:
``` python
# Full training run: EPOCHS sweeps over train_loader, accumulating per-batch
# loss and accuracy so per-epoch means can be printed.
model.train()  # enable dropout and batch-norm batch statistics
for e in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()  # clear gradients from the previous step
        y_pred = model(X_batch)
        # unsqueeze(1) reshapes targets (batch,) -> (batch, 1) to match logits.
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    # Report the mean batch loss and mean batch accuracy for this epoch.
    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
```
%% Cell type:code id:13260656-39d3-4bc7-91fe-e7f5fe09c6d2 tags:
``` python
# Inference over the test set: sigmoid the logits, threshold at 0.5 via
# round, and collect the hard 0/1 predictions.
y_pred_list = []
model.eval()  # disable dropout; batch norm uses running statistics
with torch.no_grad():  # no gradients needed for inference
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())
# Flatten the per-sample (1, 1) arrays into a plain list of 0.0/1.0 values.
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
```
%% Cell type:code id:1bd71613-25f7-4342-aa58-2abebce0f5f9 tags:
``` python
print(classification_report(y_test, y_pred_list))
```
%% Cell type:markdown id:55bbaee2-335c-40ed-a5ad-3a616c91773f tags:
### TPOT
%% Cell type:code id:5b393d8b-f625-4a26-b4e3-fa8b9e15dba6 tags:
``` python
from tpot import TPOTClassifier
```
%% Cell type:code id:5efe5da7-3642-4c68-a8bf-c8e093c937a6 tags:
``` python
# Load the alternate train/test splits for the TPOT experiment.
# Fix: the original `pkl.load(open(...))` calls never closed the file
# handles; the context managers guarantee they are released.
# NOTE(review): pickle.load is only safe on trusted local files.
with open("./data/train", "rb") as f:
    df_train = pkl.load(f)
with open("./data/test", "rb") as f:
    df_test = pkl.load(f)
```
%% Cell type:code id:5c0b7f2e-2da4-4538-8e28-e9d06746c1ec tags:
``` python
# Separate the "ret" target column from the feature columns in both splits.
target_col = "ret"
y_train = df_train[target_col]
y_test = df_test[target_col]
X_train = df_train.drop(columns=[target_col])
X_test = df_test.drop(columns=[target_col])
```
%% Cell type:code id:a17b4a46-ddc4-451a-985b-4cf9f4e54ae7 tags:
``` python
# Short TPOT AutoML search (5 generations, population 50) over classification
# pipelines, using all CPU cores; score() reports accuracy on the test split.
tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42, n_jobs = -1)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
```
%%%% Output: display_data
%% Cell type:markdown id:22625cc8-5113-40b1-8d46-bc7289d34487 tags:
### Other data
%% Cell type:code id:42ad79c1-3be2-41ae-b673-93c2fb20725d tags:
``` python
# Reload the ten-best-feature splits and rebuild the X/y arrays (the earlier
# StandardScaler cell overwrote X_train/X_test in place).
# Fix: the original `pkl.load(open(...))` calls never closed the file
# handles; the context managers guarantee they are released.
with open("test_ten_best_features.pkl", "rb") as f:
    df_test = pkl.load(f)
df_test.dropna(inplace=True)
with open("train_ten_best_features.pkl", "rb") as f:
    df_train = pkl.load(f)
df_train.dropna(inplace=True)
y_train = df_train["label"]
y_test = df_test["label"]
X_train = df_train.drop(["label"], axis=1)
X_test = df_test.drop(["label"], axis=1)
```
%% Cell type:code id:2adbd233-18f1-4fc6-97b0-0d75ae7f5544 tags:
``` python
# Longer TPOT search: up to 100 generations but capped at 120 minutes of
# wall-clock time; score() reports accuracy on the test split.
tpot = TPOTClassifier(generations=100, population_size=50, verbosity=2, random_state=42, n_jobs = -1, max_time_mins= 120)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
```
%%%% Output: display_data
%% Cell type:code id:801d72f6-5b32-4129-a7c3-0661b542728a tags:
``` python
print(tpot.score(X_test, y_test))
```
%% Cell type:code id:973c03f8-0463-4951-9475-6d0eb97e74b1 tags:
``` python
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment