Commit 3fe6de26 authored by PauTheu's avatar PauTheu
Browse files

versuch paul

parent 9e90b432
%% Cell type:code id:b23a2e7d-3719-4a6a-8142-8904143d239e tags:
``` python
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score
# Parameter optimization
from skopt.space import Integer, Real, Categorical, Identity
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_convergence
# Model
from sklearn import svm
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pickle as pkl
```
%% Cell type:code id:14ed4e25-21ca-4d3c-9f6e-2e2c06894a0d tags:
``` python
# Load the pre-selected ten-best-feature training set, then drop rows with
# missing values so downstream estimators receive complete samples only.
# NOTE(review): pickle.load can execute arbitrary code — only load this file
# from a trusted source.
with open("train_ten_best_features.pkl", "rb") as f:  # 'with' closes the handle even on error
    df_train = pkl.load(f)
df_train.dropna(inplace=True)
%% Cell type:code id:af4e895b-9fb4-4251-bf00-710564d7addc tags:
``` python
# Load the matching ten-best-feature test set and drop incomplete rows,
# mirroring the training-set preprocessing.
# NOTE(review): pickle.load can execute arbitrary code — only load this file
# from a trusted source.
with open("test_ten_best_features.pkl", "rb") as f:  # 'with' closes the handle even on error
    df_test = pkl.load(f)
df_test.dropna(inplace=True)
%% Cell type:markdown id:e67dda33-9761-435f-84fa-d03652fb4fa9 tags:
### LightGBM
%% Cell type:code id:23dccf02-779d-4210-9723-1dbca8d44c08 tags:
``` python
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import json
from sklearn.metrics import accuracy_score
```
%% Cell type:code id:f66fc2b3-2666-46ef-80d2-d0d260b8a963 tags:
``` python
# NOTE(review): DataFrame.all() collapses each column to a single boolean
# (True iff every value in the column is truthy), so this correlates two
# boolean Series indexed by column name — it is NOT a correlation of the
# underlying feature values. Presumably meant as a quick train/test
# similarity sanity check; confirm the intended computation.
df_train.all().corr(df_test.all())
%%%% Output: execute_result
0.7745966692414835
%% Cell type:code id:0fcf1140-299b-44fb-b81b-a999453f833a tags:
``` python
# Split each DataFrame into target vector and feature matrix.
target = "label"
y_train = df_train[target]
y_test = df_test[target]
X_train = df_train.drop(columns=[target])
X_test = df_test.drop(columns=[target])
```
%% Cell type:code id:4efa2b64-9bda-4f0e-858a-815915b44732 tags:
``` python
# Train a LightGBM regressor (DART boosting) through the sklearn API and
# report validation RMSE plus per-feature split importances.
# NOTE(review): the original also built lgb.Dataset objects (lgb_train /
# lgb_eval) that were never used anywhere in this file — the sklearn-style
# LGBMRegressor.fit consumes the raw X/y arrays directly — so they are removed.
print('Starting training...')
gbm = lgb.LGBMRegressor(num_leaves=50,
                        learning_rate=0.01,
                        n_estimators=30,
                        boosting_type="dart")
# eval_set makes LightGBM log l1/l2 on the held-out split after each iteration.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1')

print('Starting predicting...')
# best_iteration_ is only populated when early stopping is enabled; here it is
# None, so predict() uses all fitted trees.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)

# Report RMSE (sqrt of MSE) on the held-out split.
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
# Split-count importances, one entry per feature column.
print('Feature importances:', list(gbm.feature_importances_))
```
%%%% Output: stream
Starting training...
[1] valid_0's l1: 0.499973 valid_0's l2: 0.250123
[2] valid_0's l1: 0.500054 valid_0's l2: 0.25024
[3] valid_0's l1: 0.500134 valid_0's l2: 0.25037
[4] valid_0's l1: 0.500213 valid_0's l2: 0.250517
[5] valid_0's l1: 0.500297 valid_0's l2: 0.25068
[6] valid_0's l1: 0.500332 valid_0's l2: 0.250821
[7] valid_0's l1: 0.500422 valid_0's l2: 0.251018
[8] valid_0's l1: 0.500395 valid_0's l2: 0.25095
[9] valid_0's l1: 0.500471 valid_0's l2: 0.251157
[10] valid_0's l1: 0.50056 valid_0's l2: 0.251371
[11] valid_0's l1: 0.500656 valid_0's l2: 0.251632
[12] valid_0's l1: 0.500624 valid_0's l2: 0.251541
[13] valid_0's l1: 0.500698 valid_0's l2: 0.251785
[14] valid_0's l1: 0.500799 valid_0's l2: 0.25205
[15] valid_0's l1: 0.50093 valid_0's l2: 0.252347
[16] valid_0's l1: 0.501018 valid_0's l2: 0.252634
[17] valid_0's l1: 0.501099 valid_0's l2: 0.252942
[18] valid_0's l1: 0.501186 valid_0's l2: 0.253254
[19] valid_0's l1: 0.501273 valid_0's l2: 0.253559
[20] valid_0's l1: 0.501362 valid_0's l2: 0.253862
[21] valid_0's l1: 0.501349 valid_0's l2: 0.253796
[22] valid_0's l1: 0.501435 valid_0's l2: 0.254137
[23] valid_0's l1: 0.501522 valid_0's l2: 0.254505
[24] valid_0's l1: 0.501558 valid_0's l2: 0.254815
[25] valid_0's l1: 0.501637 valid_0's l2: 0.255198
[26] valid_0's l1: 0.501581 valid_0's l2: 0.255414
[27] valid_0's l1: 0.501488 valid_0's l2: 0.255611
[28] valid_0's l1: 0.501447 valid_0's l2: 0.25547
[29] valid_0's l1: 0.501474 valid_0's l2: 0.255782
[30] valid_0's l1: 0.501404 valid_0's l2: 0.256021
Starting predicting...
The rmse of prediction is: 0.5059851983827391
Feature importances: [0, 117, 110, 276, 32, 108, 234, 70, 60, 460, 3]
%% Cell type:code id:356b6771-39fe-42ee-90f5-ec5b63b40bee tags:
``` python
# Grid-search LightGBM hyperparameters with 3-fold cross-validation and
# report the best combination found.
estimator = lgb.LGBMRegressor(num_leaves=31)
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [20, 30, 40, 200],
    'num_leaves': [20, 31, 50],  # FIX: missing comma here was a SyntaxError
    'boosting_type': ['gbdt', 'dart', 'goss', 'rf'],
}
gbm = GridSearchCV(estimator, param_grid, cv=3)
gbm.fit(X_train, y_train)
print('Best parameters found by grid search are:', gbm.best_params_)
```
%%%% Output: stream
Best parameters found by grid search are: {'boosting_type': 'dart', 'learning_rate': 0.01, 'n_estimators': 30, 'num_leaves': 50}
%% Cell type:code id:afe897d8-3050-4f9a-9b0c-9dac793baa3b tags:
``` python
## yikes
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment