Commit 5ea7bb16 authored by tills's avatar tills
Browse files

Regression

parent 665e492f
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, r2_score, mean_squared_error
```
%% Cell type:code id: tags:
``` python
# Relative path (Windows-style raw string) to the training collection data.
path_collection_data = r'..\..\data\modeling\train\collection_data.txt'
# Data import
# Load the raw collection data into a DataFrame; later cells derive
# features/target from this frame.
df_orig = pd.read_csv(path_collection_data)
```
%% Cell type:code id: tags:
``` python
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline

# Dummy (one-hot) encoding of the container ID.
df = pd.get_dummies(df_orig, columns=["container_id"], prefix=["container"])
# Features: drop the leftover CSV index column, the timestamp, and the target.
X = df.drop(['Unnamed: 0', 'timestamp', 'pre_height'], axis=1)
# Target column.
y = df['pre_height']
# Train/Test split (70/30, fixed seed for reproducibility).
# NOTE(review): the split was previously executed twice back-to-back;
# doing it once with the same random_state yields the identical split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
```
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import GridSearchCV
from xgboost.sklearn import XGBRegressor
from sklearn.metrics import make_scorer, r2_score, mean_squared_error

# Hyper-parameter grid for the XGBoost regressor.
# NOTE(review): a stray duplicated 'reg_alpha' line made this dict a
# syntax error; it is listed exactly once now.
parameters = {
    'min_child_weight': [1, 2, 5],
    'max_depth': [6, 12, 20],
    'n_estimators': [100],
    'reg_alpha': [0, 1, 5],
}
# XGB Regressor (defaults; the grid supplies the tuned values).
xgbr = xgb.XGBRegressor()
# Grid Search for best parameters with 5-fold cross-validation.
# In the future: try more options
# BUGFIX: mean_squared_error is a loss, but make_scorer defaults to
# greater_is_better=True, which made GridSearchCV *maximize* MSE and
# select the worst parameter combination. greater_is_better=False makes
# the search minimize MSE (best_score_ is then the negated MSE).
xgb_grid = GridSearchCV(xgbr,
                        parameters,
                        scoring=make_scorer(mean_squared_error, greater_is_better=False),
                        cv=5,
                        verbose=True)
# Fit once (the previous version fitted the identical grid twice).
xgb_grid.fit(X_train, y_train)
print(xgb_grid.best_score_)
```
%%%% Output: execute_result
GridSearchCV(cv=5,
estimator=XGBRegressor(base_score=None, booster=None,
colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None, gamma=None,
gpu_id=None, importance_type='gain',
interaction_constraints=None,
learning_rate=None, max_delta_step=None,
max_depth=None, min_child_weight=None,
missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None,
num_parallel_tree=None, random_state=None,
reg_alpha=None, reg_lambda=None,
scale_pos_weight=None, subsample=None,
tree_method=None, validate_parameters=None,
verbosity=None),
param_grid={'max_depth': [6, 12, 20],
'min_child_weight': [1, 2, 5], 'n_estimators': [100],
'reg_alpha': [0, 1, 5]},
scoring=make_scorer(mean_squared_error), verbose=True)
%% Cell type:code id: tags:
``` python
# Report the best cross-validation score and the parameter combination
# that achieved it (from the fitted GridSearchCV above).
print(xgb_grid.best_score_)
print(xgb_grid.best_params_)
```
%% Cell type:code id: tags:
``` python
import xgboost as xgb

# Refit an XGBoost regressor on the full training split using the
# parameter values selected by the grid search (verbosity=0 silences
# the training log output).
best_params = {'max_depth': 6, 'min_child_weight': 5, 'n_estimators': 100}
xgbr = xgb.XGBRegressor(verbosity=0, **best_params)
xgbr.fit(X_train, y_train)
```
%%%% Output: execute_result
......@@ -87,12 +109,14 @@
tree_method='exact', validate_parameters=1, verbosity=0)
%% Cell type:code id: tags:
``` python
# R^2 score on the training split.
score = xgbr.score(X_train, y_train)
print("Training score: ", score)
# R^2 score on the held-out test split.
# BUGFIX: the previous version printed the test score twice, once under
# the wrong label "Training score".
score = xgbr.score(X_test, y_test)
print("Test score: ", score)
```
%% Cell type:code id: tags:
``` python
......@@ -102,20 +126,14 @@
```
%% Cell type:code id: tags:
``` python
# Result DataFrame: ground truth vs. model prediction per test sample.
# NOTE(review): the previous version built the same frame twice and
# shadowed the builtin `dict` with a temporary variable; a single direct
# construction is equivalent.
result_df = pd.DataFrame({'True': y_test, 'Pred': ypred})
# Absolute prediction error per sample.
result_df['Diff'] = abs(result_df['True'] - result_df['Pred'])
```
%% Cell type:code id: tags:
``` python
# Summary statistics of truth, prediction, and absolute error.
# NOTE(review): 'Diff' is already computed in the cell that builds
# result_df; recomputing the identical column here was redundant.
result_df.describe()
```
%%%% Output: execute_result
......@@ -166,12 +184,11 @@
max 140.000000 134.291245 114.803001
%% Cell type:code id: tags:
``` python
# Inspect test samples with an absolute error above 20 — presumably the
# hard cases / outliers; verify the threshold against the data's scale.
# NOTE(review): the identical filter expression was duplicated; once is enough.
result_df[result_df['Diff'] > 20]
```
%%%% Output: execute_result
True Pred Diff
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment