Commit 3fe6de26 authored by PauTheu's avatar PauTheu
Browse files

versuch paul

parent 9e90b432
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b23a2e7d-3719-4a6a-8142-8904143d239e",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"import os\n",
"import pandas as pd\n",
"\n",
"from tqdm import tqdm\n",
"import numpy as np\n",
"from sklearn.metrics import f1_score\n",
"\n",
"# Parameter optimization\n",
"from skopt.space import Integer, Real, Categorical, Identity\n",
"from skopt.utils import use_named_args\n",
"from skopt import gp_minimize\n",
"from skopt.plots import plot_convergence\n",
"\n",
"# Model\n",
"from sklearn import svm\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import pickle as pkl\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "14ed4e25-21ca-4d3c-9f6e-2e2c06894a0d",
"metadata": {},
"outputs": [],
"source": [
"# Load the pickled training frame and drop every row containing a missing value.\n",
"# NOTE: pickle can execute arbitrary code while loading; only open trusted files.\n",
"with open(\"train_ten_best_features.pkl\", \"rb\") as train_file:\n",
"    df_train = pkl.load(train_file)\n",
"df_train = df_train.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "af4e895b-9fb4-4251-bf00-710564d7addc",
"metadata": {},
"outputs": [],
"source": [
"# Load the pickled test frame and drop every row containing a missing value.\n",
"# NOTE: pickle can execute arbitrary code while loading; only open trusted files.\n",
"with open(\"test_ten_best_features.pkl\", \"rb\") as test_file:\n",
"    df_test = pkl.load(test_file)\n",
"df_test = df_test.dropna()"
]
},
{
"cell_type": "markdown",
"id": "e67dda33-9761-435f-84fa-d03652fb4fa9",
"metadata": {},
"source": [
"### LightGBM"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "23dccf02-779d-4210-9723-1dbca8d44c08",
"metadata": {},
"outputs": [],
"source": [
"import lightgbm as lgb\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import GridSearchCV\n",
"import json\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "f66fc2b3-2666-46ef-80d2-d0d260b8a963",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7745966692414835"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reduce each frame with .all() and correlate the two reduced results.\n",
"# NOTE(review): this compares the per-frame .all() reductions, not the raw\n",
"# feature values - confirm that this is the intended sanity check.\n",
"train_all = df_train.all()\n",
"test_all = df_test.all()\n",
"train_all.corr(test_all)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "0fcf1140-299b-44fb-b81b-a999453f833a",
"metadata": {},
"outputs": [],
"source": [
"# Separate the \"label\" target column from the remaining feature columns.\n",
"X_train = df_train.drop(columns=[\"label\"])\n",
"y_train = df_train[\"label\"]\n",
"\n",
"X_test = df_test.drop(columns=[\"label\"])\n",
"y_test = df_test[\"label\"]"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "4efa2b64-9bda-4f0e-858a-815915b44732",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting training...\n",
"[1]\tvalid_0's l1: 0.499973\tvalid_0's l2: 0.250123\n",
"[2]\tvalid_0's l1: 0.500054\tvalid_0's l2: 0.25024\n",
"[3]\tvalid_0's l1: 0.500134\tvalid_0's l2: 0.25037\n",
"[4]\tvalid_0's l1: 0.500213\tvalid_0's l2: 0.250517\n",
"[5]\tvalid_0's l1: 0.500297\tvalid_0's l2: 0.25068\n",
"[6]\tvalid_0's l1: 0.500332\tvalid_0's l2: 0.250821\n",
"[7]\tvalid_0's l1: 0.500422\tvalid_0's l2: 0.251018\n",
"[8]\tvalid_0's l1: 0.500395\tvalid_0's l2: 0.25095\n",
"[9]\tvalid_0's l1: 0.500471\tvalid_0's l2: 0.251157\n",
"[10]\tvalid_0's l1: 0.50056\tvalid_0's l2: 0.251371\n",
"[11]\tvalid_0's l1: 0.500656\tvalid_0's l2: 0.251632\n",
"[12]\tvalid_0's l1: 0.500624\tvalid_0's l2: 0.251541\n",
"[13]\tvalid_0's l1: 0.500698\tvalid_0's l2: 0.251785\n",
"[14]\tvalid_0's l1: 0.500799\tvalid_0's l2: 0.25205\n",
"[15]\tvalid_0's l1: 0.50093\tvalid_0's l2: 0.252347\n",
"[16]\tvalid_0's l1: 0.501018\tvalid_0's l2: 0.252634\n",
"[17]\tvalid_0's l1: 0.501099\tvalid_0's l2: 0.252942\n",
"[18]\tvalid_0's l1: 0.501186\tvalid_0's l2: 0.253254\n",
"[19]\tvalid_0's l1: 0.501273\tvalid_0's l2: 0.253559\n",
"[20]\tvalid_0's l1: 0.501362\tvalid_0's l2: 0.253862\n",
"[21]\tvalid_0's l1: 0.501349\tvalid_0's l2: 0.253796\n",
"[22]\tvalid_0's l1: 0.501435\tvalid_0's l2: 0.254137\n",
"[23]\tvalid_0's l1: 0.501522\tvalid_0's l2: 0.254505\n",
"[24]\tvalid_0's l1: 0.501558\tvalid_0's l2: 0.254815\n",
"[25]\tvalid_0's l1: 0.501637\tvalid_0's l2: 0.255198\n",
"[26]\tvalid_0's l1: 0.501581\tvalid_0's l2: 0.255414\n",
"[27]\tvalid_0's l1: 0.501488\tvalid_0's l2: 0.255611\n",
"[28]\tvalid_0's l1: 0.501447\tvalid_0's l2: 0.25547\n",
"[29]\tvalid_0's l1: 0.501474\tvalid_0's l2: 0.255782\n",
"[30]\tvalid_0's l1: 0.501404\tvalid_0's l2: 0.256021\n",
"Starting predicting...\n",
"The rmse of prediction is: 0.5059851983827391\n",
"Feature importances: [0, 117, 110, 276, 32, 108, 234, 70, 60, 460, 3]\n"
]
}
],
"source": [
"# create dataset for lightgbm\n",
"lgb_train = lgb.Dataset(X_train, y_train)\n",
"lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)\n",
"\n",
"# specify your configurations as a dict\n",
"print('Starting training...')\n",
"# train\n",
"gbm = lgb.LGBMRegressor(num_leaves=50,\n",
" learning_rate=0.01,\n",
" n_estimators=30,\n",
" boosting_type = \"dart\")\n",
"gbm.fit(X_train, y_train,\n",
" eval_set=[(X_test, y_test)],\n",
" eval_metric='l1')\n",
"\n",
"print('Starting predicting...')\n",
"# predict\n",
"y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)\n",
"# eval\n",
"print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)\n",
"\n",
"# feature importances\n",
"print('Feature importances:', list(gbm.feature_importances_))"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "356b6771-39fe-42ee-90f5-ec5b63b40bee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters found by grid search are: {'boosting_type': 'dart', 'learning_rate': 0.01, 'n_estimators': 30, 'num_leaves': 50}\n"
]
}
],
"source": [
"# 3-fold grid search over LightGBM regressor hyperparameters.\n",
"estimator = lgb.LGBMRegressor(num_leaves=31)\n",
"\n",
"# Every combination of these values is fitted; num_leaves here overrides\n",
"# the value passed to the estimator above.\n",
"# NOTE(review): LightGBM's 'rf' mode appears to require bagging to be enabled\n",
"# (bagging_freq / bagging_fraction); with defaults those fits may fail - confirm.\n",
"param_grid = {\n",
"    'learning_rate': [0.01, 0.05, 0.1],\n",
"    'n_estimators': [20, 30, 40, 200],\n",
"    'num_leaves': [20, 31, 50],\n",
"    'boosting_type': ['gbdt', 'dart', 'goss', 'rf'],\n",
"}\n",
"\n",
"gbm = GridSearchCV(estimator, param_grid, cv=3)\n",
"gbm.fit(X_train, y_train)\n",
"\n",
"print('Best parameters found by grid search are:', gbm.best_params_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "afe897d8-3050-4f9a-9b0c-9dac793baa3b",
"metadata": {},
"outputs": [],
"source": [
"## yikes"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment