Commit af5babce authored by PauTheu

easy nn

parent 3fe6de26
......@@ -95,6 +95,27 @@
"df_train.all().corr(df_test.all())"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "47d34961-594d-47bf-8fed-7c2bd15e2e4f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(33264, 11)"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 32,
......@@ -220,6 +241,350 @@
"source": [
"## yikes"
]
},
{
"cell_type": "markdown",
"id": "fbf4bd8c-fbd0-4cc7-9aa3-c98969c9a55c",
"metadata": {},
"source": [
"### EASY NN"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "cb955265-8cf0-430a-88ab-4708876a8147",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torch.utils.data import Dataset, DataLoader\n",
"\n",
"from sklearn.preprocessing import StandardScaler \n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import confusion_matrix, classification_report"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "f90bf39c-5781-4b27-98e2-dcd183f925ef",
"metadata": {},
"outputs": [],
"source": [
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(X_train)\n",
"X_test = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "f88fdd4c-d42b-4981-9729-a949c6780507",
"metadata": {},
"outputs": [],
"source": [
"EPOCHS = 50\n",
"BATCH_SIZE = 64\n",
"LEARNING_RATE = 0.001"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "122bf29f-83bb-45ae-b78b-4808959fa58f",
"metadata": {},
"outputs": [],
"source": [
"class trainData(Dataset):\n",
" \n",
" def __init__(self, X_data, y_data):\n",
" self.X_data = X_data\n",
" self.y_data = y_data\n",
" \n",
" def __getitem__(self, index):\n",
" return self.X_data[index], self.y_data[index]\n",
" \n",
" def __len__ (self):\n",
" return len(self.X_data)\n",
"\n",
"\n",
"train_data = trainData(torch.FloatTensor(X_train), \n",
" torch.FloatTensor(y_train))"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "c073a012-37fb-4424-ba66-fe344d00e34e",
"metadata": {},
"outputs": [],
"source": [
"class testData(Dataset):\n",
" \n",
" def __init__(self, X_data):\n",
" self.X_data = X_data\n",
" \n",
" def __getitem__(self, index):\n",
" return self.X_data[index]\n",
" \n",
" def __len__ (self):\n",
" return len(self.X_data)\n",
" \n",
"\n",
"test_data = testData(torch.FloatTensor(X_test))"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "6d8bdcf5-e60c-4379-8024-a13a3ebf4b75",
"metadata": {},
"outputs": [],
"source": [
"train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)\n",
"test_loader = DataLoader(dataset=test_data, batch_size=1)"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "1c8a9f8d-f758-4a08-9350-b38db859e1b4",
"metadata": {},
"outputs": [],
"source": [
"class binaryClassification(nn.Module):\n",
" def __init__(self):\n",
" super(binaryClassification, self).__init__()\n",
" # Number of input features is 11.\n",
" self.layer_1 = nn.Linear(11, 64) \n",
" self.layer_2 = nn.Linear(64, 64)\n",
" self.layer_out = nn.Linear(64, 1) \n",
" \n",
" self.relu = nn.ReLU()\n",
" self.dropout = nn.Dropout(p=0.1)\n",
" self.batchnorm1 = nn.BatchNorm1d(64)\n",
" self.batchnorm2 = nn.BatchNorm1d(64)\n",
" \n",
" def forward(self, inputs):\n",
" x = self.relu(self.layer_1(inputs))\n",
" x = self.batchnorm1(x)\n",
" x = self.relu(self.layer_2(x))\n",
" x = self.batchnorm2(x)\n",
" x = self.dropout(x)\n",
" x = self.layer_out(x)\n",
" \n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "3896791a-41eb-4787-a0c3-93bd21282c19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda:0\n"
]
}
],
"source": [
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"print(device)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "7680c582-2d92-484c-bbd4-f22889f63647",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"binaryClassification(\n",
" (layer_1): Linear(in_features=11, out_features=64, bias=True)\n",
" (layer_2): Linear(in_features=64, out_features=64, bias=True)\n",
" (layer_out): Linear(in_features=64, out_features=1, bias=True)\n",
" (relu): ReLU()\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
")\n"
]
}
],
"source": [
"model = binaryClassification()\n",
"model.to(device)\n",
"print(model)\n",
"criterion = nn.BCEWithLogitsLoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)\n"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "6746f086-fa63-4c1c-a49e-310d7e2ec4b7",
"metadata": {},
"outputs": [],
"source": [
"def binary_acc(y_pred, y_test):\n",
" y_pred_tag = torch.round(torch.sigmoid(y_pred))\n",
"\n",
" correct_results_sum = (y_pred_tag == y_test).sum().float()\n",
" acc = correct_results_sum/y_test.shape[0]\n",
" acc = torch.round(acc * 100)\n",
" \n",
" return acc"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "93f8739a-7dfa-4e34-9f66-81e942942d34",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 001: | Loss: 0.58836 | Acc: 67.731\n",
"Epoch 002: | Loss: 0.53872 | Acc: 71.175\n",
"Epoch 003: | Loss: 0.51944 | Acc: 72.415\n",
"Epoch 004: | Loss: 0.50751 | Acc: 72.885\n",
"Epoch 005: | Loss: 0.49957 | Acc: 73.498\n",
"Epoch 006: | Loss: 0.49470 | Acc: 73.840\n",
"Epoch 007: | Loss: 0.48752 | Acc: 74.038\n",
"Epoch 008: | Loss: 0.48339 | Acc: 74.573\n",
"Epoch 009: | Loss: 0.47710 | Acc: 74.723\n",
"Epoch 010: | Loss: 0.47275 | Acc: 75.062\n",
"Epoch 011: | Loss: 0.47231 | Acc: 75.313\n",
"Epoch 012: | Loss: 0.46792 | Acc: 75.733\n",
"Epoch 013: | Loss: 0.46326 | Acc: 75.588\n",
"Epoch 014: | Loss: 0.46432 | Acc: 75.515\n",
"Epoch 015: | Loss: 0.46245 | Acc: 75.508\n",
"Epoch 016: | Loss: 0.46001 | Acc: 75.812\n",
"Epoch 017: | Loss: 0.45738 | Acc: 75.963\n",
"Epoch 018: | Loss: 0.45510 | Acc: 76.138\n",
"Epoch 019: | Loss: 0.45753 | Acc: 75.990\n",
"Epoch 020: | Loss: 0.44936 | Acc: 76.512\n",
"Epoch 021: | Loss: 0.45486 | Acc: 76.019\n",
"Epoch 022: | Loss: 0.44876 | Acc: 76.538\n",
"Epoch 023: | Loss: 0.45166 | Acc: 76.363\n",
"Epoch 024: | Loss: 0.45067 | Acc: 76.340\n",
"Epoch 025: | Loss: 0.44514 | Acc: 76.667\n",
"Epoch 026: | Loss: 0.44440 | Acc: 76.779\n",
"Epoch 027: | Loss: 0.44393 | Acc: 76.531\n",
"Epoch 028: | Loss: 0.43909 | Acc: 76.902\n",
"Epoch 029: | Loss: 0.44011 | Acc: 77.085\n",
"Epoch 030: | Loss: 0.43999 | Acc: 76.960\n",
"Epoch 031: | Loss: 0.43862 | Acc: 76.775\n",
"Epoch 032: | Loss: 0.43671 | Acc: 77.077\n",
"Epoch 033: | Loss: 0.43578 | Acc: 77.098\n",
"Epoch 034: | Loss: 0.43612 | Acc: 77.173\n",
"Epoch 035: | Loss: 0.43450 | Acc: 77.048\n",
"Epoch 036: | Loss: 0.43584 | Acc: 76.885\n",
"Epoch 037: | Loss: 0.43497 | Acc: 77.185\n",
"Epoch 038: | Loss: 0.43394 | Acc: 77.125\n",
"Epoch 039: | Loss: 0.43347 | Acc: 77.196\n",
"Epoch 040: | Loss: 0.43379 | Acc: 77.235\n",
"Epoch 041: | Loss: 0.43027 | Acc: 77.267\n",
"Epoch 042: | Loss: 0.42924 | Acc: 77.325\n",
"Epoch 043: | Loss: 0.42890 | Acc: 77.556\n",
"Epoch 044: | Loss: 0.42649 | Acc: 77.806\n",
"Epoch 045: | Loss: 0.42746 | Acc: 77.508\n",
"Epoch 046: | Loss: 0.42936 | Acc: 77.365\n",
"Epoch 047: | Loss: 0.42697 | Acc: 77.654\n",
"Epoch 048: | Loss: 0.42456 | Acc: 77.692\n",
"Epoch 049: | Loss: 0.42648 | Acc: 77.763\n",
"Epoch 050: | Loss: 0.42833 | Acc: 77.573\n"
]
}
],
"source": [
"model.train()\n",
"for e in range(1, EPOCHS+1):\n",
" epoch_loss = 0\n",
" epoch_acc = 0\n",
" for X_batch, y_batch in train_loader:\n",
" X_batch, y_batch = X_batch.to(device), y_batch.to(device)\n",
" optimizer.zero_grad()\n",
" \n",
" y_pred = model(X_batch)\n",
" \n",
" loss = criterion(y_pred, y_batch.unsqueeze(1))\n",
" acc = binary_acc(y_pred, y_batch.unsqueeze(1))\n",
" \n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" epoch_loss += loss.item()\n",
" epoch_acc += acc.item()\n",
" \n",
"\n",
" print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "13260656-39d3-4bc7-91fe-e7f5fe09c6d2",
"metadata": {},
"outputs": [],
"source": [
"y_pred_list = []\n",
"model.eval()\n",
"with torch.no_grad():\n",
" for X_batch in test_loader:\n",
" X_batch = X_batch.to(device)\n",
" y_test_pred = model(X_batch)\n",
" y_test_pred = torch.sigmoid(y_test_pred)\n",
" y_pred_tag = torch.round(y_test_pred)\n",
" y_pred_list.append(y_pred_tag.cpu().numpy())\n",
"\n",
"y_pred_list = [a.squeeze().tolist() for a in y_pred_list]"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "1bd71613-25f7-4342-aa58-2abebce0f5f9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0.0 0.53 0.61 0.57 7533\n",
" 1.0 0.54 0.45 0.49 7394\n",
"\n",
" accuracy 0.53 14927\n",
" macro avg 0.53 0.53 0.53 14927\n",
"weighted avg 0.53 0.53 0.53 14927\n",
"\n"
]
}
],
"source": [
"print(classification_report(y_test, y_pred_list))"
]
},
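{
"cell_type": "markdown",
"id": "3c1a2b4d-5e6f-4a0b-8c9d-0e1f2a3b4c5d",
"metadata": {},
"source": [
"A small sketch with the already-imported `confusion_matrix`, assuming `y_test` and `y_pred_list` from the cells above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f8e7d6c-5b4a-4c3d-a2b1-0c9d8e7f6a5b",
"metadata": {},
"outputs": [],
"source": [
"# raw error counts behind the report above (rows: true class, columns: predicted class)\n",
"print(confusion_matrix(y_test, y_pred_list))"
]
},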
{
"cell_type": "code",
"execution_count": null,
"id": "6d2f8d98-c965-494a-b3c7-7b822e3ab6f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......
......@@ -694,9 +694,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:psda]",
"display_name": "Python 3",
"language": "python",
"name": "conda-env-psda-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
......@@ -708,7 +708,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.8.8"
},
"toc": {
"base_numbering": 1,
......@@ -725,5 +725,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}