Commit 15ec4208 authored by Hendrik Becker's avatar Hendrik Becker
Browse files

Include weather data and holidays

parent d95eddc8
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"path_0 = '../data/preprocessed/clusters/0/'\n",
"path_1 = '../data/preprocessed/clusters/1/'\n",
"path_2 = '../data/preprocessed/clusters/2/'\n",
"\n",
"\n",
"def load_cluster_csvs(cluster_path):\n",
"    \"\"\"Read every CSV file located directly inside one cluster directory.\n",
"\n",
"    Files whose name starts with 'g_' are collected separately from all\n",
"    other CSV files.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cluster_path : str\n",
"        Directory to scan (sub-directories are not searched).\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (csv_files, dfs, dfs_grouped): the sorted CSV file names, the\n",
"        DataFrames read from the non-'g_' files and the DataFrames read\n",
"        from the 'g_' files. Both DataFrame lists follow the sorted\n",
"        file-name order.\n",
"\n",
"    Raises\n",
"    ------\n",
"    FileNotFoundError\n",
"        If cluster_path does not exist.\n",
"    \"\"\"\n",
"    # os.listdir returns entries in arbitrary order; sorting keeps the list\n",
"    # positions stable between runs and machines.\n",
"    csv_files = sorted(\n",
"        name for name in os.listdir(cluster_path) if name.endswith('.csv')\n",
"    )\n",
"\n",
"    dfs = []\n",
"    dfs_grouped = []\n",
"    for name in csv_files:\n",
"        frame = pd.read_csv(os.path.join(cluster_path, name))\n",
"        if name.startswith('g_'):\n",
"            dfs_grouped.append(frame)\n",
"        else:\n",
"            dfs.append(frame)\n",
"\n",
"    return csv_files, dfs, dfs_grouped\n",
"\n",
"\n",
"# One call per cluster instead of three copy-pasted loops.\n",
"csv_files_0, dfs_c_0, dfs_c_0_grouped = load_cluster_csvs(path_0)\n",
"csv_files_1, dfs_c_1, dfs_c_1_grouped = load_cluster_csvs(path_1)\n",
"csv_files_2, dfs_c_2, dfs_c_2_grouped = load_cluster_csvs(path_2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Include all data in the DataFrames"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Include holiday data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Holiday</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2020-01-01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2020-01-02</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2020-01-03</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2020-01-04</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2020-01-05</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>726</th>\n",
" <td>2021-12-27</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>727</th>\n",
" <td>2021-12-28</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>728</th>\n",
" <td>2021-12-29</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>729</th>\n",
" <td>2021-12-30</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>730</th>\n",
" <td>2021-12-31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>731 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Date Holiday\n",
"0 2020-01-01 1\n",
"1 2020-01-02 0\n",
"2 2020-01-03 0\n",
"3 2020-01-04 0\n",
"4 2020-01-05 1\n",
".. ... ...\n",
"726 2021-12-27 0\n",
"727 2021-12-28 0\n",
"728 2021-12-29 0\n",
"729 2021-12-30 0\n",
"730 2021-12-31 0\n",
"\n",
"[731 rows x 2 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Holiday calendar: per the stored output, a 'Date' column and a 0/1 'Holiday' column.\n",
"holiday = '../data/raw/holidays.csv'\n",
"df_holiday = pd.read_csv(filepath_or_buffer=holiday)\n",
"\n",
"# Bare last expression so the notebook renders the frame as a rich table.\n",
"df_holiday"
]
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment