{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_0 = '../data/preprocessed/clusters/0/'\n",
    "path_1 = '../data/preprocessed/clusters/1/'\n",
    "path_2 = '../data/preprocessed/clusters/2/'\n",
    "\n",
    "\n",
    "def list_csv_files(path):\n",
    "    \"\"\"Return the names of the '.csv' files in `path`, sorted by name.\n",
    "\n",
    "    os.listdir() returns entries in arbitrary order, so the names are sorted\n",
    "    to keep the order of the loaded DataFrames the same on every run.\n",
    "    \"\"\"\n",
    "    return sorted(name for name in os.listdir(path) if name.endswith('.csv'))\n",
    "\n",
    "\n",
    "def load_cluster(path, csv_files):\n",
    "    \"\"\"Read every file named in `csv_files` from `path` with pd.read_csv.\n",
    "\n",
    "    Returns a tuple `(plain, grouped)` of lists of DataFrames: files whose\n",
    "    name starts with 'g_' go into `grouped`, all other files into `plain`.\n",
    "    Both lists follow the order of `csv_files`.\n",
    "    \"\"\"\n",
    "    plain, grouped = [], []\n",
    "    for name in csv_files:\n",
    "        df = pd.read_csv(os.path.join(path, name))\n",
    "        if name.startswith('g_'):\n",
    "            grouped.append(df)\n",
    "        else:\n",
    "            plain.append(df)\n",
    "    return plain, grouped\n",
    "\n",
    "\n",
    "csv_files_0 = list_csv_files(path_0)\n",
    "csv_files_1 = list_csv_files(path_1)\n",
    "csv_files_2 = list_csv_files(path_2)\n",
    "\n",
    "# One (plain, grouped) pair of DataFrame lists per cluster directory.\n",
    "dfs_c_0, dfs_c_0_grouped = load_cluster(path_0, csv_files_0)\n",
    "dfs_c_1, dfs_c_1_grouped = load_cluster(path_1, csv_files_1)\n",
    "dfs_c_2, dfs_c_2_grouped = load_cluster(path_2, csv_files_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Include all data into DataFrames"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Include holiday data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Date | \n", "Holiday | \n", "
---|---|---|
0 | \n", "2020-01-01 | \n", "1 | \n", "
1 | \n", "2020-01-02 | \n", "0 | \n", "
2 | \n", "2020-01-03 | \n", "0 | \n", "
3 | \n", "2020-01-04 | \n", "0 | \n", "
4 | \n", "2020-01-05 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "
726 | \n", "2021-12-27 | \n", "0 | \n", "
727 | \n", "2021-12-28 | \n", "0 | \n", "
728 | \n", "2021-12-29 | \n", "0 | \n", "
729 | \n", "2021-12-30 | \n", "0 | \n", "
730 | \n", "2021-12-31 | \n", "0 | \n", "
731 rows × 2 columns
\n", "\n", " | date | \n", "tavg | \n", "tmin | \n", "tmax | \n", "prcp | \n", "snow | \n", "wdir | \n", "wspd | \n", "wpgt | \n", "pres | \n", "tsun | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2020-05-08 | \n", "17.5 | \n", "8.2 | \n", "24.8 | \n", "0.0 | \n", "0 | \n", "112.0 | \n", "7.2 | \n", "27.7 | \n", "1018.0 | \n", "616 | \n", "
1 | \n", "2020-05-09 | \n", "15.7 | \n", "12.5 | \n", "17.4 | \n", "0.6 | \n", "0 | \n", "60.0 | \n", "11.5 | \n", "25.2 | \n", "1013.5 | \n", "10 | \n", "
2 | \n", "2020-05-10 | \n", "16.8 | \n", "11.7 | \n", "24.9 | \n", "2.9 | \n", "0 | \n", "129.0 | \n", "8.6 | \n", "63.0 | \n", "1006.5 | \n", "308 | \n", "
3 | \n", "2020-05-11 | \n", "7.2 | \n", "1.9 | \n", "15.5 | \n", "9.0 | \n", "0 | \n", "56.0 | \n", "23.8 | \n", "61.2 | \n", "1009.2 | \n", "38 | \n", "
4 | \n", "2020-05-12 | \n", "7.9 | \n", "1.0 | \n", "14.2 | \n", "0.0 | \n", "0 | \n", "130.0 | \n", "8.3 | \n", "24.1 | \n", "1018.7 | \n", "736 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
361 | \n", "2021-05-04 | \n", "10.5 | \n", "8.0 | \n", "13.7 | \n", "0.0 | \n", "0 | \n", "215.0 | \n", "32.4 | \n", "77.8 | \n", "1006.5 | \n", "67 | \n", "
362 | \n", "2021-05-05 | \n", "7.9 | \n", "4.1 | \n", "11.4 | \n", "0.7 | \n", "0 | \n", "248.0 | \n", "24.1 | \n", "65.5 | \n", "1008.7 | \n", "422 | \n", "
363 | \n", "2021-05-06 | \n", "6.9 | \n", "3.2 | \n", "12.1 | \n", "5.9 | \n", "0 | \n", "174.0 | \n", "11.5 | \n", "34.9 | \n", "1009.1 | \n", "46 | \n", "
364 | \n", "2021-05-07 | \n", "7.5 | \n", "1.5 | \n", "12.1 | \n", "0.7 | \n", "0 | \n", "273.0 | \n", "14.4 | \n", "59.4 | \n", "1015.9 | \n", "654 | \n", "
365 | \n", "2021-05-08 | \n", "10.0 | \n", "-0.7 | \n", "17.5 | \n", "0.0 | \n", "0 | \n", "169.0 | \n", "9.4 | \n", "38.2 | \n", "1017.6 | \n", "620 | \n", "
366 rows × 11 columns
\n", "