{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import os" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "path_0 = '../data/preprocessed/clusters/0/'\n", "path_1 = '../data/preprocessed/clusters/1/'\n", "path_2 = '../data/preprocessed/clusters/2/'\n", "\n", "dfs_c_0 = []\n", "dfs_c_1 = []\n", "dfs_c_2 = []\n", "dfs_c_0_grouped = []\n", "dfs_c_1_grouped = []\n", "dfs_c_2_grouped = []\n", "\n", "csv_files_0 = [csv for csv in os.listdir(path_0) if csv.endswith('.csv')]\n", "csv_files_1 = [csv for csv in os.listdir(path_1) if csv.endswith('.csv')]\n", "csv_files_2 = [csv for csv in os.listdir(path_2) if csv.endswith('.csv')]\n", "\n", "for file in csv_files_0:\n", " # import DataFrame\n", " df = pd.read_csv(path_0 + file)\n", "\n", " if file.startswith('g_'):\n", " dfs_c_0_grouped.append(df)\n", " else:\n", " dfs_c_0.append(df)\n", "\n", "for file in csv_files_1:\n", " # import DataFrame\n", " df = pd.read_csv(path_1 + file)\n", "\n", " if file.startswith('g_'):\n", " dfs_c_1_grouped.append(df)\n", " else:\n", " dfs_c_1.append(df)\n", "\n", "for file in csv_files_2:\n", " # import DataFrame\n", " df = pd.read_csv(path_2 + file)\n", "\n", " if file.startswith('g_'):\n", " dfs_c_2_grouped.append(df)\n", " else:\n", " dfs_c_2.append(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Include all data into DataFrames" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Include holiday data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateHoliday
02020-01-011
12020-01-020
22020-01-030
32020-01-040
42020-01-051
.........
7262021-12-270
7272021-12-280
7282021-12-290
7292021-12-300
7302021-12-310
\n", "

731 rows × 2 columns

\n", "
" ], "text/plain": [ " Date Holiday\n", "0 2020-01-01 1\n", "1 2020-01-02 0\n", "2 2020-01-03 0\n", "3 2020-01-04 0\n", "4 2020-01-05 1\n", ".. ... ...\n", "726 2021-12-27 0\n", "727 2021-12-28 0\n", "728 2021-12-29 0\n", "729 2021-12-30 0\n", "730 2021-12-31 0\n", "\n", "[731 rows x 2 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "holiday = '../data/raw/holidays.csv'\n", "df_holiday = pd.read_csv(holiday)\n", "df_holiday" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import datetime" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "def assign_holidays(df, df_holiday):\n", " df['holiday'] = 0\n", "\n", " for i in range(0,len(df), 1):\n", " form = \"%Y-%m-%d\"\n", " d1 = datetime.datetime.strptime(df['time_stamp'][i], form)\n", "\n", " #get holiday value\n", " for j in range(0, len(df_holiday), 1):\n", " d2 = datetime.datetime.strptime(df_holiday['Date'][j], form)\n", " if d1 == d2: \n", " df['holiday'][i] = df_holiday['Holiday'][j]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/2432097566.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['holiday'][i] = df_holiday['Holiday'][j]\n" ] } ], "source": [ "for df in dfs_c_0_grouped: \n", " assign_holidays(df, df_holiday)\n", "for df in dfs_c_1_grouped: \n", " assign_holidays(df, df_holiday)\n", "for df in dfs_c_2_grouped: \n", " assign_holidays(df, df_holiday)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Include weather data" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetavgtmintmaxprcpsnowwdirwspdwpgtprestsun
02020-05-0817.58.224.80.00112.07.227.71018.0616
12020-05-0915.712.517.40.6060.011.525.21013.510
22020-05-1016.811.724.92.90129.08.663.01006.5308
32020-05-117.21.915.59.0056.023.861.21009.238
42020-05-127.91.014.20.00130.08.324.11018.7736
....................................
3612021-05-0410.58.013.70.00215.032.477.81006.567
3622021-05-057.94.111.40.70248.024.165.51008.7422
3632021-05-066.93.212.15.90174.011.534.91009.146
3642021-05-077.51.512.10.70273.014.459.41015.9654
3652021-05-0810.0-0.717.50.00169.09.438.21017.6620
\n", "

366 rows × 11 columns

\n", "
" ], "text/plain": [ " date tavg tmin tmax prcp snow wdir wspd wpgt pres tsun\n", "0 2020-05-08 17.5 8.2 24.8 0.0 0 112.0 7.2 27.7 1018.0 616\n", "1 2020-05-09 15.7 12.5 17.4 0.6 0 60.0 11.5 25.2 1013.5 10\n", "2 2020-05-10 16.8 11.7 24.9 2.9 0 129.0 8.6 63.0 1006.5 308\n", "3 2020-05-11 7.2 1.9 15.5 9.0 0 56.0 23.8 61.2 1009.2 38\n", "4 2020-05-12 7.9 1.0 14.2 0.0 0 130.0 8.3 24.1 1018.7 736\n", ".. ... ... ... ... ... ... ... ... ... ... ...\n", "361 2021-05-04 10.5 8.0 13.7 0.0 0 215.0 32.4 77.8 1006.5 67\n", "362 2021-05-05 7.9 4.1 11.4 0.7 0 248.0 24.1 65.5 1008.7 422\n", "363 2021-05-06 6.9 3.2 12.1 5.9 0 174.0 11.5 34.9 1009.1 46\n", "364 2021-05-07 7.5 1.5 12.1 0.7 0 273.0 14.4 59.4 1015.9 654\n", "365 2021-05-08 10.0 -0.7 17.5 0.0 0 169.0 9.4 38.2 1017.6 620\n", "\n", "[366 rows x 11 columns]" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weather_path = '../data/raw/weather_FrankfurtAirport.csv'\n", "df_weather = pd.read_csv(weather_path)\n", "df_weather" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "def assign_weather(df, df_weather):\n", " df['temp avg'] = 0.0\n", " df['temp min'] = 0.0\n", " df['temp max'] = 0.0\n", " df['rainfall sum'] = 0.0\n", " df['snowfall sum'] = 0.0\n", " df['sunshine minutes'] = 0\n", "\n", " for i in range(0,len(df), 1):\n", " form = \"%Y-%m-%d\"\n", " d1 = datetime.datetime.strptime(df['time_stamp'][i], form)\n", "\n", " #get weather values\n", " for j in range(0, len(df_weather), 1):\n", " d2 = datetime.datetime.strptime(df_weather['date'][j], form)\n", " if d1 == d2: \n", " df['temp avg'][i] = df_weather['tavg'][j]\n", " df['temp min'][i] = df_weather['tmin'][j]\n", " df['temp max'][i] = df_weather['tmax'][j]\n", " df['rainfall sum'][i] = df_weather['prcp'][j]\n", " df['snowfall sum'][i] = df_weather['snow'][j]\n", " df['sunshine minutes'][i] = df_weather['tsun'][j]" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:17: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['temp avg'][i] = df_weather['tavg'][j]\n", "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:18: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['temp min'][i] = df_weather['tmin'][j]\n", "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:19: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['temp max'][i] = df_weather['tmax'][j]\n", "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:20: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['rainfall sum'][i] = df_weather['prcp'][j]\n", "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:21: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['snowfall sum'][i] = df_weather['snow'][j]\n", "C:\\Users\\hendr\\AppData\\Local\\Temp/ipykernel_12036/777759664.py:22: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df['sunshine minutes'][i] = df_weather['tsun'][j]\n" ] } ], "source": [ "# assign to all DataFrames\n", "for df in dfs_c_0_grouped: \n", " assign_weather(df, df_weather)\n", "for df in dfs_c_1_grouped: \n", " assign_weather(df, df_weather)\n", "for df in dfs_c_2_grouped: \n", " assign_weather(df, df_weather)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }