Commit 15ec4208 authored by ukuiq's avatar ukuiq
Browse files

Include weather data and holidays

parent d95eddc8
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
```
%% Cell type:code id: tags:
``` python
# Directories holding the preprocessed CSV files, one per cluster.
path_0 = '../data/preprocessed/clusters/0/'
path_1 = '../data/preprocessed/clusters/1/'
path_2 = '../data/preprocessed/clusters/2/'


def _load_cluster(path):
    """Read every ``.csv`` file in *path* and split the frames by name prefix.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    tuple
        ``(csv_files, dfs, dfs_grouped)`` where ``csv_files`` is the sorted
        list of CSV file names, ``dfs_grouped`` holds the DataFrames read
        from files whose name starts with ``'g_'`` and ``dfs`` holds the
        DataFrames read from all other CSV files.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of the resulting lists reproducible across machines and runs.
    csv_files = sorted(
        name for name in os.listdir(path) if name.endswith('.csv')
    )
    dfs = []
    dfs_grouped = []
    for name in csv_files:
        frame = pd.read_csv(os.path.join(path, name))
        if name.startswith('g_'):
            dfs_grouped.append(frame)
        else:
            dfs.append(frame)
    return csv_files, dfs, dfs_grouped


# One (file names, plain frames, 'g_'-prefixed frames) triple per cluster.
csv_files_0, dfs_c_0, dfs_c_0_grouped = _load_cluster(path_0)
csv_files_1, dfs_c_1, dfs_c_1_grouped = _load_cluster(path_1)
csv_files_2, dfs_c_2, dfs_c_2_grouped = _load_cluster(path_2)
```
%% Cell type:markdown id: tags:
Include all data into DataFrames
%% Cell type:markdown id: tags:
Include holiday data
%% Cell type:code id: tags:
``` python
# Location of the holiday calendar CSV (relative path).
holiday = '../data/raw/holidays.csv'
# Load the calendar; the displayed output below shows a `Date` column and a
# `Holiday` column.
df_holiday = pd.read_csv(holiday)
# Bare expression: shows the frame as the notebook cell's output.
df_holiday
```
%%%% Output: execute_result
Date Holiday
0 2020-01-01 1
1 2020-01-02 0
2 2020-01-03 0
3 2020-01-04 0
4 2020-01-05 1
.. ... ...
726 2021-12-27 0
727 2021-12-28 0
728 2021-12-29 0
729 2021-12-30 0
730 2021-12-31 0
[731 rows x 2 columns]
%% Cell type:code id: tags:
``` python
import datetime
```
%% Cell type:code id: tags:
``` python
def assign_holidays(df, df_holiday):
    """Add a ``'holiday'`` column to *df*, looked up by date in *df_holiday*.

    Each ``df['time_stamp']`` value is matched against ``df_holiday['Date']``
    (both parsed as ``%Y-%m-%d`` strings). Rows whose date has a match receive
    the corresponding ``df_holiday['Holiday']`` value; rows without a match
    receive ``0``. If a date occurs more than once in *df_holiday*, the last
    occurrence wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'time_stamp'`` column of ``YYYY-MM-DD`` strings.
        Modified in place; any index is supported.
    df_holiday : pandas.DataFrame
        Frame with ``'Date'`` (``YYYY-MM-DD`` strings) and ``'Holiday'``
        columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a date string does not match the ``%Y-%m-%d`` format.
    """
    form = "%Y-%m-%d"
    parse = datetime.datetime.strptime

    # Parse every holiday date exactly once; later duplicates overwrite
    # earlier ones, so the last occurrence of a date wins.
    holiday_by_date = {
        parse(day, form): flag
        for day, flag in zip(df_holiday['Date'], df_holiday['Holiday'])
    }

    # Iterating the column (instead of indexing it with 0..n-1) makes the
    # lookup independent of the frame's index labels.
    flags = [
        holiday_by_date.get(parse(stamp, form), 0)
        for stamp in df['time_stamp']
    ]

    # One whole-column assignment instead of element-wise chained indexing
    # (`df[col][i] = ...`), which pandas does not guarantee to write through
    # to `df`. The scalar fallback keeps an integer column for an empty frame.
    df['holiday'] = flags if flags else 0
```
%% Cell type:code id: tags:
``` python
# Add the holiday column to every 'g_'-prefixed frame, cluster by cluster.
for cluster_frames in (dfs_c_0_grouped, dfs_c_1_grouped, dfs_c_2_grouped):
    for df in cluster_frames:
        assign_holidays(df, df_holiday)
```
%% Cell type:markdown id: tags:
Include weather data
%% Cell type:code id: tags:
``` python
# Location of the weather CSV (relative path).
weather_path = '../data/raw/weather_FrankfurtAirport.csv'
# Load the weather table; the displayed output below shows a `date` column
# followed by tavg, tmin, tmax, prcp, snow, wdir, wspd, wpgt, pres and tsun.
df_weather = pd.read_csv(weather_path)
# Bare expression: shows the frame as the notebook cell's output.
df_weather
```
%%%% Output: execute_result
date tavg tmin tmax prcp snow wdir wspd wpgt pres tsun
0 2020-05-08 17.5 8.2 24.8 0.0 0 112.0 7.2 27.7 1018.0 616
1 2020-05-09 15.7 12.5 17.4 0.6 0 60.0 11.5 25.2 1013.5 10
2 2020-05-10 16.8 11.7 24.9 2.9 0 129.0 8.6 63.0 1006.5 308
3 2020-05-11 7.2 1.9 15.5 9.0 0 56.0 23.8 61.2 1009.2 38
4 2020-05-12 7.9 1.0 14.2 0.0 0 130.0 8.3 24.1 1018.7 736
.. ... ... ... ... ... ... ... ... ... ... ...
361 2021-05-04 10.5 8.0 13.7 0.0 0 215.0 32.4 77.8 1006.5 67
362 2021-05-05 7.9 4.1 11.4 0.7 0 248.0 24.1 65.5 1008.7 422
363 2021-05-06 6.9 3.2 12.1 5.9 0 174.0 11.5 34.9 1009.1 46
364 2021-05-07 7.5 1.5 12.1 0.7 0 273.0 14.4 59.4 1015.9 654
365 2021-05-08 10.0 -0.7 17.5 0.0 0 169.0 9.4 38.2 1017.6 620
[366 rows x 11 columns]
%% Cell type:code id: tags:
``` python
def assign_weather(df, df_weather):
    """Add six weather columns to *df*, looked up by date in *df_weather*.

    Each ``df['time_stamp']`` value is matched against ``df_weather['date']``
    (both parsed as ``%Y-%m-%d`` strings). The following columns are written
    to *df*:

    ==================== ====================== =======
    column in *df*       source in *df_weather* default
    ==================== ====================== =======
    ``temp avg``         ``tavg``               0.0
    ``temp min``         ``tmin``               0.0
    ``temp max``         ``tmax``               0.0
    ``rainfall sum``     ``prcp``               0.0
    ``snowfall sum``     ``snow``               0.0
    ``sunshine minutes`` ``tsun``               0
    ==================== ====================== =======

    Rows whose date has no match in *df_weather* receive the default. If a
    date occurs more than once in *df_weather*, the last occurrence wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'time_stamp'`` column of ``YYYY-MM-DD`` strings.
        Modified in place; any index is supported.
    df_weather : pandas.DataFrame
        Frame with a ``'date'`` column (``YYYY-MM-DD`` strings) and the
        source columns listed above.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a date string does not match the ``%Y-%m-%d`` format.
    """
    form = "%Y-%m-%d"
    parse = datetime.datetime.strptime

    # (target column in df, source column in df_weather, value when no match)
    columns = (
        ('temp avg', 'tavg', 0.0),
        ('temp min', 'tmin', 0.0),
        ('temp max', 'tmax', 0.0),
        ('rainfall sum', 'prcp', 0.0),
        ('snowfall sum', 'snow', 0.0),
        ('sunshine minutes', 'tsun', 0),
    )

    # Parse every weather date exactly once and remember its row position;
    # later duplicates overwrite earlier ones, so the last occurrence wins.
    row_by_date = {
        parse(day, form): pos for pos, day in enumerate(df_weather['date'])
    }

    # Row position in df_weather for each row of df (None = no match).
    # Iterating the column keeps this independent of df's index labels.
    positions = [
        row_by_date.get(parse(stamp, form)) for stamp in df['time_stamp']
    ]

    for target, source, default in columns:
        source_values = df_weather[source].tolist()
        picked = [
            default if pos is None else source_values[pos]
            for pos in positions
        ]
        if isinstance(default, float):
            # An integer-typed source (e.g. a snow column of all zeros) must
            # not turn a float column into an integer one.
            picked = [float(value) for value in picked]
        # One whole-column assignment instead of element-wise chained
        # indexing (`df[col][i] = ...`), which pandas does not guarantee to
        # write through to `df`. The scalar fallback keeps the column's dtype
        # well-defined when the frame has no rows.
        df[target] = picked if picked else default
```
%% Cell type:code id: tags:
``` python
# assign to all DataFrames
for df in dfs_c_0_grouped:
assign_weather(df, df_weather)
for df in dfs_c_1_grouped:
assign_weather(df, df_weather)
for df in dfs_c_2_grouped:
assign_weather(df, df_weather)
```
%% Cell type:code id: tags:
``` python
```
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment