Commit e411ed34 authored by Your Name's avatar Your Name
Browse files

Merge branch 'master' of https://git.scc.kit.edu/uflgi/bda-analytics-challenge-template

'merge'

 the commit.
parents 8f5394be 64510496
%% Cell type:code id:0aec3818-50a5-4e40-abfe-09dc0620e6c5 tags:
``` python
import os
import sys

# Make the project's ``src`` directory importable from this notebook.
# The path is built with os.path.join instead of the literal '..\src':
# "\s" is an invalid escape sequence in a normal string literal, and a
# hard-coded backslash only works as a separator on Windows.
sys.path.insert(1, os.path.join('..', 'src'))
from additionalFeatures import webscraper

# One public-holiday overview page per year to scrape.
url = [
    'https://urlaubstage-planen.de/feiertage-deutschland/2020/hessen.htm',
    'https://urlaubstage-planen.de/feiertage-deutschland/2021/hessen.htm',
]
# The trailing semicolon keeps the notebook from echoing the call's result.
webscraper(url);
```
%% Cell type:code id:9dabd84d-e26b-4091-9470-c118dae7267d tags:
``` python
```
import requests
from lxml import html
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np
import csv
from datetime import date, timedelta
def webscraper(url, *, start='2020-01-01', end='2021-12-31',
               workfree_rows=12, dayfirst=True, timeout=30,
               output_path=None):
    """Scrape holiday dates and write a daily 0/1 holiday indicator to CSV.

    For every page in *url* the first *workfree_rows* date cells of the
    holiday table are collected. A DataFrame with one row per calendar day
    between *start* and *end* is then built, with a ``Holiday`` column that
    is 1 on scraped dates and 0 otherwise. The frame is printed, written to
    *output_path* and returned.

    Args:
        url: A single page URL or an iterable of page URLs.
        start: First day of the generated calendar (inclusive).
        end: Last day of the generated calendar (inclusive).
        workfree_rows: Number of leading table rows to take from each page.
        dayfirst: Whether ambiguous date strings are read day-first.
        timeout: Timeout in seconds for each HTTP request.
        output_path: Target CSV file. Defaults to ``../data/raw/holidays.csv``
            relative to the current working directory.

    Returns:
        The DataFrame with the columns ``Date`` and ``Holiday``.

    Raises:
        requests.HTTPError: If a page answers with an HTTP error status.
    """
    from pathlib import Path

    # A bare string would otherwise be iterated character by character.
    if isinstance(url, str):
        url = [url]

    date_strings = []
    for page_url in url:
        date_strings.extend(
            _scrape_workfree_dates(page_url, workfree_rows, timeout)
        )

    df = _build_holiday_frame(date_strings, start, end, dayfirst)
    print(df)

    if output_path is None:
        # Built from parts so the separator is correct on every platform
        # (on Windows this is the same location as '..\data\raw\holidays.csv').
        output_path = Path('..') / 'data' / 'raw' / 'holidays.csv'
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    return df


def _scrape_workfree_dates(page_url, workfree_rows, timeout):
    """Return the stripped date cells of the first table rows of one page."""
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    tree = html.fromstring(response.content)
    date_cells = tree.xpath(
        '//*[@id="page-content"]/main/article/section/div/table/tbody/tr/td[1]/text()'
    )
    # The leading rows of the table are the work-free public holidays.
    return [str(cell).strip() for cell in date_cells[:workfree_rows]]


def _build_holiday_frame(date_strings, start, end, dayfirst):
    """Build the daily calendar frame with a 0/1 ``Holiday`` column."""
    # Parse explicitly rather than relying on implicit string coercion when
    # comparing against a datetime column.
    # NOTE(review): the scraped pages are German, so dates are presumably
    # written DD.MM.YYYY -- confirm against the live page.
    parsed = pd.to_datetime(
        pd.Series(date_strings, dtype='object'),
        dayfirst=dayfirst,
        errors='coerce',
    )
    # Unparseable cells become NaT and are dropped; they can never match.
    holidays = parsed.dropna().dt.normalize()

    df = pd.DataFrame({'Date': pd.date_range(start=start, end=end)})
    df['Holiday'] = df['Date'].isin(holidays).astype(int)
    return df
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment