Commit aebe75ec authored by Your Name's avatar Your Name
Browse files

regression

parent 5d22d90e
This diff is collapsed.
This diff is collapsed.
......@@ -62,7 +62,8 @@ def import_preprocessed_data():
return dfs_c_0, dfs_c_1, dfs_c_2, dfs_c_0_grouped, dfs_c_1_grouped, dfs_c_2_grouped
'''Each cluster gets its own calculate-empties function because the containers' average fill levels
lie in very different ranges.'''
# function calculates the empties of a container.
def calculate_empties(df, threshold):
empties_indices = []
......@@ -76,6 +77,48 @@ def calculate_empties(df, threshold):
empties_indices.pop(idx)
return empties_indices
# function calculates the empties of a container for cluster 0
def calculate_empties_0(df, threshold, min_level=120, min_gap=3):
    """Return the positions in ``df['inter_pol']`` flagged as empties (cluster 0).

    A position ``i`` (``i >= 1``) is flagged when the reading at ``i`` is
    larger than ``threshold`` times the reading at ``i - 1`` and also larger
    than ``min_level``.  A flagged position that lies closer than ``min_gap``
    to the previously kept position is treated as a duplicate and dropped.

    Args:
        df: DataFrame (or mapping) with an ``'inter_pol'`` column of numbers.
        threshold: Factor by which a reading must exceed its predecessor.
        min_level: Value the reading itself must exceed (120 for cluster 0).
        min_gap: Minimum distance between two returned positions.

    Returns:
        Ascending list of zero-based positions; empty if nothing is flagged.
    """
    # Work on a plain list so look-ups are positional; label-based access
    # (df['inter_pol'][idx - 1]) fails when the index is not 0..n-1.
    readings = list(df['inter_pol'])

    candidates = [
        pos
        for pos in range(1, len(readings))
        if readings[pos - 1] * threshold < readings[pos]
        and readings[pos] > min_level
    ]

    # filter out double values
    # Build a new list instead of popping from the one being iterated:
    # popping in place skips the element that slides into the freed slot,
    # which let near-duplicates survive.
    empties_indices = []
    for pos in candidates:
        if not empties_indices or pos - empties_indices[-1] >= min_gap:
            empties_indices.append(pos)
    return empties_indices
# function calculates the empties of a container for cluster 1
def calculate_empties_1(df, threshold, min_level=60, min_gap=3):
    """Return the positions in ``df['inter_pol']`` flagged as empties (cluster 1).

    A position ``i`` (``i >= 1``) is flagged when the reading at ``i`` is
    larger than ``threshold`` times the reading at ``i - 1`` and also larger
    than ``min_level``.  A flagged position that lies closer than ``min_gap``
    to the previously kept position is treated as a duplicate and dropped.

    Args:
        df: DataFrame (or mapping) with an ``'inter_pol'`` column of numbers.
        threshold: Factor by which a reading must exceed its predecessor.
        min_level: Value the reading itself must exceed (60 for cluster 1).
        min_gap: Minimum distance between two returned positions.

    Returns:
        Ascending list of zero-based positions; empty if nothing is flagged.
    """
    # Positional access via a plain list; label-based look-ups on the Series
    # fail when the DataFrame index is not 0..n-1.
    readings = list(df['inter_pol'])

    candidates = [
        pos
        for pos in range(1, len(readings))
        if readings[pos - 1] * threshold < readings[pos]
        and readings[pos] > min_level
    ]

    # filter out double values
    # A fresh list is built on purpose: popping from the list while
    # enumerating it skips the element that moves into the freed slot.
    empties_indices = []
    for pos in candidates:
        if not empties_indices or pos - empties_indices[-1] >= min_gap:
            empties_indices.append(pos)
    return empties_indices
# function calculates the empties of a container for cluster 2
def calculate_empties_2(df, threshold, min_level=65, min_gap=3):
    """Return the positions in ``df['inter_pol']`` flagged as empties (cluster 2).

    A position ``i`` (``i >= 1``) is flagged when the reading at ``i`` is
    larger than ``threshold`` times the reading at ``i - 1`` and also larger
    than ``min_level``.  A flagged position that lies closer than ``min_gap``
    to the previously kept position is treated as a duplicate and dropped.

    Args:
        df: DataFrame (or mapping) with an ``'inter_pol'`` column of numbers.
        threshold: Factor by which a reading must exceed its predecessor.
        min_level: Value the reading itself must exceed (65 for cluster 2).
        min_gap: Minimum distance between two returned positions.

    Returns:
        Ascending list of zero-based positions; empty if nothing is flagged.
    """
    # Convert once to a list: positional indexing that does not depend on the
    # DataFrame's index labels.
    readings = list(df['inter_pol'])

    candidates = [
        pos
        for pos in range(1, len(readings))
        if readings[pos - 1] * threshold < readings[pos]
        and readings[pos] > min_level
    ]

    # filter out double values
    # Collect survivors in a new list; removing items from a list during
    # enumeration makes the loop skip the item following each removal.
    empties_indices = []
    for pos in candidates:
        if not empties_indices or pos - empties_indices[-1] >= min_gap:
            empties_indices.append(pos)
    return empties_indices
# function adds the calculated empties to the container DataFrame.
......
......@@ -43,6 +43,11 @@ def linearRegression(df_singlePeriod):
print(y_pred)
print(lr.coef_)
'''Functions that use the fitted slope to approximate the original input data. They take a preprocessed DataFrame
that has been grouped at day level and return the predictions as a list.
The predicted fill level is y = m*x + b, where b is the fill level right after the previous emptying.'''
def predictValues_general(test, temp_list, temp):
counter = 1
......@@ -70,12 +75,13 @@ def predictValues_general(test, temp_list, temp):
print("Root Squared Mean Error: " + str(rmse))
print("Values have been predicted!")
def predictValues_clust0(input):
counter = 1
pred_list = list()
temp_list = input['inter_pol'].tolist()
emptie_checkpoints = preprocessing.calculate_empties(input, 1.01)
emptie_checkpoints = preprocessing.calculate_empties_0(input, 1.01)
for k in range(emptie_checkpoints[0]):
y = -3.62261628 * k + temp_list[0]
......@@ -98,9 +104,75 @@ def predictValues_clust0(input):
print('values have been predicated')
rmse = mean_squared_error(temp_list, pred_list, squared=False)
print("Root Squared Mean Error: " + rmse)
print("Root Squared Mean Error: " + str(rmse))
return pred_list
def predictValues_clust1(input):
    """Predict ``inter_pol`` for a cluster-1 container with a piecewise line.

    The series is cut at the positions returned by
    ``preprocessing.calculate_empties_1(input, 1.17)``.  Inside every segment
    the prediction is ``slope * step + b``, where ``step`` counts from 0 at
    the segment start and ``b`` is the observed value at the segment start.
    The RMSE against the observed values is printed.

    Args:
        input: DataFrame with an ``'inter_pol'`` column.

    Returns:
        List of predictions, one per row of ``input``.
    """
    # NOTE(review): the previous version used -3.63041747 only for the first
    # segment and -3.62261628 (the cluster-0 coefficient) everywhere else,
    # which looks like copy-paste residue. One slope is used for the whole
    # cluster now -- confirm -3.63041747 is the fitted cluster-1 coefficient.
    slope = -3.63041747

    temp_list = input['inter_pol'].tolist()
    emptie_checkpoints = preprocessing.calculate_empties_1(input, 1.17)

    # Segment borders: start of series, every detected emptying, end of
    # series. This also covers the case of no detected emptying (one segment).
    borders = [0, *emptie_checkpoints, len(temp_list)]

    pred_list = []
    for start, end in zip(borders, borders[1:]):
        base = temp_list[start]
        # The trailing segment used to compute `slope * base` (no step, no
        # offset); it now follows the same y = m*x + b rule as the others.
        pred_list.extend(slope * step + base for step in range(end - start))

    print('values have been predicated')
    # sqrt(MSE) instead of squared=False: that keyword was removed from
    # newer scikit-learn releases, this form works on old and new versions.
    rmse = mean_squared_error(temp_list, pred_list) ** 0.5
    print("Root Squared Mean Error: " + str(rmse))
    return pred_list
def predictValues_clust2(input):
    """Predict ``inter_pol`` for a cluster-2 container with a piecewise line.

    The series is cut at the positions returned by
    ``preprocessing.calculate_empties_2(input, 1.16)``.  Inside every segment
    the prediction is ``slope * step + b``, where ``step`` counts from 0 at
    the segment start and ``b`` is the observed value at the segment start.
    The RMSE against the observed values is printed.

    Args:
        input: DataFrame with an ``'inter_pol'`` column.

    Returns:
        List of predictions, one per row of ``input``.
    """
    # NOTE(review): the previous version used -3.62261628 (the cluster-0
    # coefficient) for all but the last segment and -3.38830135 only there,
    # which looks like copy-paste residue. One slope is used for the whole
    # cluster now -- confirm -3.38830135 is the fitted cluster-2 coefficient.
    slope = -3.38830135

    temp_list = input['inter_pol'].tolist()
    emptie_checkpoints = preprocessing.calculate_empties_2(input, 1.16)

    # Segment borders: start of series, every detected emptying, end of
    # series. With no detected emptying the whole series is one segment.
    borders = [0, *emptie_checkpoints, len(temp_list)]

    pred_list = []
    for start, end in zip(borders, borders[1:]):
        base = temp_list[start]
        # The trailing segment used to compute `slope * base` (no step, no
        # offset); it now follows the same y = m*x + b rule as the others.
        pred_list.extend(slope * step + base for step in range(end - start))

    print('values have been predicated')
    # sqrt(MSE) instead of squared=False: that keyword was removed from
    # newer scikit-learn releases, this form works on old and new versions.
    rmse = mean_squared_error(temp_list, pred_list) ** 0.5
    # Separator added: the label and the number were printed glued together.
    print("Root Mean Squared Error: " + str(rmse))
    return pred_list
def linearRegressionPlot(temp, test, cluster):
plt.figure(figsize=(30,8))
plt.ylim((0,200))
......@@ -114,4 +186,35 @@ def linearRegressionPlot(temp, test, cluster):
for i in test:
plt.vlines(i, color="green", ymin=0, ymax=200)
plt.show()
def linearRegressionPlot_pred(input, cluster):
    """Plot observed ``inter_pol`` against the cluster-specific prediction.

    Draws the observed series, the output of the matching
    ``predictValues_clust*`` function, and a green vertical line at every
    position returned by the matching ``preprocessing.calculate_empties_*``.

    Args:
        input: DataFrame with an ``'inter_pol'`` column.
        cluster: Cluster id; must be 0, 1 or 2.

    Returns:
        None. For an unsupported cluster a message is printed and nothing is
        plotted.
    """
    # The old guard (cluster < 0 or cluster > 4) let 3 and 4 through, which
    # were then silently plotted with the cluster-2 model.
    if cluster not in (0, 1, 2):
        print('Cluster has to be 0, 1, 2')
        return

    plt.figure(figsize=(30,8))
    plt.ylim((0,200))
    plt.title('Abweichung')
    plt.xticks(fontsize=8, rotation=90)
    plt.yticks(fontsize=10, fontweight='bold')
    plt.plot(input['inter_pol'])

    if cluster == 0:
        predicted = predictValues_clust0(input)
        # NOTE(review): predictValues_clust0 detects empties with threshold
        # 1.01 while 1.10 is used here, so the marker lines may not match the
        # segment borders of the prediction -- confirm which is intended.
        empties = preprocessing.calculate_empties_0(input, 1.10)
    elif cluster == 1:
        predicted = predictValues_clust1(input)
        empties = preprocessing.calculate_empties_1(input, 1.17)
    else:
        predicted = predictValues_clust2(input)
        empties = preprocessing.calculate_empties_2(input, 1.16)

    plt.plot(predicted)
    plt.legend(['Preprocessed', 'Predictions'], loc='upper left')
    # One vertical marker per detected emptying.
    for i in empties:
        plt.vlines(i, color="green", ymin=0, ymax=200)
    plt.show()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment