Commit 12698f0c authored by ugfvr's avatar ugfvr
Browse files

Replace Training_Data.ipynb

parent 9e456e79
......@@ -12,17 +12,103 @@
``` python
data_path = r'..\..\data'
data='collection_data.txt'
file_number=data_path+'\\'+data
df = pd.read_csv(file_number)
df["collection_intervall"] = list(map(lambda st: str(st)[0:int(st.index("d"))],df["last_collection"]))
df["collection_intervall"]=df["collection_intervall"].astype(int)
df["collection_intervall"]= list(map(lambda z: z*(-1),df["collection_intervall"]))
df["number_collections"]=np.ones(len(df["collection_intervall"]))
df["collection_intervall"]= list(map(lambda z: z*(-1),df["last_collection"]))
df["number_collections"]=np.ones(len(df["last_collection"]))
df["number_collections"]=df["number_collections"].astype(int)
```
%% Cell type:code id:4d9c368e tags:
``` python
df
```
%%%% Output: execute_result
Unnamed: 0 timestamp container_id \
0 0 2020-05-22 18:51:01.742945 70B3D500700016DA
1 1 2020-06-05 14:49:42.681218 70B3D500700016DA
2 2 2020-06-29 13:47:52.050553 70B3D500700016DA
3 3 2020-07-17 13:46:18.287249 70B3D500700016DA
4 4 2020-08-07 09:44:36.149679 70B3D500700016DA
... ... ... ...
4113 4113 2021-04-27 20:26:56.511519 70B3D50070001789
4114 4114 2021-05-05 15:26:20.82634 70B3D50070001789
4115 4115 2021-05-06 16:26:17.375246 70B3D50070001789
4116 4116 2021-05-07 12:26:16.709112 70B3D50070001789
4117 4117 2021-05-07 21:26:16.90103 70B3D50070001789
last_collection pre_height post_height sensor_mean_temperature \
0 -14 136 16 15.251029
1 -14 120 14 16.410714
2 -24 136 14 18.255446
3 -18 128 12 19.053476
4 -21 118 14 21.981524
... ... ... ... ...
4113 -2 52 28 19.600000
4114 -8 70 4 17.646552
4115 -2 38 6 15.055556
4116 -1 40 0 14.214286
4117 -1 44 12 19.500000
sensor_max_temperature sensor_min_temperature \
0 47 0
1 44 4
2 43 4
3 45 7
4 47 6
... ... ...
4113 33 10
4114 32 10
4115 23 10
4116 19 12
4117 26 14
weather_mean_temperature ... weather_mean_moisture \
0 14.283636 ... 58.121212
1 16.873193 ... 53.888554
2 18.670261 ... 65.890435
3 19.258796 ... 58.773148
4 21.973000 ... 49.794000
... ... ... ...
4113 11.096000 ... 37.200000
4114 9.835294 ... 61.197861
4115 7.068000 ... 75.320000
4116 7.010000 ... 82.950000
4117 9.211111 ... 52.333333
weather_max_moisture weather_min_moisture holiday_percentage \
0 95.0 25.0 0.360606
1 93.0 19.0 0.361446
2 97.0 25.0 0.375652
3 96.0 22.0 0.222222
4 95.0 20.0 0.288000
... ... ... ...
4113 51.0 24.0 0.000000
4114 98.0 25.0 0.256684
4115 92.0 51.0 0.000000
4116 95.0 53.0 0.000000
4117 76.0 39.0 0.000000
Lockdown year month weekday collection_intervall number_collections
0 0.0 2020 5 4 14 1
1 0.0 2020 6 4 14 1
2 0.0 2020 6 0 24 1
3 0.0 2020 7 4 18 1
4 0.0 2020 8 4 21 1
... ... ... ... ... ... ...
4113 1.0 2021 4 1 2 1
4114 1.0 2021 5 2 8 1
4115 1.0 2021 5 3 2 1
4116 1.0 2021 5 4 1 1
4117 1.0 2021 5 4 1 1
[4118 rows x 25 columns]
%% Cell type:markdown id:6e02f3a6 tags:
Hinzufügen der Information über die geschätzte Anzahl der Leerungen jedes Containers
%% Cell type:code id:aab3daa4 tags:
......@@ -35,28 +121,28 @@
%%%% Output: execute_result
number_collections
container_id
70B3D500700016DA 20
70B3D500700016DE 11
70B3D500700016DF 46
70B3D500700016E0 38
70B3D500700016E5 18
70B3D500700016DA 22
70B3D500700016DF 44
70B3D500700016E0 43
70B3D500700016E5 24
70B3D500700016E6 104
... ...
70B3D50070001782 138
70B3D50070001786 9
70B3D50070001787 81
70B3D50070001788 44
70B3D50070001789 99
70B3D50070001782 118
70B3D50070001786 5
70B3D50070001787 64
70B3D50070001788 45
70B3D50070001789 112
[76 rows x 1 columns]
[72 rows x 1 columns]
%% Cell type:code id:6bba9c37 tags:
``` python
relevant_data=pd.DataFrame({'container_id':df["container_id"],'collection_intervall':df["collection_intervall"],
relevant_data=pd.DataFrame({'container_id':df["container_id"],'collection_intervall':df["last_collection"],
'pre_height':df["pre_height"],'post_height':df["post_height"],
'sensor_mean_temperature':df["sensor_mean_temperature"],'lockdown':df['Lockdown']})
data_all=relevant_data.groupby(['container_id']).mean()
data_all['number_collections']=number_collections['number_collections']
data_all
......@@ -64,37 +150,37 @@
%%%% Output: execute_result
collection_intervall pre_height post_height \
container_id
70B3D500700016DA 18.450000 123.700000 12.400000
70B3D500700016DE 27.545455 93.090909 34.000000
70B3D500700016DF 7.652174 72.956522 17.739130
70B3D500700016E0 9.815789 53.578947 2.526316
70B3D500700016E5 19.444444 60.222222 6.555556
70B3D500700016DA -16.818182 114.363636 11.636364
70B3D500700016DF -7.954545 65.136364 14.636364
70B3D500700016E0 -8.744186 49.581395 2.511628
70B3D500700016E5 -15.541667 51.416667 5.500000
70B3D500700016E6 -3.961538 67.711538 17.576923
... ... ... ...
70B3D50070001782 3.195652 68.898551 21.188406
70B3D50070001786 14.777778 64.888889 28.666667
70B3D50070001787 4.962963 77.753086 16.913580
70B3D50070001788 8.500000 68.363636 10.727273
70B3D50070001789 4.222222 73.575758 11.676768
70B3D50070001782 -3.652542 66.203390 18.050847
70B3D50070001786 -26.000000 64.000000 26.400000
70B3D50070001787 -6.109375 77.656250 12.875000
70B3D50070001788 -8.311111 67.733333 10.888889
70B3D50070001789 -3.776786 69.142857 12.500000
sensor_mean_temperature lockdown number_collections
container_id
70B3D500700016DA 9.618631 0.550000 20
70B3D500700016DE 11.847655 0.909091 11
70B3D500700016DF 12.407369 0.652174 46
70B3D500700016E0 18.086524 0.578947 38
70B3D500700016E5 10.636161 0.555556 18
70B3D500700016DA 9.286537 0.590909 22
70B3D500700016DF 11.800192 0.681818 44
70B3D500700016E0 18.144672 0.581395 43
70B3D500700016E5 11.360191 0.541667 24
70B3D500700016E6 11.270862 0.653846 104
... ... ... ...
70B3D50070001782 23.158873 0.275362 138
70B3D50070001786 22.170899 0.000000 9
70B3D50070001787 7.347588 0.728395 81
70B3D50070001788 11.766303 0.522727 44
70B3D50070001789 18.541976 0.434343 99
70B3D50070001782 21.802890 0.330508 118
70B3D50070001786 25.662432 0.000000 5
70B3D50070001787 6.496864 0.796875 64
70B3D50070001788 11.653665 0.533333 45
70B3D50070001789 18.658766 0.446429 112
[76 rows x 6 columns]
[72 rows x 6 columns]
%% Cell type:markdown id:63323896 tags:
Herausfiltern aller "Ausreißer-Container", damit das Modelling nicht verfälscht wird. <br>
"Ausreißer-Container" sind definiert als alle Container, die mehr als 40 Leerungen haben oder im Durchschnitt ein Leerungsintervall von über 75 Tagen aufweisen. Die Schwellwerte wurden anhand der Visualisierung des Clustering-Notebooks ausgewählt.
......@@ -114,42 +200,41 @@
```
%%%% Output: execute_result
['70B3D500700016DA',
'70B3D500700016DE',
'70B3D500700016E0',
'70B3D500700016E5',
'70B3D500700016E7',
'70B3D500700016EB',
'70B3D500700016F1',
'70B3D500700016F2',
'70B3D500700016F4',
'70B3D500700016FA',
'70B3D50070001700',
'70B3D500700016FC',
'70B3D50070001704',
'70B3D50070001706',
'70B3D50070001709',
'70B3D50070001710',
'70B3D50070001712',
'70B3D50070001716',
'70B3D50070001725',
'70B3D50070001726',
'70B3D50070001727',
'70B3D5007000172B',
'70B3D5007000172C',
'70B3D5007000172D',
'70B3D5007000172E',
'70B3D50070001730',
'70B3D50070001734',
'70B3D50070001737',
'70B3D50070001738',
'70B3D5007000173A',
'70B3D5007000173C',
'70B3D50070001740',
'70B3D50070001742',
'70B3D50070001747',
'70B3D5007000174F',
'70B3D50070001770',
'70B3D50070001779',
'70B3D50070001780',
'70B3D5007000177C',
'70B3D50070001786']
%% Cell type:code id:01762362 tags:
``` python
......@@ -161,99 +246,17 @@
train_data=train_data.append(df[df['container_id']==item])
```
%% Cell type:code id:92cd50bb tags:
%% Cell type:code id:a1dcbff5 tags:
``` python
train_data
data_path = r'..\..\data\modeling\train'
train_data.to_csv(path_or_buf=data_path+'\\train_data.txt')
```
%%%% Output: execute_result
Unnamed: 0 timestamp container_id \
0 0 2020-05-22 18:51:01.742945 70B3D500700016DA
1 1 2020-06-05 14:49:42.681218 70B3D500700016DA
2 2 2020-06-29 13:47:52.050553 70B3D500700016DA
3 3 2020-07-17 13:46:18.287249 70B3D500700016DA
4 4 2020-08-07 09:44:36.149679 70B3D500700016DA
... ... ... ...
4381 4381 2020-08-08 15:42:32.866709 70B3D50070001786
4382 4382 2020-08-09 15:42:30.118122 70B3D50070001786
4383 4383 2020-08-11 12:42:24.962069 70B3D50070001786
4384 4384 2020-09-07 13:40:11.695782 70B3D50070001786
4385 4385 2020-09-14 15:39:33.709211 70B3D50070001786
last_collection pre_height post_height \
0 -14 days +06:00:58.208000 136 16
1 -14 days +04:01:19.058000 120 14
2 -24 days +01:01:50.633000 136 14
3 -18 days +00:01:33.806000 128 12
4 -21 days +04:01:42.126000 118 14
... ... ... ...
4381 -2 days +21:00:01.687000 64 28
4382 -1 days +00:00:02.748000 60 24
4383 -2 days +03:00:05.138000 70 30
4384 -28 days +23:02:13.362000 62 30
4385 -8 days +22:00:37.993000 64 28
sensor_mean_temperature sensor_max_temperature sensor_min_temperature \
0 15.251029 47 0
1 16.410714 44 4
2 18.255446 43 4
3 19.053476 45 7
4 21.981524 47 6
... ... ... ...
4381 33.296296 59 16
4382 32.217391 60 15
4383 28.121951 60 15
4384 20.341463 55 7
4385 20.869281 44 6
weather_mean_temperature ... weather_mean_moisture \
0 14.283636 ... 58.121212
1 16.873193 ... 53.888554
2 18.670261 ... 65.890435
3 19.258796 ... 58.773148
4 21.973000 ... 49.794000
... ... ... ...
4381 28.659259 ... 40.925926
4382 28.104167 ... 44.416667
4383 28.011111 ... 47.222222
4384 19.830663 ... 67.885978
4385 18.970000 ... 58.441176
weather_max_moisture weather_min_moisture holiday_percentage \
0 95.0 25.0 0.360606
1 93.0 19.0 0.361446
2 97.0 25.0 0.375652
3 96.0 22.0 0.222222
4 95.0 20.0 0.288000
... ... ... ...
4381 64.0 23.0 0.592593
4382 75.0 21.0 1.000000
4383 69.0 23.0 0.177778
4384 98.0 28.0 0.295840
4385 94.0 26.0 0.282353
Lockdown year month weekday collection_intervall number_collections
0 0.0 2020 5 4 14 1
1 0.0 2020 6 4 14 1
2 0.0 2020 6 0 24 1
3 0.0 2020 7 4 18 1
4 0.0 2020 8 4 21 1
... ... ... ... ... ... ...
4381 0.0 2020 8 5 2 1
4382 0.0 2020 8 6 1 1
4383 0.0 2020 8 1 2 1
4384 0.0 2020 9 0 28 1
4385 0.0 2020 9 0 8 1
[632 rows x 25 columns]
%% Cell type:code id:a1dcbff5 tags:
%% Cell type:code id:1b271bda tags:
``` python
data_path = r'..\..\data\modeling\train'
train_data.to_csv(path_or_buf=data_path+'\\train_data.txt')
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment