Commit 64510496 authored by Lukas-Stingl's avatar Lukas-Stingl
Browse files
parents 48fe31ce 3200adbb
This diff is collapsed.
%% Cell type:code id: tags:
```
import os
import pandas as pd
from matplotlib import pyplot as plt
from pmdarima.arima import ADFTest
from pmdarima import auto_arima
#import sys
#sys.path.append('../utils')
#import prepro
```
%% Cell type:code id: tags:
```
# Folder holding the daily-level CSV files of cluster 0 (one file per series).
csv_folder = '../data/modeling/train/clustered/clust0/daily/'
# os.listdir() returns entries in arbitrary order; sort them so that positional
# access such as daily[0] refers to the same file on every run.
csv_files = sorted(csv for csv in os.listdir(csv_folder) if csv.endswith('.csv'))

daily = []
for file in csv_files:
    # Read from csv_folder so that listing and loading always use the same path.
    df = pd.read_csv(os.path.join(csv_folder, file))
    # Keep only the interpolated values, indexed by their time stamp.
    # set_index() returns a new frame, so nothing is written in place into
    # the column selection taken from df.
    new = df[['time_stamp', 'inter_pol']].set_index('time_stamp')
    daily.append(new)
```
%% Cell type:code id: tags:
```
# Test every daily series for stationarity (Augmented Dickey-Fuller test).
# alpha is the same for all series, so the tester is created once up front.
adf_test = ADFTest(alpha=0.05)
for df in daily:
    # Prints one tuple per series; presumably (p-value, differencing needed)
    # -- TODO confirm against the pmdarima documentation.
    print(adf_test.should_diff(df))
```
%%%% Output: stream
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
(0.01, False)
%% Cell type:code id: tags:
```
# Quick visual check of the first loaded daily series.
plt.plot(daily[0])
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x7fbef3a72160>]
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
```
# Hold-out split of the first series. Both windows are cut at the same
# boundary, so the test window starts exactly where the training window ends
# (no overlapping rows and no gap, whatever the length of the series).
TRAIN_SIZE = 300
TEST_SIZE = 66
series = daily[0]
train = series.iloc[:TRAIN_SIZE]
test = series.iloc[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
plt.plot(train)
plt.plot(test)
```
%%%% Output: execute_result
[<matplotlib.lines.Line2D at 0x7fbe49e9e700>]
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
```
# Stepwise seasonal ARIMA order search on the training window.
# NOTE(review): the recorded run of this cell was interrupted by hand; the wide
# search bounds (max_* = 20 together with m=12) look like the reason -- consider
# tightening them. Also confirm that m=12 is the intended season length.
arima_model = auto_arima(
    train,
    start_p=5, d=1, start_q=5,
    max_p=20, max_d=20, max_q=20,
    start_P=5, D=1, start_Q=5,
    max_P=20, max_D=20, max_Q=20,
    m=12, seasonal=True,
    error_action='warn', trace=True,
    # Must be spelled exactly like the auto_arima parameter: any other keyword
    # is collected by **fit_args instead of switching the warnings off.
    suppress_warnings=True,
    stepwise=True,
    random_state=20, n_fits=100,
)
```
%%%% Output: stream
Performing stepwise search to minimize aic
%%%% Output: error
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-65-ca7c907b264c> in <module>
----> 1 arima_model = auto_arima(train,start_p=5,d=1,start_q=5,
2 max_p=20,max_d=20,max_q=20, start_P=5,
3 D=1, start_Q=5, max_P=20,max_D=20,
4 max_Q=20, m=12, seasonal=True,
5 error_action='warn',trace=True,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/auto.py in auto_arima(y, X, start_p, d, start_q, max_p, max_d, max_q, start_P, D, start_Q, max_P, max_D, max_Q, max_order, m, seasonal, stationary, information_criterion, alpha, test, seasonal_test, stepwise, n_jobs, start_params, trend, method, maxiter, offset_test_args, seasonal_test_args, suppress_warnings, error_action, trace, random, random_state, n_fits, return_valid_fits, out_of_sample_size, scoring, scoring_args, with_intercept, sarimax_kwargs, **fit_args)
715 )
716
--> 717 sorted_res = search.solve()
718 return _return_wrapper(sorted_res, return_valid_fits, start, trace)
719
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/_auto_solvers.py in solve(self)
284
285 # fit a baseline p, d, q model
--> 286 self._do_fit((p, d, q), (P, D, Q, m))
287
288 # null model with possible constant
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/_auto_solvers.py in _do_fit(self, order, seasonal_order, constant)
231 self.k += 1
232
--> 233 fit, fit_time, new_ic = self._fit_arima(
234 order=order,
235 seasonal_order=seasonal_order,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/_auto_solvers.py in _fit_candidate_model(y, X, order, seasonal_order, start_params, trend, method, maxiter, fit_params, suppress_warnings, trace, error_action, out_of_sample_size, scoring, scoring_args, with_intercept, information_criterion, **kwargs)
504
505 try:
--> 506 fit.fit(y, X=X, **fit_params)
507
508 # for non-stationarity errors or singular matrices, return None
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/arima.py in fit(self, y, X, **fit_args)
480
481 # Internal call
--> 482 self._fit(y, X, **fit_args)
483
484 # now make a forecast if we're validating to compute the
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/arima.py in _fit(self, y, X, **fit_args)
401 with warnings.catch_warnings(record=False):
402 warnings.simplefilter('ignore')
--> 403 fit, self.arima_res_ = _fit_wrapper()
404 else:
405 fit, self.arima_res_ = _fit_wrapper()
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/pmdarima/arima/arima.py in _fit_wrapper()
391 disp = fit_args.pop("disp", 0)
392
--> 393 return arima, arima.fit(start_params=start_params,
394 method=method,
395 maxiter=_maxiter,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/tsa/statespace/mlemodel.py in fit(self, start_params, transformed, includes_fixed, cov_type, cov_kwds, method, maxiter, full_output, disp, callback, return_params, optim_score, optim_complex_step, optim_hessian, flags, low_memory, **kwargs)
688 flags['hessian_method'] = optim_hessian
689 fargs = (flags,)
--> 690 mlefit = super(MLEModel, self).fit(start_params, method=method,
691 fargs=fargs,
692 maxiter=maxiter,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/base/model.py in fit(self, start_params, method, maxiter, full_output, disp, fargs, callback, retall, skip_hessian, **kwargs)
517 warn_convergence = kwargs.pop('warn_convergence', True)
518 optimizer = Optimizer()
--> 519 xopt, retvals, optim_settings = optimizer._fit(f, score, start_params,
520 fargs, kwargs,
521 hessian=hess,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/base/optimizer.py in _fit(self, objective, gradient, start_params, fargs, kwargs, hessian, method, maxiter, full_output, disp, callback, retall)
222
223 func = fit_funcs[method]
--> 224 xopt, retvals = func(objective, gradient, start_params, fargs, kwargs,
225 disp=disp, maxiter=maxiter, callback=callback,
226 retall=retall, full_output=full_output,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/base/optimizer.py in _fit_lbfgs(f, score, start_params, fargs, kwargs, disp, maxiter, callback, retall, full_output, hess)
627 func = f
628
--> 629 retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter,
630 callback=callback, args=fargs,
631 bounds=bounds, disp=disp,
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/lbfgsb.py in fmin_l_bfgs_b(func, x0, fprime, args, approx_grad, bounds, m, factr, pgtol, epsilon, iprint, maxfun, maxiter, disp, callback, maxls)
195 'maxls': maxls}
196
--> 197 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
198 **opts)
199 d = {'grad': res['jac'],
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
358 # until the completion of the current minimization iteration.
359 # Overwrite f and g:
--> 360 f, g = func_and_grad(x)
361 elif task_str.startswith(b'NEW_X'):
362 # new iteration
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py in fun_and_grad(self, x)
259 self._update_x_impl(x)
260 self._update_fun()
--> 261 self._update_grad()
262 return self.f, self.g
263
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py in _update_grad(self)
229 def _update_grad(self):
230 if not self.g_updated:
--> 231 self._update_grad_impl()
232 self.g_updated = True
233
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py in update_grad()
149 self._update_fun()
150 self.ngev += 1
--> 151 self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
152 **finite_diff_options)
153
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_numdiff.py in approx_derivative(fun, x0, method, rel_step, abs_step, f0, bounds, sparsity, as_linear_operator, args, kwargs)
484
485 if sparsity is None:
--> 486 return _dense_difference(fun_wrapped, x0, f0, h,
487 use_one_sided, method)
488 else:
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_numdiff.py in _dense_difference(fun, x0, f0, h, use_one_sided, method)
555 x = x0 + h_vecs[i]
556 dx = x[i] - x0[i] # Recompute dx as exactly representable number.
--> 557 df = fun(x) - f0
558 elif method == '3-point' and use_one_sided[i]:
559 x1 = x0 + h_vecs[i]
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_numdiff.py in fun_wrapped(x)
435
436 def fun_wrapped(x):
--> 437 f = np.atleast_1d(fun(x, *args, **kwargs))
438 if f.ndim > 1:
439 raise RuntimeError("`fun` return value has "
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py in fun_wrapped(x)
128 def fun_wrapped(x):
129 self.nfev += 1
--> 130 return fun(x, *args)
131
132 def update_fun():
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/base/model.py in f(params, *args)
499
500 def f(params, *args):
--> 501 return -self.loglike(params, *args) / nobs
502
503 if method == 'newton':
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/tsa/statespace/mlemodel.py in loglike(self, params, *args, **kwargs)
923 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
924
--> 925 loglike = self.ssm.loglike(complex_step=complex_step, **kwargs)
926
927 # Koopman, Shephard, and Doornik recommend maximizing the average
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/tsa/statespace/kalman_filter.py in loglike(self, **kwargs)
981 kwargs.setdefault('conserve_memory',
982 MEMORY_CONSERVE ^ MEMORY_NO_LIKELIHOOD)
--> 983 kfilter = self._filter(**kwargs)
984 loglikelihood_burn = kwargs.get('loglikelihood_burn',
985 self.loglikelihood_burn)
~/opt/miniconda3/envs/bda/lib/python3.9/site-packages/statsmodels/tsa/statespace/kalman_filter.py in _filter(self, filter_method, inversion_method, stability_method, conserve_memory, filter_timing, tolerance, loglikelihood_burn, complex_step)
904
905 # Run the filter
--> 906 kfilter()
907
908 return kfilter
KeyboardInterrupt:
%% Cell type:code id: tags:
```
arima_model = auto_
```
%%%% Output: stream
Performing stepwise search to minimize aic
ARIMA(5,1,5)(0,0,0)[0] intercept : AIC=inf, Time=1.10 sec
ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=2493.904, Time=0.02 sec
ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=2400.849, Time=0.06 sec
ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=2275.303, Time=0.07 sec
ARIMA(0,1,0)(0,0,0)[0] : AIC=2491.918, Time=0.01 sec
ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=2270.152, Time=0.14 sec
ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=2261.452, Time=0.16 sec
ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=2291.336, Time=0.15 sec
ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=2252.986, Time=0.20 sec
ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=2293.144, Time=0.18 sec
ARIMA(4,1,1)(0,0,0)[0] intercept : AIC=2229.131, Time=0.25 sec
ARIMA(4,1,0)(0,0,0)[0] intercept : AIC=2237.383, Time=0.16 sec
ARIMA(5,1,1)(0,0,0)[0] intercept : AIC=2224.109, Time=0.45 sec
ARIMA(5,1,0)(0,0,0)[0] intercept : AIC=2235.238, Time=0.22 sec
ARIMA(6,1,1)(0,0,0)[0] intercept : AIC=2226.100, Time=0.64 sec
ARIMA(5,1,2)(0,0,0)[0] intercept : AIC=inf, Time=0.78 sec
ARIMA(4,1,2)(0,0,0)[0] intercept : AIC=inf, Time=0.66 sec
ARIMA(6,1,0)(0,0,0)[0] intercept : AIC=2227.380, Time=0.32 sec
ARIMA(6,1,2)(0,0,0)[0] intercept : AIC=inf, Time=0.91 sec
ARIMA(5,1,1)(0,0,0)[0] : AIC=2222.143, Time=0.24 sec
ARIMA(4,1,1)(0,0,0)[0] : AIC=2227.153, Time=0.12 sec
ARIMA(5,1,0)(0,0,0)[0] : AIC=2233.261, Time=0.12 sec
ARIMA(6,1,1)(0,0,0)[0] : AIC=2224.133, Time=0.45 sec
ARIMA(5,1,2)(0,0,0)[0] : AIC=2193.221, Time=0.59 sec
ARIMA(4,1,2)(0,0,0)[0] : AIC=2193.016, Time=0.32 sec
ARIMA(3,1,2)(0,0,0)[0] : AIC=2192.071, Time=0.18 sec
ARIMA(2,1,2)(0,0,0)[0] : AIC=2192.551, Time=0.54 sec
ARIMA(3,1,1)(0,0,0)[0] : AIC=2250.992, Time=0.10 sec
ARIMA(3,1,3)(0,0,0)[0] : AIC=2193.665, Time=0.29 sec
ARIMA(2,1,1)(0,0,0)[0] : AIC=2259.454, Time=0.10 sec
ARIMA(2,1,3)(0,0,0)[0] : AIC=2191.677, Time=0.24 sec
ARIMA(1,1,3)(0,0,0)[0] : AIC=inf, Time=0.31 sec
ARIMA(2,1,4)(0,0,0)[0] : AIC=2193.645, Time=0.32 sec
ARIMA(1,1,2)(0,0,0)[0] : AIC=inf, Time=0.20 sec
ARIMA(1,1,4)(0,0,0)[0] : AIC=inf, Time=0.33 sec
ARIMA(3,1,4)(0,0,0)[0] : AIC=2195.340, Time=0.75 sec
ARIMA(2,1,3)(0,0,0)[0] intercept : AIC=inf, Time=0.72 sec
Best model: ARIMA(2,1,3)(0,0,0)[0]
Total fit time: 12.479 seconds
%% Cell type:code id: tags:
```
# Display the summary table of the selected model (coefficients, AIC/BIC, diagnostics).
arima_model.summary()
```
%%%% Output: execute_result
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 290
Model: SARIMAX(5, 1, 1) Log Likelihood -1062.448
Date: Tue, 13 Jul 2021 AIC 2138.897
Time: 00:57:34 BIC 2164.562
Sample: 0 HQIC 2149.181
- 290
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.0239 0.082 0.293 0.769 -0.136 0.184
ar.L2 -0.1874 0.080 -2.351 0.019 -0.344 -0.031
ar.L3 -0.2115 0.087 -2.434 0.015 -0.382 -0.041
ar.L4 -0.2139 0.077 -2.778 0.005 -0.365 -0.063
ar.L5 -0.2423 0.069 -3.512 0.000 -0.378 -0.107
ma.L1 0.9020 0.059 15.346 0.000 0.787 1.017
sigma2 90.5831 6.429 14.091 0.000 77.983 103.183
===================================================================================
Ljung-Box (L1) (Q): 0.10 Jarque-Bera (JB): 20.58
Prob(Q): 0.75 Prob(JB): 0.00
Heteroskedasticity (H): 1.17 Skew: 0.48
Prob(H) (two-sided): 0.44 Kurtosis: 3.89
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""
%% Cell type:code id: tags:
```
# Forecast exactly as many steps as the hold-out window contains, so the
# number of predicted values always equals the length of test.index.
forecast = arima_model.predict(n_periods=len(test))
# list() keeps only the values, so they are matched to test.index by position.
pred = pd.DataFrame({'predicted_heigth': list(forecast)}, index=test.index)
pred.head(5)
```
%%%% Output: execute_result
predicted_heigth
time_stamp
2021-02-23 94.674403
2021-02-24 88.454877
2021-02-25 88.645007
2021-02-26 92.813250
2021-02-27 98.118809
%% Cell type:code id: tags:
```
# Draw the training window and the forecast on one 8x5 inch figure.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(train, label='Training')
ax.plot(pred, label='Predicted')
ax.legend(loc='upper left')
plt.show()
```
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
```
for t
```
......
This diff is collapsed.
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: osx-64
appnope=0.1.2=py39hecd8cb5_1001
backcall=0.2.0=py_0
beautifulsoup4=4.9.3=pypi_0
ca-certificates=2020.10.14=0
certifi=2021.5.30=py39hecd8cb5_0
cycler=0.10.0=pypi_0
cython=0.29.23=pypi_0
decorator=4.4.2=py_0
ipykernel=5.3.4=py39h01d92e1_0
ipython=7.22.0=py39h01d92e1_0
ipython_genutils=0.2.0=pyhd3eb1b0_1
jedi=0.17.2=py39hecd8cb5_1
joblib=1.0.1=pypi_0
jupyter_client=5.3.3=py_0
jupyter_core=4.5.0=py_0
kiwisolver=1.3.1=pypi_0
libcxx=10.0.0=1
libffi=3.3=hb1e8313_2
libsodium=1.0.18=h1de35cc_0
llvmlite=0.36.0=pypi_0
mat4py=0.5.0=pypi_0
matplotlib=3.4.2=pypi_0
ncurses=6.2=h0a44026_1
numba=0.53.1=pypi_0
numpy=1.20.3=pypi_0
openssl=1.1.1k=h9ed2024_0
pandas=1.2.4=pypi_0
parso=0.7.0=py_0
pexpect=4.8.0=pyhd3eb1b0_3
pickleshare=0.7.5=pyhd3eb1b0_1003
pillow=8.2.0=pypi_0
pip=21.1.2=py39hecd8cb5_0
prompt-toolkit=3.0.8=py_0
ptyprocess=0.7.0=pyhd3eb1b0_2
pygments=2.7.1=py_0
pyparsing=2.4.7=pypi_0
python=3.9.5=h88f2d9e_3
python-dateutil=2.8.1=py_0
pytz=2021.1=pypi_0
pyzmq=20.0.0=py39h23ab428_1
readline=8.1=h9ed2024_0
scikit-learn=0.24.2=pypi_0
scipy=1.6.3=pypi_0
seaborn=0.11.1=pypi_0
setuptools=52.0.0=py39hecd8cb5_0
six=1.15.0=py_0
soupsieve=2.2.1=pypi_0
sqlite=3.35.4=hce871da_0
threadpoolctl=2.1.0=pypi_0
tk=8.6.10=hb0a8c7a_0
tornado=6.1=py39h9ed2024_0
traitlets=5.0.5=py_0
tslearn=0.5.0.5=pypi_0
tzdata=2020f=h52ac0ba_0
wcwidth=0.2.5=py_0
wheel=0.36.2=pyhd3eb1b0_0
xz=5.2.5=h1de35cc_0
zeromq=4.3.3=hb1e8313_3
zlib=1.2.11=h1de35cc_3
# install with: pip install -r requirements.txt
ipykernel
numpy
pandas
scipy
scikit-learn
tslearn
matplotlib
seaborn
tensorflow
keras
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from scipy.signal import lfilter
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.utils import to_time_series_dataset
#define functions
def cleaning_del(df):
    """Convert the raw sensor export into a cleaned, time-sorted DataFrame.

    Usable values are cast to integers and measurement errors are cleaned up:
    rows whose height reading is 190 or more are dropped, and temperature
    readings above 100 are overwritten with NaN (the row itself is kept).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data with the columns ``deveui``, ``created_at``, ``Height``,
        ``Temperature`` and ``Tilt``. The last three are read as strings
        from which the unit suffixes ``cm``, ``C`` and ``Degree`` are removed.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``device_id``, ``time_stamp``, ``Height``,
        ``Temperature`` and ``Tilt``, sorted ascending by ``time_stamp``.
        The input frame is not modified.
    """
    new = pd.DataFrame()
    # device id is kept so that multiple bins can be joined later in the process
    new['device_id'] = df['deveui']
    new['time_stamp'] = pd.to_datetime(df['created_at'], format='%Y-%m-%d')

    # remove the unit as a literal substring (regex=False) before casting
    new['Height'] = df['Height'].str.replace('cm', '', regex=False).astype(int)
    # drop height measurement errors; .copy() makes the filtered result an
    # independent frame, so the column assignments below never write into a
    # slice of the unfiltered data
    new = new[new.Height < 190].copy()

    # column assignment aligns on the index, so only rows that survived the
    # height filter receive a value
    new['Temperature'] = df['Temperature'].str.replace('C', '', regex=False).astype(int)
    # mask() puts NaN where the condition holds and converts the column to
    # float only when a value is actually replaced
    new['Temperature'] = new['Temperature'].mask(new['Temperature'] > 100)

    new['Tilt'] = df['Tilt'].str.replace('Degree', '', regex=False).astype(int)

    return new.sort_values(by=['time_stamp'], ascending=True)
''' unterschiedliche Smoothing verfahren werden angewandt - einmal auf die ungruppierten Daten und einmal auf die gruppierten Daten. Als aggregationsfunktion wird .mean() verwendet. Als Level wird auf eintägig guppiert.
NaN Temperaturwerte werden mit dem durchschnittlichen Temperaturwert überschrieben - gleiches gilt für Tilt.
beim mov_avg auf height werden NaN durch Interpolation gefüllt
'''
def smoothing_fillingNaN(df):
#smooth with lfilter
n = 60 # the larger n is, the smoother curve will be
b = [1.0 / n] * n
a = 1
df['lfilter'] = lfilter(b,a, df['Height'])
#moving average smoothing
df['mov_avg'] = df['Height'].rolling(30).mean()
#minimum moving average
df['min_avg'] = df['Height'].rolling(30).min()
#creating new DataFrame on daily level with aggregation mean()
daily = df.groupby(pd.Grouper(key='time_stamp', axis=0,
freq='1D', sort=True)).mean()
#add device_id
daily['device_id'] = df.iloc[1, 0]
#further smoothing on daily level with rolling mean window 2
daily['mov_avg'] = daily['Height'].rolling(2).mean()