Commit 10b6748e authored by Robin Schnaidt's avatar Robin Schnaidt

Add data frame preparation

parent 6570fec7
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"from tecohelper.hfilehelper import H5FileHelper\n",
"from tecohelper.anvilhelper import AnvilHelper\n",
"from tecohelper.config import RTLS,LABELS\n",
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"import plotly.graph_objects as go\n",
"import plotly.express as px\n",
"import matplotlib.pyplot as plt\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"input_columns = ['left_acc_x', 'left_acc_y', 'left_acc_z', 'left_gyr_x','left_gyr_y', 'left_gyr_z', \n",
" 'left_quat_w', 'left_quat_x', 'left_quat_y','left_quat_z', \n",
" 'hip_acc_x', 'hip_acc_y', 'hip_acc_z', 'hip_gyr_x', 'hip_gyr_y', 'hip_gyr_z', \n",
" 'hip_quat_w','hip_quat_x', 'hip_quat_y', 'hip_quat_z',\n",
" 'right_acc_x', 'right_acc_y', 'right_acc_z', 'right_gyr_x', 'right_gyr_y', 'right_gyr_z', \n",
" 'right_quat_w', 'right_quat_x', 'right_quat_y', 'right_quat_z',\n",
" 'rtls_accuracy', 'rtls_accuracy_radius',\n",
" 'rtls_mapped_position', 'rtls_state', 'rtls_x_filtered',\n",
" 'rtls_x_unfiltered', 'rtls_y_filtered', 'rtls_y_unfiltered',\n",
" 'rtls_z_filtered', 'rtls_z_unfiltered']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for grupe_index in range(1,6):\n",
" file = \"data/Gruppe{}_data_recording_annotated.h5\".format(grupe_index)\n",
" print(file)\n",
" helper = H5FileHelper(file)\n",
" keys = helper.recordings\n",
" for seg_key in keys:\n",
" print(seg_key)\n",
" rec = AnvilHelper(file,\n",
" seg_key,\n",
" \"data/{}.txt\".format(seg_key))\n",
"\n",
"\n",
"\n",
" labels_df = {}\n",
" for labelindex,label in enumerate(LABELS):\n",
" # extract all tokens(segments) for each label as a Dataframe\n",
" len_tokens = len( rec.tokens[label])\n",
" df_per_label = pd.DataFrame()\n",
" for index in tqdm(range(len_tokens)):\n",
" dfs = rec.get_token_dataframe(label, index)\n",
" df_per_label = pd.concat([df_per_label,dfs[input_columns]])\n",
" df_per_label[\"label\"] = labelindex \n",
" labels_df[label] = df_per_label\n",
"\n",
" # df_all contains all segements of all labels of one sequence\n",
" df_all = pd.DataFrame()\n",
" for key in labels_df.keys():\n",
" df_all = pd.concat([df_all,labels_df[key]])\n",
"\n",
" df_all.sort_index(inplace=True)\n",
" \n",
" # delete duplicated rows\n",
" df_NAN = pd.DataFrame( index = pd.date_range(start=df_all.index[0], end=df_all.index[-1], freq='20ms'))\n",
" df_take_trop = df_all[(df_all[\"label\"]>=0) & (df_all[\"label\"]<=4)]\n",
" df_walk_stand = df_all.append(df_take_trop)\n",
" df_walk_stand = df_walk_stand[~df_walk_stand.index.duplicated(keep=False)] \n",
" df_take_trop = df_take_trop[~df_take_trop.index.duplicated()]\n",
" df_all = df_walk_stand.append(df_take_trop)\n",
" df_all.sort_index(inplace=True)\n",
" df_merged = df_NAN.merge(df_all, how='outer', left_index=True, right_index=True)\n",
" df_merged.fillna(method='ffill', inplace=True)\n",
"\n",
" # change the rest labels as \"other:0\"\n",
" df_take_drop = df_merged[((df_merged[\"label\"]>=1)&(df_merged[\"label\"]<=2))]\n",
" df_other = df_merged.append(df_take_drop)\n",
" df_other = df_other[~df_other.index.duplicated(keep=False)] \n",
" df_other[\"label\"]=0\n",
"\n",
" df_all = df_other.append(df_take_drop).sort_index()\n",
" df_all.sort_index(inplace=True)\n",
" df_all.to_csv(\"Csv_data/\"+seg_key+\".csv\", index=True)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"files = os.listdir(\"Csv_data/\")\n",
"df = []\n",
"for index,file in enumerate(files):\n",
" #print(file)\n",
" df_all = pd.read_csv(\"Csv_data/\"+file, index_col=[0])\n",
" df_all[\"id\"] = index\n",
" df.append(df_all)\n",
"df = pd.concat(df)\n",
"df.to_csv(\"Csv_data/all.csv\", index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
......@@ -14,7 +14,7 @@ as listed below.
### Data Pre-Processing:
(As Provided by TECO)
Dataframe_preparation.ipynb: Data preprocessing (as provided by TECO/Kinemic)
### Data Exploration:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment