Commit d02a5194 authored by Willi's avatar Willi
Browse files

minor changes

parent 841b57d3
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
This diff is collapsed.
Metadata-Version: 1.0
Name: DataLoadingAPI
Version: 1.0
Summary: API for loading h5 based files using TECO-Helpers
Summary: API for CSV-Based Data for TECO's Smart-Data-Analytics practical course
Home-page: UNKNOWN
Author: Willi Becker
Author-email: utdjv@student.kit.edu
......
README.md
setup.py
DataLoadingAPI/DataLoadingExample.py
DataLoadingAPI/RecordingFlyweight.py
DataLoadingAPI/RecordingH5TxtMapper.py
DataLoadingAPI/DataSetGenerator.py
DataLoadingAPI/sensorDataKeys.py
DataLoadingAPI.egg-info/PKG-INFO
DataLoadingAPI.egg-info/SOURCES.txt
DataLoadingAPI.egg-info/dependency_links.txt
DataLoadingAPI.egg-info/requires.txt
DataLoadingAPI.egg-info/top_level.txt
tecohelper/__init__.py
tecohelper/anvilhelper.py
tecohelper/config.py
tecohelper/hfilehelper.py
tecohelper/recording.py
\ No newline at end of file
DataLoadingAPI.egg-info/top_level.txt
\ No newline at end of file
..\DataLoadingAPI\DataLoadingExample.py
..\DataLoadingAPI\RecordingFlyweight.py
..\DataLoadingAPI\RecordingH5TxtMapper.py
..\DataLoadingAPI\__pycache__\DataLoadingExample.cpython-38.pyc
..\DataLoadingAPI\__pycache__\RecordingFlyweight.cpython-38.pyc
..\DataLoadingAPI\__pycache__\RecordingH5TxtMapper.cpython-38.pyc
..\DataLoadingAPI\DataSetGenerator.py
..\DataLoadingAPI\__pycache__\DataSetGenerator.cpython-38.pyc
..\DataLoadingAPI\__pycache__\sensorDataKeys.cpython-38.pyc
..\DataLoadingAPI\sensorDataKeys.py
..\tecohelper\__init__.py
..\tecohelper\__pycache__\__init__.cpython-38.pyc
..\tecohelper\__pycache__\anvilhelper.cpython-38.pyc
..\tecohelper\__pycache__\config.cpython-38.pyc
..\tecohelper\__pycache__\hfilehelper.cpython-38.pyc
..\tecohelper\__pycache__\recording.cpython-38.pyc
..\tecohelper\anvilhelper.py
..\tecohelper\config.py
..\tecohelper\hfilehelper.py
..\tecohelper\recording.py
PKG-INFO
SOURCES.txt
dependency_links.txt
......
......@@ -4,7 +4,7 @@ matplotlib==3.2.1
numexpr==2.7.1
numpy==1.18.5
pandas==1.0.4
prompt-toolkit==1.0.14
prompt-toolkit
Pygments
PyInquirer==1.0.3
pyparsing==2.4.7
......
from tecohelper import hfilehelper as hf
from tecohelper.anvilhelper import AnvilHelper
import pandas as pd
import datetime as dt
from DataLoadingAPI.RecordingFlyweight import RecordingFlyweight
from DataLoadingAPI.RecordingH5TxtMapper import mapRecordingsToFiles
import glob
import matplotlib.pyplot as plt
import time
# Example script: plot one IMU series of one recording and shade the spans that
# are annotated as "take piece".
#
# The paths below are machine specific; adjust them before running.
h5FilePath = 'C:\\Users\\Willi\\Desktop\\PSDA\\Aufgabe_3\\TECO_Praktikum_SS2020\\Proband5\\data_recording_clap_annotated.h5'
anvilExportFilePath = 'C:\\Users\\Willi\\Desktop\\PSDA\\Aufgabe_3\\TECO_Praktikum_SS2020\\Proband5\\17-16-05.txt'

# Open a single recording directly through the TECO helpers.
helper = hf.H5FileHelper(h5FilePath)
recordings = helper.recordings
recordings.sort()
anvilHelper = AnvilHelper(h5FilePath, recordings[0], anvilExportFilePath)

# Start from the root directory of your annotations.
dct = mapRecordingsToFiles("C:\\Users\\Willi\\Desktop\\PSDA\\Aufgabe_3\\Annotationen")
rf = RecordingFlyweight(dct)

# You can explore other sensor data by setting the correct key. The following
# link leads to a document that lists all keys:
# https://docs.google.com/document/d/1oxqseZpd7c6raixKAD2HNGJXllVYCYUSiLUGqLwEgXI/edit
flw = rf.getMergedSeriesFromLeftIMUDF("left_acc_y")

# There are up to 30 recordings in the data set. Run the example on another
# recording by setting its index in 'recording_index'.
recording_index = 20
# Columns are padded with NaN up to the longest recording, so drop the padding.
final_graph_data = flw.iloc[:, recording_index].dropna(how='all', axis=0)
final_graph_data.plot()

# NOTE(review): the measured interval below also contains plt.show(), which
# usually blocks until the plot window is closed - confirm that is intended.
start = time.time()
final_length = len(final_graph_data.index)
take, drop = rf.getLabelSubsequences(recording_index, final_length)
for seq in take:
    # Each entry is expected to be a sequence of sample positions; shade from
    # its first to its last position.
    plt.axvspan(seq[0], seq[-1], color='red', alpha=0.5)
plt.show()
end = time.time()
print(end - start)
import datetime
import pandas as pd
import numpy as np
class DataSetGenerator:
    """Serve per-recording feature matrices and label vectors from one CSV file.

    The CSV is re-read on every call. Rows are selected through the column
    named ``id``. Features are all columns except the first one and the last
    two; the label is the second-to-last column.
    """

    def __init__(self, allCSVPath):
        """Remember the path of the CSV file that contains all recordings."""
        self.csvSourcePath = allCSVPath

    def getDataForRecording(self, recordingIndex):
        """Return ``(features, labels)`` for the rows whose ``id`` equals *recordingIndex*.

        Returns:
            features: 2-D ``float64`` array (rows x feature columns).
            labels: 1-D ``float64`` array with one label per row.
        """
        df = pd.read_csv("file://localhost/" + self.csvSourcePath)
        # Filter once and slice both results from the same selection.
        rows = df.loc[df.id == recordingIndex]
        features = rows.iloc[:, 1:-2].to_numpy(dtype=np.double)
        labels = rows.iloc[:, -2].to_numpy(dtype=np.double)
        return features, labels

    def getDataForRecordingOneHotEncoded(self, recordingIndex):
        """Like :meth:`getDataForRecording`, with labels one-hot encoded.

        The encoding positions follow ``np.unique`` of the recording's labels.
        """
        list1, labels = self.getDataForRecording(recordingIndex)
        return list1, getOneHotEncoding(np.unique(labels), labels)

    def getDataForRecordingExtendedDims(self, recordingIndex):
        """Like :meth:`getDataForRecording`, with every label repeated once per feature column."""
        list1, labels = self.getDataForRecording(recordingIndex)
        # shape[1] is the feature count even when the recording has no rows,
        # where indexing list1[0] would raise IndexError.
        return list1, increaseDims(labels, list1.shape[1])

    def getDataForRecordingExtendedDimsOneHotEncoded(self, recordingIndex):
        """Like :meth:`getDataForRecordingOneHotEncoded`, with every one-hot label repeated once per feature column."""
        list1, labels = self.getDataForRecordingOneHotEncoded(recordingIndex)
        return list1, increaseDims(labels, list1.shape[1])
def getLabelEncoding(distinctLabels, oneHotVectors):
    """Decode one-hot vectors back into their labels.

    Args:
        distinctLabels: sequence of labels; position ``k`` is the label that a
            ``1.0`` at position ``k`` of a one-hot row stands for. The order is
            used as given (it is not sorted here).
        oneHotVectors: iterable of vectors whose first element (``vector[0]``)
            is the one-hot row, e.g. arrays of shape ``(1, n)``.

    Returns:
        1-D numpy array with the decoded labels. Vectors without any ``1.0``
        entry are skipped.
    """
    labelLookup = np.asarray(distinctLabels)
    decoded = []
    for vector in oneHotVectors:
        hotPositions = np.where(np.asarray(vector[0]) == 1.0)[0]
        # An empty result means "no hot position"; comparing the array with
        # None never detects that.
        if hotPositions.size == 0:
            continue
        decoded.append(labelLookup[hotPositions])
    if not decoded:
        return np.array([])
    return np.concatenate(decoded)
def getTimeDeltaIndexedDataframe(df):
    """Return a copy of *df* whose first column holds seconds since the first row.

    The first column must contain timestamps in the format
    ``"%Y-%m-%d %H:%M:%S.%f"``. The input frame is left untouched; an empty
    frame is returned as an (empty) copy.

    Args:
        df: DataFrame with the timestamp strings in its first column.

    Returns:
        A new DataFrame; its first column contains the elapsed time in seconds
        (float) relative to the timestamp of the first row.
    """
    # A real copy: a plain assignment would only alias the caller's frame.
    result = df.copy()
    if result.empty:
        return result
    timestamps = pd.to_datetime(result.iloc[:, 0], format="%Y-%m-%d %H:%M:%S.%f")
    elapsed = (timestamps - timestamps.iloc[0]).dt.total_seconds()
    # Replace the column by label so it takes the float dtype of the result.
    result[result.columns[0]] = elapsed
    return result
def getTimeInSec(x, firstTimeStamp, timeFormat="%Y-%m-%d %H:%M:%S.%f"):
    """Return the number of seconds between timestamp string *x* and *firstTimeStamp*.

    Args:
        x: timestamp string to parse.
        firstTimeStamp: ``datetime.datetime`` used as the reference point.
        timeFormat: ``strptime`` format of *x*; defaults to the format with
            microseconds that this module has always used.

    Returns:
        Elapsed seconds as a float (negative if *x* lies before the reference).

    Raises:
        ValueError: if *x* does not match *timeFormat*.
    """
    parsed = datetime.datetime.strptime(x, timeFormat)
    return (parsed - firstTimeStamp).total_seconds()
def increaseDims(labelArray, numdims):
    """Repeat every label *numdims* times.

    Args:
        labelArray: iterable of labels.
        numdims: how often each label is repeated.

    Returns:
        A list with one inner list per label; each inner list contains that
        label *numdims* times.
    """
    return [[label for _ in range(numdims)] for label in labelArray]
def getOneHotEncoding(distinctLabels, labelVector):
    """One-hot encode every label of *labelVector*.

    Args:
        distinctLabels: sequence of all possible labels; the position of a
            label in this sequence is the position of the ``1`` in its
            encoding. The order is used as given (it is not sorted here).
        labelVector: iterable of labels to encode.

    Returns:
        A list with one 1-D float array of length ``len(distinctLabels)`` per
        label. A label that does not occur in *distinctLabels* is encoded as
        an all-zero vector.
    """
    # Comparing a plain list with a scalar yields a single bool, so convert
    # first to get an element-wise comparison.
    labelLookup = np.asarray(distinctLabels)
    encoded = []
    for label in labelVector:
        oneHot = np.zeros(len(labelLookup))
        oneHot[np.where(labelLookup == label)] = 1
        encoded.append(oneHot)
    return encoded
from tecohelper import hfilehelper as hf
from tecohelper.anvilhelper import AnvilHelper
import datetime as dt
import pandas as pd
import numpy as np
import math
import time
class RecordingFlyweight:
    """Load IMU series and annotation labels for a set of recordings.

    The constructor takes a map of the following schema::

        {recording-id-string: (h5-file-absolute-path-string,
                               anvil-export-txt-file-absolute-path-string),
         ...}

    For every recording an ``AnvilHelper`` is created as
    ``AnvilHelper(value[0], recording-id, value[1])``.
    """

    def __init__(self, recordingKeyExportFileMap):
        """Store the recording-key -> file-pair map."""
        self.recordingMap = recordingKeyExportFileMap

    @staticmethod
    def _combineColumns(recKeys, seriesList):
        """Join per-recording series into one frame, NaN-padded to the longest one."""
        if not seriesList:
            return pd.DataFrame()
        # concat aligns on the union of all indexes; assigning the series as
        # columns one by one would cut every recording to the first one's length.
        return pd.concat(seriesList, axis=1, keys=recKeys)

    def _mergeImuSeries(self, dataframeAttribute, key):
        """Collect column *key* of ``recording.<dataframeAttribute>`` for all recordings."""
        recKeys = []
        seriesList = []
        for recKey, paths in self.recordingMap.items():
            anvilHelper = AnvilHelper(paths[0], recKey, paths[1])
            imuFrame = getattr(anvilHelper.recording, dataframeAttribute)
            recKeys.append(recKey)
            # Positional index so that all recordings start at row 0.
            seriesList.append(imuFrame[key].reset_index(drop=True))
        return self._combineColumns(recKeys, seriesList)

    def getMergedSeriesFromLeftIMUDF(self, key):
        """Return sensor series *key* of the left IMU, one column per recording.

        Columns are named by recording key, rows are sample positions; shorter
        recordings are padded with NaN.
        """
        return self._mergeImuSeries('left_imu_dataframe', key)

    def getMergedSeriesFromRightIMUDF(self, key):
        """Return sensor series *key* of the right IMU, one column per recording.

        Columns are named by recording key, rows are sample positions; shorter
        recordings are padded with NaN.
        """
        return self._mergeImuSeries('right_imu_dataframe', key)

    def getMergedSeriesFromHipIMUDF(self, key):
        """Return sensor series *key* of the hip IMU, one column per recording.

        Columns are named by recording key, rows are sample positions; shorter
        recordings are padded with NaN.
        """
        return self._mergeImuSeries('hip_imu_dataframe', key)

    def getLowerLevelLabelDurations(self, label, direction=-1):
        """Return the video-frame-wise label durations, one column per recording.

        Args:
            label: label-key-string for the corresponding annotation.
            direction (opt): a string representing the direction of the
                annotation; only annotations whose second element equals it
                are counted. The default ``-1`` keeps all annotations.

        Returns:
            DataFrame whose columns are recording keys and whose values are
            ``annotation[0][1] - annotation[0][0]`` for every kept annotation;
            shorter columns are padded with NaN.
        """
        recKeys = []
        seriesList = []
        for recKey, paths in self.recordingMap.items():
            anvilHelper = AnvilHelper(paths[0], recKey, paths[1])
            durations = [
                annotation[0][1] - annotation[0][0]
                for annotation in anvilHelper.tokens[label]
                if direction == -1 or annotation[1] == direction
            ]
            recKeys.append(recKey)
            seriesList.append(pd.Series(np.array(durations)))
        return self._combineColumns(recKeys, seriesList)

    @staticmethod
    def _expandTokenRows(tokenRows, scalingFactor):
        """Expand the indexes in the first cell of every row by *scalingFactor*.

        Every index ``ind`` becomes the ``scalingFactor`` consecutive positions
        starting at ``ind * scalingFactor``. Returns one flat list per row.
        """
        expandedRows = []
        for _, rowValues in tokenRows.iterrows():
            blocks = [
                [offset + ind * scalingFactor for offset in range(scalingFactor)]
                for ind in rowValues.to_numpy()[0]
            ]
            expandedRows.append(np.array(blocks).flatten().tolist())
        return expandedRows

    @staticmethod
    def _toSubsequenceArray(subsequences):
        """Convert per-annotation lists to the array layout callers receive.

        Lists of different lengths give a 1-D object array of lists (requested
        explicitly, because NumPy >= 1.24 refuses to build it implicitly);
        equal-length lists give one flat array.
        """
        if len({len(sequence) for sequence in subsequences}) > 1:
            return np.array(subsequences, dtype=object)
        return np.array(subsequences).flatten()

    def getLabelSubsequences(self, recordingNumIndex, timeSequenceLength):
        """Return the positions annotated as "take piece" and "drop piece".

        Annotation indexes are scaled to a sequence of *timeSequenceLength*
        entries with the factor ``ceil(timeSequenceLength / end_annotation)``.

        Args:
            recordingNumIndex: position of the recording in the recording map.
            timeSequenceLength: length of the target sequence (for example the
                number of sensor samples of that recording).

        Returns:
            Tuple ``(take, drop)`` of numpy arrays. If the annotations differ
            in length each array holds one list of positions per annotation;
            if they all have the same length the positions are flattened into
            a single 1-D array.

        Side effects:
            Prints two timings in seconds (opening the recording, expanding
            the annotations).
        """
        recKey, paths = list(self.recordingMap.items())[recordingNumIndex]
        start = time.time()
        anvilHelper = AnvilHelper(paths[0], recKey, paths[1])
        end = time.time()
        print(end - start)
        scalingFactor = math.ceil(timeSequenceLength / anvilHelper.end_annotation)
        start = time.time()
        takeRows = anvilHelper._get_token_rows('low-level.take-piece:piece_direction_on_table')
        dropRows = anvilHelper._get_token_rows('low-level.drop-piece:bin_number')
        resListTake = self._expandTokenRows(takeRows, scalingFactor)
        resListDrop = self._expandTokenRows(dropRows, scalingFactor)
        end = time.time()
        print(end - start)
        return self._toSubsequenceArray(resListTake), self._toSubsequenceArray(resListDrop)

    @staticmethod
    def _collectPositions(subsequences):
        """Return all positions of *subsequences* as a set, nested or flat."""
        positions = set()
        for entry in subsequences:
            if isinstance(entry, (list, tuple, np.ndarray)):
                positions.update(entry)
            else:
                positions.add(entry)
        return positions

    def getAdjustedLabelSequence(self, recordingNumIndex, targetSequenceLength):
        """Return one label per position of the target sequence.

        Args:
            recordingNumIndex: position of the recording in the recording map.
            targetSequenceLength: number of labels to produce.

        Returns:
            List of length *targetSequenceLength* with ``'take'``, ``'drop'``
            or ``'other'``; ``'take'`` wins if a position is in both sets.
        """
        resListTake, resListDrop = self.getLabelSubsequences(recordingNumIndex, targetSequenceLength)
        # Sets make the lookup work for both array layouts returned above; a
        # membership test against an array of lists never matches.
        takePositions = self._collectPositions(resListTake)
        dropPositions = self._collectPositions(resListDrop)
        timeFrameLabels = []
        for i in range(targetSequenceLength):
            if i in takePositions:
                timeFrameLabels.append('take')
            elif i in dropPositions:
                timeFrameLabels.append('drop')
            else:
                timeFrameLabels.append('other')
        return timeFrameLabels
import glob
from tecohelper.hfilehelper import H5FileHelper
def mapRecordingsToFiles(rootDirectoryAbsolutePath):
    """Map recording keys to their (h5 file, Anvil txt export) path pairs.

    It's best used in the following way:
    1. unzip all the anvil-exports and h5s for each group into separate folders
    2. pass the absolute path of the root of the group folders (backslashes
       are accepted and converted to forward slashes)
    3. initialize a RecordingFlyweight object with the returned dict as input argument
    4. load the data that is needed

    Every ``*.h5`` file located exactly one folder below the root is opened.
    Its sorted recording keys are paired by position with the ``*.txt`` files
    found anywhere below the folder of that h5 file; files ending in
    ``labels.txt`` are ignored.

    Returns:
        A dictionary with the keys being the recording keys (as they are
        fetched with TECO's H5FileHelper) and the values being tuples of the
        form (h5-file-absolute-path-string,
        anvil-export-txt-file-absolute-path-string).

    Raises:
        IndexError: if a group folder contains fewer export files than its h5
            file has recordings.

    Side effects:
        Prints the list of h5 files that were found.
    """
    import os  # only needed here, for the platform-aware dirname below

    finalDict = {}
    rootDirectoryAbsolutePathCleaned = rootDirectoryAbsolutePath.replace("\\", "/")
    h5files = glob.glob(rootDirectoryAbsolutePathCleaned + "/*/*.h5", recursive=True)
    print(h5files)
    for h5file in h5files:
        # glob joins matches with the OS separator, so on Windows the result
        # mixes "/" and "\"; cutting at the last "/" would then point above
        # the group folder.
        groupRoot = os.path.dirname(h5file)
        anvilExportFiles = glob.glob(groupRoot + "/**/*.txt", recursive=True)
        anvilExportFilesCleaned = [
            path for path in anvilExportFiles if not path.endswith("labels.txt")
        ]
        recordingsList = H5FileHelper(h5file).recordings
        recordingsList.sort()
        if len(anvilExportFilesCleaned) < len(recordingsList):
            raise IndexError(
                "found %d anvil export file(s) for %d recording(s) of %s"
                % (len(anvilExportFilesCleaned), len(recordingsList), h5file)
            )
        # NOTE(review): the pairing relies on glob returning the export files
        # in the same order as the sorted recording keys - confirm.
        for recordingKey, anvilExportFile in zip(recordingsList, anvilExportFilesCleaned):
            finalDict[recordingKey] = (h5file, anvilExportFile)
    return finalDict
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment