Commit 77187b18 authored by Matthias Braun's avatar Matthias Braun
Browse files

add some analysis foo

parent 85cb9ce1
from anatools import *
import matplotlib.pyplot as plt
# Some example combinations
def plotbench(data, name):
    """Plot the reported times of the valid runs of one benchmark.

    data -- sequence of result records
    name -- value of the "benchmark" property that selects which result
            entry of every record is plotted

    The points are drawn as blue dots ('bo') with matplotlib's pyplot.
    Note that ``map`` here is the helper pulled in by
    ``from anatools import *`` (data first, functions after), not the builtin.
    """
    # x axis: the "time" property of every record, run through todate.
    dates = map(sel(data, "time"), todate)
    # Per record: pick the entry of "results" whose "benchmark" equals name.
    runs = sel(data, "results", selcond(eq("benchmark", name)))
    # Filter both sequences together so they stay aligned; only pairs whose
    # run has "valid" == "1" are kept.
    runs, dates = multifilt((runs, dates), eq([0, "valid"], "1"))
    # y axis: the numeric "reported_time" of every remaining run.
    values = map(sel(runs, "reported_time"), tonum)
    plt.plot(dates, values, 'bo')
#!/usr/bin/env python
# Python utilities to analyze yaml data
# (This was developed in the context of analyzing stuff from
# but should be quite generic)
# Example session after starting "ipython qtconsole --pylab"
# > run
# > data = load("data.json")
# > f = filt(data, contains("hw_model", "550"))
# > times = sel(f, "time")
# > times = map(times, todate)
# > results = sel(f, "results", selcond(eq("benchmark","186.crafty")))
# > results,times = multifilt((results, times), eq([0, "valid"], "1"))
# > results = sel(results, "reported_time")
# > results = map(results, tonum)
# > plot(times, results, 'bo')
import json
import sys
import numbers
from time import gmtime
import datetime
import __builtin__
def __pathget(d, path):
    """Apply the accessor functions in `path` to `d`, one after another.

    d    -- the starting datum
    path -- iterable of one-argument callables; each receives the result of
            the previous one

    Returns the final value, or None as soon as an intermediate value is
    None (the remaining accessors are then not called).
    """
    current = d
    for step in path:
        if current is None:
            return None
        current = step(current)
    return current
def __prepare_key(key):
    """Turn a key specification into an accessor function ``d -> value``.

    key may be:
      * a list or tuple -- a path; every element is prepared recursively and
        the resulting accessors are applied in order via __pathget
      * a callable      -- returned unchanged
      * a number        -- selects the nth entry of an indexable datum
      * anything else   -- looked up with ``d.get(key)``

    The number and ``get`` accessors return None when the datum does not
    support the lookup or has no such entry.
    """
    if isinstance(key, (list, tuple)):
        # Build a real list: the accessor is called once per datum, so the
        # prepared path must be re-iterable.
        prep_keys = [__prepare_key(k) for k in key]
        return lambda d: __pathget(d, prep_keys)
    if callable(key):
        return key
    # A number selects the nth entry in an array
    if isinstance(key, numbers.Number):
        def by_index(d):
            if not hasattr(d, '__getitem__'):
                return None
            try:
                return d[key]
            except (IndexError, KeyError, TypeError):
                # No such entry: report "missing" the same way the
                # property lookup below does.
                return None
        return by_index
    # Neither a path, a callable nor a number: treat it as a property name.
    return lambda d: d.get(key) if hasattr(d, 'get') else None
def tonum(datum):
    """Convert `datum` to a float.

    Returns None when `datum` cannot be converted (wrong type, not a
    numeric string, or too large for a float).
    """
    try:
        return float(datum)
    except (TypeError, ValueError, OverflowError):
        return None
def totime(datum):
    """Convert a Unix timestamp (number or numeric string) to a time of day.

    Returns a ``datetime.time`` in local time, or None when `datum` is not
    a usable timestamp.
    """
    # datetime.time has no fromtimestamp(); go through datetime.datetime and
    # keep only the time-of-day part.
    # NOTE(review): the original named datetime.time, so a time-of-day result
    # is assumed to be the intent -- confirm against callers.
    try:
        return datetime.datetime.fromtimestamp(int(datum)).time()
    except (TypeError, ValueError, OverflowError, OSError):
        return None
def todate(datum):
    """Convert a Unix timestamp (number or numeric string) to a date.

    Returns a ``datetime.date`` in local time, or None when `datum` is not
    a usable timestamp.
    """
    # NOTE(review): the conversion statement of this function was missing
    # (only the ``return None`` fallback was left); date.fromtimestamp is
    # assumed by analogy with totime -- confirm the intended result type.
    try:
        return datetime.date.fromtimestamp(int(datum))
    except (TypeError, ValueError, OverflowError, OSError):
        return None
def eq(key, value):
    """Build a predicate that is true when `key` selects `value`.

    key   -- key specification, prepared with __prepare_key
    value -- value the selected entry is compared against with ``==``

    Returns a one-argument function ``d -> bool``.
    """
    getter = __prepare_key(key)
    return lambda d: getter(d) == value
def __selcond(d, prop):
    """Return the first element of `d` for which `prop` is true.

    d    -- a list or tuple (its items are searched) or a dict (its values
            are searched); any other type yields None
    prop -- one-argument predicate

    Returns the matching element, or None if there is none.
    """
    if isinstance(d, (list, tuple)):
        candidates = d
    elif isinstance(d, dict):
        candidates = d.values()
    else:
        return None
    for candidate in candidates:
        if prop(candidate):
            return candidate
    return None
def selcond(prop):
    """Build a selector that picks the first element satisfying `prop`.

    The returned function takes a list, tuple or dict and forwards to
    __selcond, which returns the first matching element or None.
    """
    return lambda d: __selcond(d, prop)
def contains(key, value):
    """Build a predicate that is true when the entry selected by `key`
    contains `value` (tested with ``in``).

    key   -- key specification, prepared with __prepare_key
    value -- item or substring to look for

    Returns a one-argument function ``d -> bool``. A datum for which the
    key selects nothing (None) does not match.
    """
    getter = __prepare_key(key)

    def check(d):
        found = getter(d)
        # ``value in None`` would raise TypeError; a missing entry simply
        # does not contain anything.
        return found is not None and value in found

    return check
def filt(data, *funcs):
    """Keep only the elements of `data` accepted by every predicate.

    data  -- iterable of elements
    funcs -- one-argument predicates, applied one after another

    Returns a list, so the result can be indexed and iterated repeatedly.
    When no predicate is given, `data` itself is returned unchanged.
    """
    for accept in funcs:
        data = [item for item in data if accept(item)]
    return data
def unzip(l):
    """Transpose a sequence of tuples: the inverse of ``zip``.

    ``unzip([(1, 'a'), (2, 'b')])`` gives ``[(1, 2), ('a', 'b')]``.
    Returns a list of tuples; an empty input gives an empty list.
    """
    return list(zip(*l))
def multifilt(datas, *funcs):
    """Filter several parallel sequences in lockstep.

    datas -- sequence of equally long sequences
    funcs -- predicates; each receives one tuple holding the nth element of
             every sequence and decides whether that position is kept

    Returns a tuple with one filtered sequence per input sequence, so the
    result can always be unpacked as ``a, b = multifilt((a, b), ...)``.
    """
    datas = tuple(datas)
    zipped = list(zip(*datas))
    filtered = list(filt(zipped, *funcs))
    if not filtered:
        # Transposing nothing would give an empty tuple and break unpacking
        # at the call site; hand back one empty sequence per input instead.
        return tuple(() for _ in datas)
    return tuple(unzip(filtered))
def sel(data, *keys):
    """Select a (nested) entry from every element of `data`.

    data -- iterable of elements
    keys -- key specifications (see __prepare_key); they are applied one
            after another, each to the results of the previous one

    Returns a list with one selected value per element. When no key is
    given, `data` itself is returned unchanged.
    """
    for key in keys:
        getter = __prepare_key(key)
        data = [getter(item) for item in data]
    return data
def map(data, *funcs):
    """Apply one or more functions to every element of `data`.

    Note the argument order: data first, functions after. This module-level
    name deliberately replaces the builtin ``map`` for users of the module.

    data  -- iterable of elements
    funcs -- one-argument functions, applied one after another

    Returns a list of results. When no function is given, `data` itself is
    returned unchanged.
    """
    for func in funcs:
        data = [func(item) for item in data]
    return data
def load(filename):
    """Read the JSON document stored in `filename` and return it."""
    # The with-block closes the file even if parsing fails.
    with open(filename) as handle:
        return json.load(handle)
#!/usr/bin/env python
# Convert a spec result .raw file into a json document
import sys
import json
def datum(keys, value):
    """Build one parsed entry: the pair ``(keys, value)``."""
    return (keys, value)
def read_spec_file(inp):
    """Parse the ``key.sub.key: value`` lines of a spec result file.

    inp -- iterable of text lines (e.g. an open file)

    Returns a list of entries built with ``datum``: the key split at every
    "." and the value with surrounding whitespace removed. Blank lines and
    lines starting with "#" are skipped.
    """
    data = []
    for line in inp:
        line = line.strip()
        # Blank lines would otherwise fail on the first-character test.
        if not line or line.startswith("#"):
            continue
        key, colon, value = line.partition(":")
        if not colon:
            # NOTE(review): the original handling of lines without ":" was
            # lost; they are skipped here -- confirm this is intended.
            continue
        data.append(datum(key.split("."), value.strip()))
    return data
def remove_indices(data, indices):
    """Return `data` as a tuple without the elements at the given positions.

    data    -- sequence of elements
    indices -- positions (integers) to leave out
    """
    skip = frozenset(indices)
    return tuple(item for pos, item in enumerate(data) if pos not in skip)
def group(data, indices):
    """Group entries by the key components at the given positions.

    data    -- iterable of ``(keys, value)`` entries
    indices -- positions inside `keys` that together form the group key

    Returns an iterator over ``(group_key, entries)`` pairs. `group_key` is
    the tuple of selected key components; `entries` is a list of
    ``(remaining_keys, value)`` with the grouping components removed.
    Entries whose key is too short for `indices` are reported and skipped.
    """
    groups = dict()
    min_len = max(indices) + 1 if indices else 0
    for (keys, value) in data:
        if len(keys) < min_len:
            # Report on stderr: the script writes its JSON result to stdout,
            # so a diagnostic printed there would corrupt the output.
            sys.stderr.write("Invalid: %s: %s\n" % (keys, value))
            continue
        key = tuple([keys[x] for x in indices])
        new_keys = remove_indices(keys, indices)
        groups.setdefault(key, []).append((new_keys, value))
    return iter(groups.items())
def select(data, *matchers):
    """Split entries by whether their leading key components match.

    data     -- iterable of ``(keys, value)`` entries
    matchers -- one expected value per key position; None means "any value"
                and leaves that position untouched

    Returns ``(selected, rest)``. `selected` holds the matching entries with
    the matched key positions removed; `rest` holds all other entries
    unchanged. An entry whose key is too short to be compared does not match.
    """
    fixed = [(pos, wanted) for pos, wanted in enumerate(matchers)
             if wanted is not None]
    matcher_indices = [pos for pos, _ in fixed]
    selected = []
    rest = []
    for (keys, value) in data:
        matched = all(pos < len(keys) and keys[pos] == wanted
                      for pos, wanted in fixed)
        if matched:
            new_keys = remove_indices(keys, matcher_indices)
            selected.append((new_keys, value))
        else:
            rest.append((keys, value))
    return (selected, rest)
def extract_multiline_text(data, prefix):
    """Reassemble a text that is spread over numbered keys.

    data   -- iterable of ``(keys, value)`` entries; every `keys` must have
              exactly one component
    prefix -- key prefix; entries whose key is `prefix` followed by a number
              are the lines of the text

    Returns ``(text, rest)``: the line values joined with newlines in
    ascending number order, and the list of all entries that are not lines
    of the text (in their original order).
    """
    numbered = []
    rest = []
    for (keys, value) in data:
        assert len(keys) == 1
        key = keys[0]
        num = None
        if key.startswith(prefix):
            try:
                num = int(key[len(prefix):])
            except ValueError:
                # Same prefix but no line number: not part of the text.
                num = None
        if num is None:
            rest.append((keys, value))
        else:
            numbered.append((num, value))
    numbered.sort(key=lambda item: item[0])
    text = "\n".join([line for _, line in numbered])
    return (text, rest)
def get_json_objects(data):
    """Convert parsed spec entries into a list of JSON-ready dictionaries.

    data -- list of ``(keys, value)`` entries as produced by read_spec_file

    The entries are grouped by their first two key components, which are
    stored as "vendor" and "benchmark". Every group becomes one dictionary
    with:
      * "errors", "notes", "rawconfig", "sw_compiler": multi-line texts
        reassembled from their numbered keys
      * one property per remaining entry, named by its dot-joined key
      * "results": a list with one dictionary per (benchmark, tuning, run)
        found below the "results" key
    """
    result = []
    for ((vendor, benchmark), entries) in group(data, [0, 1]):
        obj = dict()
        obj["vendor"] = vendor
        obj["benchmark"] = benchmark

        # Peel off the per-run results first, then the multi-line texts;
        # whatever is left over becomes a plain property.
        (results, entries) = select(entries, "results")
        (errors, entries) = extract_multiline_text(entries, "errors")
        (notes, entries) = extract_multiline_text(entries, "notes")
        (rawconfig, entries) = extract_multiline_text(entries, "rawconfig")
        (sw_compiler, entries) = extract_multiline_text(entries, "sw_compiler")
        obj["notes"] = notes
        obj["rawconfig"] = rawconfig
        obj["errors"] = errors
        obj["sw_compiler"] = sw_compiler
        for key, value in entries:
            obj[".".join(key)] = value

        resultlist = []
        for ((run_benchmark, tuning, run), run_entries) in group(results, [0, 1, 2]):
            resultobj = {
                "benchmark": run_benchmark,
                "tuning": tuning,
                "run": run,
            }
            for key, value in run_entries:
                resultobj[".".join(key)] = value
            resultlist.append(resultobj)
        obj["results"] = resultlist
        result.append(obj)
    return result
# Convert every file named on the command line and write all resulting
# objects as one JSON array to stdout.
result = []
for filename in sys.argv[1:]:
    # The with-block closes each input file as soon as it has been parsed.
    with open(filename, "r") as inp:
        data = read_spec_file(inp)
    result += get_json_objects(data)
json.dump(result, sys.stdout, indent=2)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment