#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from io import StringIO

import json
import csv


## Exponential moving average
def calc_ema(values, alpha=0.2):
    """
    Returns the exponential moving average (EMA) of |values| as a list of floats.

    The first output equals the first input (converted to float); every
    further output is  alpha * value + (1 - alpha) * previous_output.

    @param values [iterable] Input values; each one is passed through |float()|.
    @param alpha  [float]    Weight of the newest value (default: 0.2).
    @return       [list]     One float per input value; empty if |values| is empty.
    """
    beta = 1 - alpha
    it = iter(values)

    ## init -- an empty input gives an empty result instead of a leaking StopIteration.
    try:
        first = next(it)
    except StopIteration:
        return []

    ema_value = float(first)
    ret = [ema_value]

    ## loop
    for v in it:
        ema_value = alpha * float(v) + beta * ema_value
        ret.append(ema_value)

    return ret




## Helper functions for CNLParser -- but they could also be handy in other contexts.

def cnl_slice(file, start_delimiter, end_delimiter):
    """
    Generator that yields the lines between two delimiter lines.

    |file| is consumed up to and including the first line that starts with
    |end_delimiter| (searched only after the start delimiter was seen).
    Blank lines and lines starting with "%" or "#" are not yielded.
    If no line starts with |start_delimiter|, nothing is yielded.

    @param file            Iterable of text lines (e.g. an open text file).
    @param start_delimiter [str] Prefix of the line that opens the section.
    @param end_delimiter   [str] Prefix of the line that closes the section.
    """

    ## Find beginning
    for line in file:
        if line.startswith(start_delimiter):
            break

    ## Skip comments and quit on end
    for line in file:
        if line.startswith(end_delimiter):
            return

        # Skip blank lines. Lines read from a file still carry their "\n", so a
        # plain truthiness test would never match; strip before testing.
        if not line.strip():
            continue

        # Skip commented lines.
        if line.startswith(("%", "#")):
            continue

        yield line



Mario Hock's avatar
Mario Hock committed
52
53
54
55
56
57
58
59
60
61
def create_csv_index(csv_header):
    """
    Creates an index that maps csv_header names to tuple indices.

    @param csv_header [iterable] Column names in file order.
    @return [dict] name -> position; if a name occurs more than once, the last position wins.
    """
    return { field: position for position, field in enumerate(csv_header) }

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76


def read_header(f):
    """
    Parses the JSON document located between the "%% Begin_Header" and
    "%% End_Header" lines of |f|.

    @param f  Iterable of text lines (e.g. an open text file); it is consumed
              up to the end marker.
    @return   The decoded JSON value.
    """
    header_text = "".join( cnl_slice(f, "%% Begin_Header", "%% End_Header") )
    return json.loads( header_text )



Mario Hock's avatar
Mario Hock committed
77
78
79
class CNLParser:
    """
    Parser for log files that start with the "%% CPUnetLOGv1" marker.

    The constructor reads the JSON header and the CSV column names. The CSV
    rows themselves are not kept in memory; they are re-read from |filename|
    every time |get_csv_iterator| / |get_csv_columns| is used.
    """

    def __init__(self, filename):
        """
        @param filename [str] Path of the log file.

        @raise ValueError  The file does not start with the version marker,
                           or its body contains no CSV header line.
        """
        self.filename = filename

        with open( self.filename ) as in_file:
            ## Check file format version.
            ## (Explicit raise instead of assert: asserts are stripped by "python -O".)
            if in_file.readline() != "%% CPUnetLOGv1\n":
                raise ValueError( "{}: not a CPUnetLOGv1 file".format(self.filename) )

            ## Read JSON header.
            self.header = read_header(in_file)

            ## Read CSV "header"
            csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )
            csv_header = next(csv_reader, None)
            if csv_header is None:
                raise ValueError( "{}: no CSV header line found in body".format(self.filename) )

            self.csv_header = csv_header
            self.csv_index = create_csv_index(self.csv_header)


    def get_csv_iterator(self, fields=None):
        """
        Returns an iterator to get the csv-values line by line.

        Every item is a list of strings (values are not converted).

        @param fields [list] Only the "columns" specified in |fields| are included in the returned list (in that order).
                      [None] All "columns" are included (order defined by |self.csv_header|).

        @raise KeyError      A name in |fields| is not a known column.
        @raise RuntimeError  The CSV header of the file differs from the one read by the constructor.
        """

        ## Only return selected columns (if the |fields| option is set).
        indices = self.get_csv_indices_of(fields) if fields else None

        ## Read from file.
        with open( self.filename ) as in_file:
            ## Find start of the CSV part.
            csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )

            ## The column index was built by the constructor; refuse to use it
            ## if the file no longer has the same header line.
            if next(csv_reader, None) != self.csv_header:
                raise RuntimeError( "{}: CSV header changed since the file was parsed".format(self.filename) )

            ## TODO convert every field to float..?

            ## Yield line by line.
            for line in csv_reader:
                if not indices:
                    yield line
                else:
                    yield [ line[ind] for ind in indices ]


    def get_csv_columns(self, fields=None):
        """
        Returns a dictionary holding the CSV values grouped into columns.

        Dict-keys correspond to |self.csv_header|, if |fields| is set only the specified columns are included.
        The whole CSV part of the file is read on every call.

        @raise IndexError  A CSV line has fewer values than there are requested columns.
        """

        ## TODO should we really use "get_..." for an I/O and computation intensive function..?

        field_names = fields if fields else self.csv_header

        ## Create a list for each column.
        cols = [ list() for _ in field_names ]

        ## Read all csv lines and put the values in the corresponding columns.
        ## (Indexing -- not zip -- so that a short line fails loudly instead of
        ## silently producing columns of different lengths.)
        for line in self.get_csv_iterator(fields):
            for i, col in enumerate(cols):
                col.append( line[i] )

        ## Create output dictionary.
        return dict( zip(field_names, cols) )


    ## Convenience functions ##

    def get_json_header(self):
        """Returns the parsed JSON header (the object itself, not a copy)."""
        return self.header

    def print_json_header(self):
        """Pretty-prints the JSON header to stdout (sorted keys, 4-space indent)."""
        print( json.dumps(self.header, sort_keys=True, indent=4) )

    def get_csv_index_of(self, field_name):
        """Returns the position of column |field_name|; raises KeyError if unknown."""
        return self.csv_index[field_name]

    def get_csv_indices_of(self, field_names):
        """Returns the positions of all columns in |field_names| (same order)."""
        return [ self.get_csv_index_of(name) for name in field_names ]

    # Specific getters:

    def get_type(self):
        """Returns header["General"]["Type"]."""
        return self.header["General"]["Type"]

    def get_cpus(self):
        """Returns header["ClassDefinitions"]["CPU"]["Siblings"]."""
        return self.header["ClassDefinitions"]["CPU"]["Siblings"]

    def get_nics(self):
        """Returns header["ClassDefinitions"]["NIC"]["Siblings"]."""
        return self.header["ClassDefinitions"]["NIC"]["Siblings"]



## MAIN ##
if __name__ == "__main__":

    ### DEMO: parse the file given on the command line and print some of its content.
    import sys

    ## Fail with a usage hint (instead of an IndexError) if the argument is missing.
    if len(sys.argv) < 2:
        sys.exit( "usage: {} <cnl-file>".format(sys.argv[0]) )

    filename = sys.argv[1]
    print( filename )

    ## * Parse input file. *
    cnl_file = CNLParser(filename)

    ## Display header information.
    print( cnl_file.get_type() )
    cnl_file.print_json_header()

    print( "CPUs: " + str(cnl_file.get_cpus()) )
    print( "NICs: " + str(cnl_file.get_nics()) )

    ## Display some csv/data fields.
    ## (Set |names| to None to print all columns instead.)
    names = ["eth0.send", "eth0.receive"]
    print( names )

    for x in cnl_file.get_csv_iterator(names):
        print( ", ".join(x) )