cnl_library.py 6.26 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from io import StringIO

import json
import csv


Mario Hock's avatar
Mario Hock committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

def merge_lists(first, second):
    """
    Interleaves the elements of two lists, starting with |first|.

    E.g.:

    first = [1, 2]
    second = ["A", "B"]

    result = [1, "A", 2, "B"]

    Elements are paired up with zip(), so the result ends as soon as the
    shorter of the two inputs is exhausted.
    """

    merged = []
    for left, right in zip(first, second):
        merged.append(left)
        merged.append(right)

    return merged


Mario Hock's avatar
Mario Hock committed
26
27
28
29
## Exponential moving average
def calc_ema(values, alpha=0.2):
    """
    Calculates the exponential moving average (EMA) of a series of values.

    The first output value equals the first input value (converted to float);
    every following output is

        ema = alpha * value + (1 - alpha) * previous_ema

    @param values [iterable] The input values; each one is converted with float().
    @param alpha  [float]    Weight of the newest value.

    @return [list] The EMA series (floats), one entry per input value.
                   An empty input yields an empty list.
    """

    ret = list()
    beta = 1 - alpha

    ## init
    it = iter(values)
    try:
        ema_value = float(next(it))
    except StopIteration:
        ## Nothing to average. (Don't leak StopIteration to the caller.)
        return ret
    ret.append(ema_value)

    ## loop
    for v in it:
        ema_value = alpha * float(v) + beta * ema_value
        ret.append(ema_value)

    return ret



45
46
47
def pretty_json(data):
    """
    Serializes |data| to a JSON string with sorted keys and an indentation of 4 spaces.
    """

    formatting = { "sort_keys": True, "indent": 4 }

    return json.dumps(data, **formatting)

Mario Hock's avatar
Mario Hock committed
48
49

## Helper functions for CNLParser -- but they could also be handy in other contexts.
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

def cnl_slice(file, start_delimiter, end_delimiter):
    """
    Yields the payload lines found between two delimiter lines.

    Everything up to and including the first line that starts with
    |start_delimiter| is skipped. Afterwards every line is yielded unchanged
    (including its line ending) until a line starts with |end_delimiter|.
    Blank lines and lines beginning with "%" or "#" are left out.

    If |start_delimiter| never occurs, nothing is yielded; if |end_delimiter|
    never occurs, the slice runs until the input is exhausted.

    @param file            [iterable] Lines to scan, e.g. an open text file.
    @param start_delimiter [str] Prefix of the line that opens the slice.
    @param end_delimiter   [str] Prefix of the line that closes the slice.
    """

    ## Use one shared iterator for both loops, so that the second loop resumes
    ## where the first one stopped -- even if |file| is a list.
    lines = iter(file)

    ## Find beginning
    for line in lines:
        if ( line.startswith(start_delimiter) ):
            break

    ## Skip comments and quit on end
    for line in lines:
        if ( line.startswith(end_delimiter) ):
            return

        # skip empty or commented lines
        # (Lines read from a file keep their line ending, thus strip() before testing for emptiness.)
        if ( not line.strip() or line[0] in "%#" ):
            continue

        yield line



Mario Hock's avatar
Mario Hock committed
71
72
73
74
75
76
77
78
79
80
def create_csv_index(csv_header):
    """
    Builds a lookup table from column name to column position.

    @param csv_header [iterable] The column names, in file order.

    @return [dict] Maps each name in |csv_header| to its (0-based) position.
                   If a name occurs more than once, its last position is stored.
    """

    return { name: position for position, name in enumerate(csv_header) }

81
82
83
84
85
86
87
88
89
90
91
92
93
94


def read_header(f):
    """
    Reads the JSON header of a CNL file.

    The header consists of the lines that cnl_slice() returns for the section
    between "%% Begin_Header" and "%% End_Header"; they are concatenated and
    parsed as one JSON document.

    @param f [iterable] Lines of the CNL file, e.g. the open file object.

    @return The parsed JSON document.
    """

    header_text = "".join( cnl_slice(f, "%% Begin_Header", "%% End_Header") )

    return json.loads( header_text )


Mario Hock's avatar
Mario Hock committed
95
class CNLParser:
    """
    Parser for CPUnetLOG (CNL) files.

    A CNL file starts with the line "%% CPUnetLOGv1", followed by a JSON
    header (between "%% Begin_Header" and "%% End_Header") and a CSV body
    (between "%% Begin_Body" and "%% End_Body"). The first CSV line holds the
    column names.

    The JSON header and the CSV column names are read once by the constructor.
    The CSV values are not kept in memory; the file is re-opened and read
    again on every call of |get_csv_iterator| / |get_csv_columns|.
    """

    class WrongFileFormat_Exception(Exception):
        """Raised if the file does not have the expected CNL layout."""
        pass


    def __init__(self, filename):
        """
        Opens |filename| and reads the JSON header and the CSV column names.

        @raise WrongFileFormat_Exception If the first line is not "%% CPUnetLOGv1",
                                         or if the body holds no CSV header line.
        """

        self.filename = filename

        with open( self.filename ) as in_file:
            ## Check file format version.
            if ( in_file.readline() != "%% CPUnetLOGv1\n" ):
                raise self.WrongFileFormat_Exception()

            ## Read JSON header.
            self.header = read_header(in_file)

            ## Read CSV "header"
            csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )
            self.csv_header = self._read_csv_header(csv_reader)
            self.csv_index = create_csv_index(self.csv_header)


    def _read_csv_header(self, csv_reader):
        """
        Returns the first row of |csv_reader| (the column names).

        A missing row is reported as WrongFileFormat_Exception. Otherwise a bare
        StopIteration would escape (which turns into a RuntimeError when it
        happens inside the generator |get_csv_iterator|).
        """

        try:
            return next(csv_reader)
        except StopIteration:
            raise self.WrongFileFormat_Exception(
                    "No CSV header found in the body of {}".format(self.filename) ) from None


    def get_csv_iterator(self, fields=None):
        """
        Returns an iterator to get the csv-values line by line.

        Every line is returned as a list of floats.

        @param fields [list] Only the "columns" specified in |fields| are included in the returned list (in that order).
                      [None] All "columns" are included (order defined by |self.csv_header|).

        @raise WrongFileFormat_Exception If the CSV header is missing or differs from the one read by the constructor.
        @raise KeyError If |fields| contains a name that is not in |self.csv_header|.

        Note: This is a generator, nothing is read (or raised) before the first value is requested.
        """

        indices = None

        ## Only return selected columns (if the |fields| option is set).
        if ( fields ):
            indices = self.get_csv_indices_of(fields)

        ## Read from file.
        with open( self.filename ) as in_file:
            ## Find start of the CSV part.
            csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )
            csv_header = self._read_csv_header(csv_reader)

            ## The column indices are only valid if the file still has the same columns.
            ## (Explicit check instead of "assert", since asserts are removed by "python -O".)
            if ( csv_header != self.csv_header ):
                raise self.WrongFileFormat_Exception(
                        "CSV header of {} changed since the file was parsed".format(self.filename) )

            ## Yield line by line.
            if ( not indices ):
                for line in csv_reader:
                    yield [ float( v ) for v in line ]
            else:
                for line in csv_reader:
                    yield [ float( line[ind] ) for ind in indices ]


    def get_csv_columns(self, fields=None):
        """
        Returns a dictionary holding the CSV values grouped into columns.

        Dict-keys correspond to |self.csv_header|, if |fields| is set only the specified columns are included.
        Each dict-value is the list of the (float) values of that column, in file order.
        """

        ## TODO should we really use "get_..." for an I/O and computation intensive function..?

        if ( fields ):
            field_names = fields
        else:
            field_names = self.csv_header

        ## Create a list for each column.
        cols = [ list() for _ in range(len(field_names)) ]

        ## Read all csv lines and put the values in the corresponding columns.
        ## (Indexed access on purpose: a line that is too short raises an IndexError
        ##  instead of silently producing columns of different lengths.)
        for line in self.get_csv_iterator(fields):
            for i, col in enumerate(cols):
                col.append( line[i] )

        ## Create output dictionary.
        return dict( zip(field_names, cols) )


    ## Convenience functions ##

    def get_json_header(self):
        """Returns the complete (parsed) JSON header."""
        return self.header

    def print_json_header(self):
        """Prints the JSON header (sorted keys, indentation of 4 spaces)."""
        print( json.dumps(self.header, sort_keys=True, indent=4) )

    def get_csv_index_of(self, field_name):
        """Returns the position of the column |field_name|; raises KeyError if there is no such column."""
        return self.csv_index[field_name]

    def get_csv_indices_of(self, field_names):
        """Returns the positions of all columns named in |field_names| (same order)."""
        return [ self.get_csv_index_of(name) for name in field_names ]

    # Specific getters:
    # (Each one returns the named part of the JSON header and raises a KeyError if it is missing.)

    def get_general_header(self):
        """Returns header["General"]."""
        return self.header["General"]

    def get_type(self):
        """Returns header["General"]["Type"]."""
        return self.header["General"]["Type"]

    def get_comment(self):
        """Returns header["General"]["Comment"]."""
        return self.header["General"]["Comment"]

    def get_cpus(self):
        """Returns header["ClassDefinitions"]["CPU"]["Siblings"]."""
        return self.header["ClassDefinitions"]["CPU"]["Siblings"]

    def get_nics(self):
        """Returns header["ClassDefinitions"]["NIC"]["Siblings"]."""
        return self.header["ClassDefinitions"]["NIC"]["Siblings"]

    def get_sysinfo(self):
        """Returns header["General"]["SystemInfo"]."""
        return self.header["General"]["SystemInfo"]

    def get_hostname(self):
        """Returns the "hostname" entry of the system info, or "(unknown)" if the header does not provide it."""
        try:
            return self.get_sysinfo()["hostname"]
        except KeyError:
            return "(unknown)"

    def get_environment(self):
        """Returns header["General"]["Environment"]."""
        return self.header["General"]["Environment"]

Mario Hock's avatar
Mario Hock committed
229
230
231
232
233
234


## MAIN ##
if __name__ == "__main__":

    ### DEMO: Parse the CNL file given on the command line and print its contents.
    import sys

    if ( len(sys.argv) < 2 ):
        sys.exit( "Usage: " + sys.argv[0] + " <cnl-file>" )

    filename = sys.argv[1]
    print( filename )

    ## * Parse input file. *
    cnl_file = CNLParser(filename)

    ## Display header informations.
    print( cnl_file.get_type() )
    print( json.dumps(cnl_file.get_json_header(), sort_keys=True, indent=4) )

    print( "CPUs: " + str(cnl_file.get_cpus()) )
    print( "NICs: " + str(cnl_file.get_nics()) )

    ## Display some csv/data fields.
    ## (Set |names| to None to display all columns.)
    names = ["eth0.send", "eth0.receive"]
    print( names )

    for x in cnl_file.get_csv_iterator(names):
        ## The iterator yields floats, but str.join() only accepts strings.
        print( ", ".join( str(v) for v in x ) )