cnl_library.py 6.62 KB
Newer Older
1 2 3 4 5 6 7
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from io import StringIO

import json
import csv
8
import os
Mario Hock's avatar
Mario Hock committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24

def merge_lists(first, second):
    """
    Interleaves the items of two lists: first[0], second[0], first[1], ...

    Items are consumed pairwise, so the result ends with the shorter of the
    two inputs (surplus items of the longer one are dropped).

    E.g.:

    first = [1, 2]
    second = ["A", "B"]

    result = [1, "A", 2, "B"]
    """

    merged = []
    for left, right in zip(first, second):
        merged.append(left)
        merged.append(right)

    return merged


Mario Hock's avatar
Mario Hock committed
25 26 27 28
## Exponential moving average
def calc_ema(values, alpha=0.2):
    ret = list()
    beta = 1 - alpha
Mario Hock's avatar
Mario Hock committed
29

Mario Hock's avatar
Mario Hock committed
30 31 32 33 34 35 36 37 38 39 40 41 42 43
    ## init
    it = iter(values)
    ema_value = float(next(it))
    ret.append(ema_value)

    ## loop
    for v in it:
        ema_value = alpha * float(v) + beta * ema_value
        ret.append(ema_value)

    return ret



44 45 46
def pretty_json(data):
    """Serializes |data| to a JSON string with sorted keys and an indentation of 4 spaces."""
    formatting = {"sort_keys": True, "indent": 4}
    return json.dumps(data, **formatting)

Mario Hock's avatar
Mario Hock committed
47 48

## Helper functions for CNLParser -- but they could also be handy in other contexts.
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69

def cnl_slice(file, start_delimiter, end_delimiter):
    """
    Generator that yields the payload lines of one delimited section.

    Everything up to and including the first line starting with
    |start_delimiter| is consumed and discarded. After that, lines are yielded
    until a line starting with |end_delimiter| is found (or the input ends).
    Blank lines and lines starting with "%" or "#" are skipped.

    @param file             Iterable of text lines, e.g. an open text file or a list of strings.
    @param start_delimiter  Prefix of the line that opens the section.
    @param end_delimiter    Prefix of the line that closes the section.
    """

    ## Use one shared iterator for both loops, so that the second loop
    ## continues where the first one stopped -- also for inputs like lists,
    ## which would otherwise be iterated from the beginning again.
    lines = iter(file)

    ## Find beginning
    for line in lines:
        if line.startswith(start_delimiter):
            break

    ## Skip comments and quit on end
    for line in lines:
        ## Checked first: the end delimiter itself may start with a comment character.
        if line.startswith(end_delimiter):
            return

        # skip empty or commented lines
        # (a blank line read from a file is "\n", not "", hence the strip)
        if not line.strip() or line.startswith(("%", "#")):
            continue

        yield line



Mario Hock's avatar
Mario Hock committed
70 71 72 73 74 75 76 77 78 79
def create_csv_index(csv_header):
    """
    Builds a lookup table from column name to column position.

    @param csv_header  Iterable of column names, in file order.

    @return [dict] Maps each name to its 0-based position; if a name occurs
                   more than once, the last position wins.
    """
    return { name: position for position, name in enumerate(csv_header) }

80 81 82 83 84 85 86 87 88 89 90 91 92 93


def read_header(f):
    """
    Reads the JSON header of a CNL file.

    Collects the lines between "%% Begin_Header" and "%% End_Header" (as
    delivered by |cnl_slice|) and parses them as one JSON document.

    @param f  Open text file (or other line iterator), positioned before the header.

    @return  The parsed JSON header.
    """
    header_lines = cnl_slice(f, "%% Begin_Header", "%% End_Header")
    json_text = "".join(header_lines)

    return json.loads(json_text)


Mario Hock's avatar
Mario Hock committed
94
class CNLParser:
    """
    Parser for CPUnetLOG ("CNL") files.

    As read by this class, a CNL file consists of
      - the version line "%% CPUnetLOGv1",
      - a JSON header between "%% Begin_Header" and "%% End_Header",
      - a CSV body between "%% Begin_Body" and "%% End_Body", whose first
        line holds the column names.

    The constructor reads the JSON header and the CSV column names; the CSV
    values are (re-)read from the file whenever they are requested.
    """

    class WrongFileFormat_Exception(Exception):
        """Raised if the given path is not a readable CPUnetLOGv1 file."""
        pass


    def __init__(self, filename):
        """
        Opens |filename| and parses the JSON header and the CSV column names.

        @param filename  Path of the CNL file.

        @raise WrongFileFormat_Exception  If |filename| is a directory, does not start with the
                                          version line, cannot be decoded as text, has a malformed
                                          JSON header, or has no CSV header line.
        """
        self.filename = filename

        if ( os.path.isdir(self.filename) ):
            raise self.WrongFileFormat_Exception()

        with open( self.filename ) as in_file:
            try:
                ## Check file format version.
                if ( in_file.readline() != "%% CPUnetLOGv1\n" ):
                    raise self.WrongFileFormat_Exception()

                ## Read JSON header.
                self.header = read_header(in_file)

                ## Read CSV "header"
                csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )
                self.csv_header = next(csv_reader)
                self.csv_index = create_csv_index(self.csv_header)
            except (ValueError, StopIteration) as error:
                ## ValueError covers UnicodeDecodeError (file is not text) as well as JSON
                ## decoding errors (malformed header); StopIteration means that the CSV
                ## body has no header line. All of them mean: not a valid CNL file.
                raise self.WrongFileFormat_Exception() from error


    def get_csv_iterator(self, fields=None):
        """
        Returns an iterator to get the csv-values line by line.

        Every line is returned as a list of floats. Empty CSV rows are skipped.

        @param fields [list] Only the "columns" specified in |fields| are included in the returned list (in that order).
                      [None] All "columns" are included (order defined by |self.csv_header|).

        @raise WrongFileFormat_Exception  If the CSV header found in the file differs from
                                          the one read by the constructor (or is missing).
        """

        indices = None

        ## Only return selected columns (if the |fields| option is set).
        if ( fields ):
            indices = self.get_csv_indices_of(fields)


        ## Read from file.
        with open( self.filename ) as in_file:
            ## Find start of the CSV part.
            csv_reader = csv.reader( cnl_slice(in_file, "%% Begin_Body", "%% End_Body"), skipinitialspace=True )

            ## The file may have changed since the constructor ran. Use an explicit check:
            ## an |assert| vanishes with "python -O", and a bare next() would raise
            ## StopIteration, which turns into a RuntimeError inside a generator.
            csv_header = next(csv_reader, None)
            if ( csv_header != self.csv_header ):
                raise self.WrongFileFormat_Exception()

            ## Yield line by line.
            for line in csv_reader:
                ## csv.reader returns an empty list for an empty row; nothing to convert.
                if ( not line ):
                    continue

                if ( indices ):
                    yield [ float( line[ind] ) for ind in indices ]
                else:
                    yield [ float( v ) for v in line ]


    def get_csv_columns(self, fields=None):
        """
        Returns a dictionary holding the CSV values grouped into columns.

        Dict-keys correspond to |self.csv_header|, if |fields| is set only the specified columns are included.
        Note: This reads the whole CSV body from the file.
        """

        ## TODO should we really use "get_..." for an I/O and computation intensive function..?

        if ( fields ):
            field_names = fields
        else:
            field_names = self.csv_header

        ## Create a list for each column.
        cols = [ list() for _ in field_names ]

        ## Read all csv lines and put the values in the corresponding columns.
        ## (Indexing |line| on purpose: a line that is too short raises an IndexError.)
        for line in self.get_csv_iterator(fields):
            for i, col in enumerate(cols):
                col.append( line[i] )

        ## Create output dictionary.
        return dict( zip(field_names, cols) )


    ## Convenience functions ##

    def get_json_header(self):
        """Returns the complete parsed JSON header."""
        return self.header

    def print_json_header(self):
        """Prints the JSON header (sorted keys, indentation of 4 spaces)."""
        print( json.dumps(self.header, sort_keys=True, indent=4) )

    def get_csv_index_of(self, field_name):
        """Returns the position of the column |field_name|; raises KeyError for an unknown name."""
        return self.csv_index[field_name]

    def get_csv_indices_of(self, field_names):
        """Returns the positions of all columns in |field_names| (in that order)."""
        return [ self.get_csv_index_of(name) for name in field_names ]

    # Specific getters:

    def get_general_header(self):
        """Returns the "General" section of the JSON header."""
        return self.header["General"]

    def get_type(self):
        """Returns header["General"]["Type"]."""
        return self.header["General"]["Type"]

    def get_comment(self):
        """Returns header["General"]["Comment"]."""
        return self.header["General"]["Comment"]

    def get_cpus(self):
        """Returns header["ClassDefinitions"]["CPU"]["Siblings"]."""
        return self.header["ClassDefinitions"]["CPU"]["Siblings"]

    def get_nics(self):
        """Returns header["ClassDefinitions"]["NIC"]["Siblings"]."""
        return self.header["ClassDefinitions"]["NIC"]["Siblings"]

    def get_sysinfo(self):
        """Returns header["General"]["SystemInfo"]."""
        return self.header["General"]["SystemInfo"]

    def get_hostname(self):
        """Returns the "hostname" entry of the system info, or "(unknown)" if a key on the way is missing."""
        try:
            return self.get_sysinfo()["hostname"]
        except KeyError:
            return "(unknown)"

    def get_environment(self):
        """Returns header["General"]["Environment"]."""
        return self.header["General"]["Environment"]

    def get_human_readable_date(self):
        """Returns the first element of header["General"]["Date"]."""
        return self.header["General"]["Date"][0]

Mario Hock's avatar
Mario Hock committed
238 239 240 241 242 243


## MAIN ##
if __name__ == "__main__":

    ### DEMO: Print the header information and two data columns of a CNL file.
    import sys

    ## Print a usage hint (instead of an IndexError traceback) if the file name is missing.
    if ( len(sys.argv) < 2 ):
        sys.exit( "Usage: " + sys.argv[0] + " <cnl-file>" )

    filename = sys.argv[1]
    print( filename )

    ## * Parse input file. *
    cnl_file = CNLParser(filename)

    ## Display header informations.
    print( cnl_file.get_type() )
    print( json.dumps(cnl_file.get_json_header(), sort_keys=True, indent=4) )

    print( "CPUs: " + str(cnl_file.get_cpus()) )
    print( "NICs: " + str(cnl_file.get_nics()) )

    ## Display some csv/data fields.
    names = ["eth0.send", "eth0.receive"]
    print( names )

    for x in cnl_file.get_csv_iterator(names):
        ## The iterator yields floats, but str.join() only accepts strings.
        print( ", ".join( str(v) for v in x ) )