#!/usr/bin/env python
## @file csfile.py
# @brief  Provide a module to read, write and process csback checksumfiles.
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
# Purpose: Provide a module to read, write and process csback checksumfiles.
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
# Copyright (c) 2011-2012 by Daniel Armbruster
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
# 01/01/2012  V0.1.1  finished implementation
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
# 
# =============================================================================
""" CsFile module to handle checksumfiles. """

import hashlib
import logging
import logging.handlers
import os
import pwd
import re
import sys
from datetime import datetime

import csbacklog

51
__version__ = "V0.1"
__subversion__ = "$Id$"
# The file header places csback under the GNU GPL version 3 (or any later
# version), so the module metadata has to state the same license.
__license__ = "GPLv3"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# variables

# Number of hash blocks read per iteration while hashing a file: the readers
# in this module multiply this value by the hash object's block_size to get
# the byte count passed to read(), so the effective read size is larger than
# 128 kB.
chunkSize = 1024 * 128

# Prefix of the logger names used by this module and the extra record fields
# handed to every logging.LoggerAdapter created here.
# NOTE(review): presumably overwritten by the importing application before
# any CsFile/CsLine object is created -- confirm against the callers.
csfileLoggerName = ''
csfileLogInfo = {}

# Filename components: the checksumfile is named BASENAME+CSSUFFIX, the
# result logfile written while checking is named BASENAME+RESULTSUFFIX.
BASENAME = "checksumfile"
CSSUFFIX = ".cs"
RESULTSUFFIX = ".result"


# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Collect all subdirectories below path using os.walk().

  path        -- directory whose tree is walked; path itself is not part of
                 the result
  regexes     -- iterable of regular expressions; every directory whose
                 joined path (root + name) matches one of them with
                 re.match() is dropped from the result
  followLinks -- passed to os.walk() as followlinks

  Returns a set of directory paths. An OSError raised while collecting is
  converted into a CsFileError.
  """
  try:
    # collect subdirectories
    subDirs = set()
    for root, dirNames, _fileNames in os.walk(path, followlinks=followLinks):
      subDirs.update(os.path.join(root, name) for name in dirNames)
    # exclude directories matching regexes
    for regex in regexes:
      pattern = re.compile(regex)
      subDirs = set(subDir for subDir in subDirs \
        if pattern.match(subDir) is None)
  except OSError as err:
    # %-formatting keeps the message intact even if strerror or filename is
    # None, where string concatenation would raise a TypeError instead.
    raise CsFileError(88, "[Errno %s] %s: %s" % (err.errno, err.strerror, \
      err.filename))
  return subDirs

93
94
95
96
97
def hasCsFile(path):
  """
  Check whether the directory path contains a checksumfile.

  Returns True if a regular file named BASENAME+CSSUFFIX exists in path,
  otherwise False.
  """
  return os.path.isfile(os.path.join(path, BASENAME + CSSUFFIX))
99

100
101
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  line -- line number passed by the raising code to locate the error
  msg  -- human readable description of the error
  """
  def __init__(self, line, msg):
    # Hand the arguments to Exception so that err.args is populated (needed
    # for tracebacks and pickling) instead of being left empty.
    Exception.__init__(self, line, msg)
    self.msg = msg
    self.line = line

  def __str__(self):
    """
    Return the error message so that str(err) is meaningful.
    """
    return str(self.msg)

  def display(self):
    """
    Write the error message and the triggering line number to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

113
114
# -----------------------------------------------------------------------------
class CsFile(object):
  """
  Provides an interface to handle a csback checksumfile. A checksumfile is
  able to take the files for generating the checksums from a different source
  directory which is configured with the srcdir variable.

  A checksumfile contains checksum lines (type CsLine), one per registered
  file. The checksumfile itself is the file BASENAME+CSSUFFIX in filedir.
  """
  def __init__(self, filedir, srcdir, hashfunc='sha256'):
    """
    filedir  -- directory containing (or receiving) the checksumfile
    srcdir   -- directory containing the files the checksums belong to
    hashfunc -- hashlib algorithm name used for newly generated lines
    """
    self.filedir = filedir
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcdir = srcdir
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  def read(self):
    """
    Read a checksumfile.

    If filedir contains no checksumfile yet an empty one is created. Otherwise
    every line which is neither blank nor starting with '#' is parsed into a
    CsLine. Raises CsFileError if filedir does not exist or if the file cannot
    be created or read.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(136, "Invalid directory path.")
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in '%s'", self.filedir)
      try:
        open(path, 'w').close()
      except IOError as err:
        raise CsFileError(144, "[Errno %s] %s: %s" % (err.errno, \
          err.strerror, err.filename))
      # a freshly created checksumfile does not hold any lines
      self.__cslines = []
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile '%s'", path)
    try:
      # the with statement closes the file even if parsing a line fails
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split(), self.srcdir) \
          for line in csfile if len(line.rstrip()) and line[0] != '#']
    except IOError as err:
      raise CsFileError(156, "[Errno %s] %s: %s" % (err.errno, \
        err.strerror, err.filename))
    self.logger.debug("Finished reading checksumfile '%s'", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    cslines -- sequence of CsLine objects

    Raises CsFileError if an element is not a CsLine or if writing fails. The
    elements are validated before the file is opened, so an invalid element
    never leaves a partially written checksumfile behind.
    """
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    if 0 == len(cslines):
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(178, "Argument must be of type CsLine.")
    self.logger.debug("Start appending to checksumfile '%s'", path)
    try:
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line '%s'", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(180, "[Errno %s] %s: %s" % (err.errno, \
        err.strerror, err.filename))
    self.logger.debug("Finished appending to checksumfile '%s'", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Files in srcdir which are not registered in the
    checksumfile yet get a checksum line generated and appended.

    regexes -- list of regular expressions; files in srcdir whose name matches
               one of them with re.match() are not registered. The list is
               not modified.

    The checksumfile itself and the result logfiles are always excluded.
    Raises CsFileError if regexes is not a list.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(192, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.filename for csline in self.__cslines)
    # fetch files
    candidates = set(name for name in os.listdir(self.srcdir) \
      if os.path.isfile(os.path.join(self.srcdir, name)))
    # Exclude files matching regexes. The internal patterns are collected in
    # a new list so that the list of the caller stays untouched; the literal
    # filenames are escaped so that '.' only matches a dot.
    excludes = list(regexes)
    excludes.append(re.escape(BASENAME+CSSUFFIX))
    excludes.append(re.escape(BASENAME+RESULTSUFFIX))
    excludes.append(re.escape(BASENAME+RESULTSUFFIX)+r'\.[1-2]')
    newFiles = set()
    for name in candidates:
      excluded = False
      for regex in excludes:
        if re.match(regex, name) is not None:
          excluded = True
          break
      if not excluded:
        newFiles.add(name)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible line order)
    cslines = []
    for name in sorted(newFiles):
      csline = CsLine(name, self.srcdir, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    self.logger.debug("Update of checksumfile '%s' finished.", path)

  def check(self, regexes, beTolerant=False):
    """
    Check a checksum file which means:
    Calculate the checksum of every registered file located in self.srcdir and
    compare it with the registered checksum. Lines whose filename matches one
    of the regexes are excluded.

    If beTolerant is set to True a file which is listed in the checksumfile
    but cannot be opened anymore is reported as a warning, otherwise as an
    error. A checksum which does not match is reported as a critical message.
    All results are written to the result logfile BASENAME+RESULTSUFFIX in
    filedir, a rotating logfile of 15 kB with two backups.

    Note that this function only treats the lines fetched by a previous
    read() and that it does not check if there are unregistered files in the
    directory. Adding checksum lines to the checksumfile has to be done by
    using the update function.
    """
    if 0 == len(self.__cslines):
      self.logger.info( \
        "CSFILE does not contain any lines or had not been read yet.")
    # configure handler for logger
    # checksumfile.result filesize 15 kB each with 3 files
    resulthandler = logging.handlers.RotatingFileHandler(os.path.join( \
      self.filedir, BASENAME+RESULTSUFFIX), maxBytes=1024*15, \
      backupCount=2)
    logFormat = \
      '%(asctime)-15s %(hostname)s[%(pid)s] %(levelname)-8s %(message)s'
    logInfo = {'hostname': os.uname()[1], 'pid': os.getpid()}
    resulthandler.setLevel(logging.INFO)
    resulthandler.setFormatter(logging.Formatter(logFormat))
    logger = logging.getLogger(csfileLoggerName+".CsFile.chk")
    logger.addHandler(resulthandler)
    resultLogger = logging.LoggerAdapter(logger, logInfo)
    try:
      # exclude those files matching regex in regexes
      self.logger.debug("Exclude files matching regexes")
      toCheck = []
      for csline in self.__cslines:
        excluded = False
        for regex in regexes:
          if re.match(regex, csline.filename) is not None:
            excluded = True
            break
        if not excluded:
          toCheck.append(csline)
      # perform check
      self.logger.debug("Start checking checksums ...")
      for csline in toCheck:
        self.logger.debug( \
          "Performing check of file '%s' with csline in checksumfile '%s'.", \
          os.path.join(csline.srcdir, csline.filename), self.filedir)
        csline.check(resultLogger, beTolerant)
      self.logger.debug("Finished checking checksums.")
    finally:
      # Detach and close the handler in any case; removeHandler() alone would
      # keep the result logfile open.
      logger.removeHandler(resulthandler)
      resulthandler.close()

  def display(self):
    """
    Display the content of the checksum file to stdout, one checksum line per
    output line.
    """
    if 0 == len(self.__cslines):
      self.logger.info("CSFILE does not contain any lines.")
    for line in self.__cslines:
      # write() only accepts strings, so the line has to be converted
      sys.stdout.write(str(line) + '\n')

284

285
286
# -----------------------------------------------------------------------------
class CsLine(object):
  """
  Class to handle a checksum and further data for a registered file.

  A checksum line consists of six whitespace separated fields: checksum,
  filename, hash function name, date of the file, host on which the checksum
  had been created and creation date of the checksum.
  """
  def __init__(self, *args):
    """
    Two call signatures are supported:

    CsLine(fields, srcdir)
      fields is a list holding the six fields of a line of a checksumfile.
    CsLine(filename, srcdir, hashfunc)
      Creates a line without checksum for filename; call generate() to
      compute the checksum and the remaining fields.

    Raises CsFileError for any other combination of arguments and for a
    fields list holding less than six entries.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if len(args) >= 2 and isinstance(args[0], list) and \
      isinstance(args[1], str):
      fields = args[0]
      if len(fields) < 6:
        raise CsFileError(312, "Invalid argument(s).")
      self.checksum = fields[0]
      self.filename = fields[1]
      self.hashfunc = fields[2]
      self.creationDateFile = fields[3]
      self.creationLocationChecksum = fields[4]
      self.creationDateChecksum = fields[5]
      self.srcdir = args[1]
    elif len(args) >= 3 and isinstance(args[0], str) and \
      isinstance(args[1], str) and isinstance(args[2], str):
      self.checksum = ''
      self.filename = args[0]
      self.srcdir = args[1]
      self.hashfunc = args[2]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
    else:
      raise CsFileError(312, "Invalid argument(s).")

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize -- number of hash blocks read per iteration; the number of bytes
                 read at once is chunkSize times the block size of the hash

    Raises CsFileError if the file cannot be read.
    """
    # generate checksum
    path = os.path.join(self.srcdir, self.filename)
    self.logger.debug("Calculating checksum for '%s'", path)
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    try:
      # the with statement closes the file even if reading fails
      with open(path, 'rb') as stream:
        data = stream.read(blockSize)
        while data:
          hashfunc.update(data)
          data = stream.read(blockSize)
      # NOTE: os.path.getctime() is the time of the last metadata change on
      # Unix and the creation time on Windows.
      fileTime = os.path.getctime(path)
    except (IOError, OSError) as err:
      raise CsFileError(332, "[Errno %s] %s: %s" % (err.errno, \
        err.strerror, err.filename))
    self.checksum = hashfunc.hexdigest()
    # set remaining data
    self.creationDateFile = \
      datetime.fromtimestamp(fileTime).strftime("%Y/%m/%d-%H:%M:%S")
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime("%Y/%m/%d-%H:%M:%S")

  def check(self, resultLogger, beTolerant=False):
    """
    Check a checksum line. The checksum of the line is compared with the
    checksum of the file located in srcdir and the result is logged to
    resultLogger as well as to the logger of the line.

    If the file cannot be opened a warning is logged in case beTolerant is
    True, otherwise an error. Matching checksums are logged as info (result
    logger) and debug (line logger), differing checksums as critical.
    """
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    path = os.path.join(self.srcdir, self.filename)
    try:
      stream = open(path, 'rb')
    except IOError:
      if beTolerant:
        resultLogger.warning("While checking: file '%s' does not exist.", path)
        self.logger.warning("While checking: file '%s' does not exist.", path)
      else:
        resultLogger.error("While checking: file '%s' does not exist.", path)
        self.logger.error("While checking: file '%s' does not exist.", path)
      return
    # calculate checksum; the file gets closed even if reading fails
    with stream:
      data = stream.read(blockSize)
      while data:
        hashfunc.update(data)
        data = stream.read(blockSize)
    checksum = hashfunc.hexdigest()
    # checks
    if checksum == self.checksum:
      resultLogger.info("Check of file '%s' was successful.", path)
      self.logger.debug("Check of file '%s' was successful.", path)
    else:
      resultLogger.critical("File '%s' has no integrity anymore.", path)
      self.logger.critical("File '%s' has no integrity anymore.", path)

  def __str__(self):
    """
    String representation of a checksum line: the six fields separated by
    single blanks.
    """
    return '{0} {1} {2} {3} {4} {5}'.format(self.checksum, self.filename, \
      self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
      self.creationDateChecksum)

# -----------------------------------------------------------------------------
# Tests
if __name__ == '__main__':
  # Smoke test: read (or create) the checksumfile of a directory. The
  # directory is taken from the first command line argument and falls back to
  # the path which had been hard-coded before.
  targetDir = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    # CsFile requires the directory of the checksumfile and the source
    # directory; for this test both are the same.
    csfile = CsFile(targetDir, targetDir)
    csfile.read()
  except CsFileError as err:
    err.display()
    # signal the failure to the calling process
    sys.exit(1)

# ----- END OF csfile.py -----