#!/usr/bin/env python
## @file csfile.py
# @brief  Provide a module to read, write and handle csback checksum files.
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
# Purpose: Provide a module to read, write and handle csback checksum files.
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
# Copyright (c) 2011-2012 by Daniel Armbruster
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
# 01/01/2012  V0.1.1  finished implementation
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
# 
# =============================================================================
""" CsFile module to handle checksum files. """
import hashlib
import logging
import logging.handlers
import os
import pwd
import re
import sys
from datetime import datetime

import csbacklog

__version__ = "V0.1"
__subversion__ = "$Id$"
# The file header places csback under the GNU GPL, version 3 or any later
# version; keep this string in sync with that notice.
__license__ = "GPLv3+"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# variables

# Factor that is multiplied with the block size of the hash function to obtain
# the number of bytes read from a file per iteration while hashing.
chunkSize = 1024 * 128

# Prefix of the logger names used by this module and the extra context that is
# handed to the logging.LoggerAdapter instances created by CsFile and CsLine.
csfileLoggerName = ''
csfileLogInfo = {}

# Building blocks of the file names: the checksumfile is BASENAME+CSSUFFIX, the
# result logfile written while checking is BASENAME+RESULTSUFFIX.
BASENAME = "checksumfile"
CSSUFFIX = ".cs"
RESULTSUFFIX = ".result"

# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Generate the set of subdirectories of path using os.walk(). Note that path
  itself is not part of the result.

  path        -- directory the walk starts from
  regexes     -- iterable of regular expressions; every collected directory
                 whose joined path matches one of them (re.match, i.e. from
                 the beginning of the string) is removed from the result
  followLinks -- handed to os.walk() as followlinks

  Returns a set of directory paths.
  Raises CsFileError if an OSError occurs while collecting the directories.
  """
  try:
    # collect subdirectories
    subDirs = set(os.path.join(root, name)
      for root, dirs, files in os.walk(path, followlinks=followLinks)
      for name in dirs)
    # exclude directories matching regexes
    for regex in regexes:
      subDirs -= set(subDir for subDir in subDirs
        if re.match(regex, subDir) is not None)
  except OSError as err:
    # strerror and filename may be None; str() keeps the error report itself
    # from failing with a TypeError
    raise CsFileError(83, "[Errno "+str(err.errno)+"] "+str(err.strerror) \
      +": "+str(err.filename))
  return subDirs

def hasCsFile(path):
  """
  Check if the directory path contains a checksumfile.

  Returns True if a regular file named BASENAME+CSSUFFIX is located directly
  in path, otherwise False.
  """
  return os.path.isfile(os.path.join(path, BASENAME+CSSUFFIX))

# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  line -- number identifying the location which triggered the error
  msg  -- description of the error
  """
  def __init__(self, line, msg):
    self.msg = msg
    self.line = line

  def __str__(self):
    """
    Return the error message, so that str(err), tracebacks and log records
    show a readable text instead of the raw argument tuple.
    """
    return str(self.msg)

  def display(self):
    """
    Write the error message and the triggering line number to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksumfile. A checksumfile
  possesses the ability to take the files for generating the checksums from a
  different source directory which can be configured with the srcdir variable.

  The checksumfile is the file BASENAME+CSSUFFIX in filedir. It contains one
  checksum line (type CsLine) per registered file. update() registers the
  regular files located directly in srcdir.
  """
  def __init__(self, filedir, srcdir, hashfunc='sha256'):
    """
    filedir  -- directory containing (or going to contain) the checksumfile
    srcdir   -- directory containing the files the checksums belong to
    hashfunc -- name of the hash function handed to new CsLine objects
    """
    self.filedir = filedir
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcdir = srcdir
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  @staticmethod
  def _formatError(err):
    """
    Build the message text for an IOError/OSError. strerror and filename may be
    None, hence everything is converted with str().
    """
    return "[Errno "+str(err.errno)+"] "+str(err.strerror)+": " \
      +str(err.filename)

  def read(self):
    """
    Read a checksumfile. If filedir does not contain a checksumfile yet an
    empty one is created instead.

    Lines which are empty or start with '#' are skipped.
    Raises CsFileError if filedir does not exist or the file cannot be
    created or opened.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(134, "Invalid directory path.")
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in '%s'", self.filedir)
      try:
        open(path, 'w').close()
      except IOError as err:
        raise CsFileError(142, self._formatError(err))
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile '%s'",path)
    try:
      # the with statement closes the file even if parsing a line fails
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split(), self.srcdir) \
          for line in csfile if len(line.rstrip()) and line[0] != '#']
    except IOError as err:
      raise CsFileError(154, self._formatError(err))
    self.logger.debug("Finished reading checksumfile '%s'", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    cslines -- list of CsLine objects

    Raises CsFileError if an element is not a CsLine (nothing is written in
    this case) or if the checksumfile cannot be written.
    """
    if 0 == len(cslines):
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    # validate up front so that an invalid element does not leave a partially
    # appended checksumfile behind
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(197, "Argument must be of type CsLine.")
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    self.logger.debug("Start appending to checksumfile '%s'", path)
    try:
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line '%s'", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(199, self._formatError(err))
    self.logger.debug("Finished appending to checksumfile '%s'", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    regexes -- list of regular expressions; files in srcdir whose name matches
               one of them (re.match) are not registered. The list is not
               modified. None is treated like an empty list.

    The checksumfile itself and the result logfiles are always excluded.
    Raises CsFileError if regexes is not a list or if reading/appending fails.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(211, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.filename for csline in self.__cslines)
    # fetch files
    newFiles = set(name for name in os.listdir(self.srcdir) \
      if os.path.isfile(os.path.join(self.srcdir, name)))
    # exclude files matching regexes. A new list is built to leave the list of
    # the caller untouched. The second pattern covers the current result
    # logfile as well as its rotated backups (suffix .1, .2), because the
    # logfile changes with every check and must not be registered.
    excludes = regexes + [BASENAME+CSSUFFIX, re.escape(BASENAME+RESULTSUFFIX)]
    for regex in excludes:
      newFiles -= set(name for name in newFiles \
        if re.match(regex, name) is not None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible line order)
    cslines = []
    for name in sorted(newFiles):
      csline = CsLine(name, self.srcdir, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)
    path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
    self.logger.debug("Update of checksumfile '%s' finished.", path)

  def check(self, regexes, beTolerant=False):
    """
    Check a checksum file which means:
    Calculate checksum of a file which is located in self.srcdir and check
    results. Files in the checksumfile matching one of the regexes are excluded.

    If the third argument (beTolerant) is set to True a file listed in the
    checksumfile which is not available anymore only causes a warning.
    Otherwise an error will be printed to the result logfile. In case a
    checksum does not match a critical message will be printed to the result
    logfile.

    The result logfile is BASENAME+RESULTSUFFIX in filedir. It is written by a
    rotating handler (15 kB per file, two backups).

    Note that this function does not check if there are unregistered files in
    the directory. Adding checksum lines to the checksumfile has to be done by
    using the update function. The lines checked are those fetched by the
    last call of read().
    """
    if 0 == len(self.__cslines):
      self.logger.debug( \
        "CSFILE does not contain any lines or had not been read yet.")
    # configure handler for logger 
    # checksumfile.result filesize 15 kB each with 3 files
    resulthandler = logging.handlers.RotatingFileHandler(os.path.join( \
      self.filedir,BASENAME+RESULTSUFFIX), maxBytes=1024*15, \
      backupCount=2)
    logFormat = \
      '%(asctime)-15s %(hostname)s[%(pid)s] %(levelname)-8s %(message)s'
    logInfo = {'hostname': os.uname()[1], 'pid': os.getpid()}
    resulthandler.setLevel(logging.INFO)
    resulthandler.setFormatter(logging.Formatter(logFormat))
    logger = logging.getLogger(csfileLoggerName+".CsFile.chk")
    logger.addHandler(resulthandler)
    resultLogger = logging.LoggerAdapter(logger, logInfo)
    try:
      self.logger.debug("Exclude files matching regexes")
      self.logger.debug("Start checking checksums ...")
      for csline in self.__cslines:
        # exclude those files matching regex in regexes
        # NOTE(review): the file name is matched here like update() does;
        # CsLine does not provide a path attribute.
        if any(re.match(regex, csline.filename) is not None \
          for regex in regexes):
          continue
        # logging expects %-style placeholders
        self.logger.debug( \
          "Performing check of file '%s' with csline in checksumfile '%s'.",\
          os.path.join(csline.srcdir,csline.filename), self.filedir) 
        csline.check(resultLogger, beTolerant)
      self.logger.debug("Finished checking checksums.")
    finally:
      # always detach and close the handler to release the logfile
      logger.removeHandler(resulthandler)
      resulthandler.close()

  def display(self):
    """
    Display the content of the checksum file to stdout, one checksum line per
    output line.
    """
    if 0 == len(self.__cslines):
      self.logger.info("CSFILE does not contain any lines.")
    for line in self.__cslines:
      sys.stdout.write(str(line) + '\n')

# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  The string representation of a checksum line consists of six fields
  separated by blanks: checksum, filename, hashfunc, creationDateFile,
  creationLocationChecksum and creationDateChecksum.
  """
  def __init__(self, *args):
    """
    Two ways of construction are supported:

    CsLine(fields, srcdir)             -- fields is the list of the six fields
                                          of a line read from a checksumfile
    CsLine(filename, srcdir, hashfunc) -- line without a checksum yet, to be
                                          completed with generate()

    Raises CsFileError if the arguments do not fit one of both forms or if
    fields contains less than six elements.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if len(args) >= 2 and isinstance(args[0], list) and \
      isinstance(args[1], str):
      argList = args[0]
      if len(argList) < 6:
        # a truncated line would fail with an IndexError otherwise
        raise CsFileError(325, "Invalid argument(s).")
      self.checksum = argList[0]
      self.filename = argList[1]
      self.hashfunc = argList[2]
      self.creationDateFile = argList[3]
      self.creationLocationChecksum = argList[4]
      self.creationDateChecksum = argList[5]
      self.srcdir = args[1]
    elif len(args) >= 3 and isinstance(args[0], str) and \
      isinstance(args[1], str) and isinstance(args[2], str):
      self.checksum = ''
      self.filename = args[0]
      self.srcdir = args[1]
      self.hashfunc = args[2]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
    else:
      raise CsFileError(325, "Invalid argument(s).")

  def _hexdigest(self, infile, chunkSize):
    """
    Return the hexadecimal digest of the data read from the opened binary file
    infile. chunkSize times the block size of the hash function bytes are read
    per iteration.
    """
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    data = infile.read(blockSize)
    while data:
      hashfunc.update(data)
      data = infile.read(blockSize)
    return hashfunc.hexdigest()

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize -- factor multiplied with the block size of the hash function to
                 get the number of bytes read per iteration

    Raises CsFileError if the file cannot be opened or read.
    """
    # generate checksum
    path = os.path.join(self.srcdir, self.filename) 
    self.logger.debug("Calculating checksum for '%s'", path)
    try:
      # the with statement closes the file even if reading fails
      with open(path, 'rb') as infile:
        self.checksum = self._hexdigest(infile, chunkSize)
    except IOError as err:
      raise CsFileError(344, "[Errno "+str(err.errno)+"] " \
        +str(err.strerror)+": "+str(err.filename))
    # set remaining data
    self.creationDateFile = \
      datetime.fromtimestamp(os.path.getctime(path)).strftime( \
      "%Y/%m/%d-%H:%M:%S")
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime("%Y/%m/%d-%H:%M:%S")

  def check(self, resultLogger, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file located in srcdir. The outcome is reported to
    resultLogger and to the logger of the line:

    - file cannot be opened: warning if beTolerant is True, otherwise error
    - checksums match: info (resultLogger) and debug (line logger)
    - checksums differ: critical

    The module variable chunkSize determines the number of bytes read per
    iteration.
    """
    path = os.path.join(self.srcdir, self.filename)
    try:
      infile = open(path, 'rb')
    except IOError:
      if beTolerant:
        resultLogger.warning("While checking: file '%s' does not exist.", path)
        self.logger.warning("While checking: file '%s' does not exist.", path)
      else:
        resultLogger.error("While checking: file '%s' does not exist.", path)
        self.logger.error("While checking: file '%s' does not exist.", path)
      return
    # calculate checksum; the file is closed even if reading fails
    with infile:
      checksum = self._hexdigest(infile, chunkSize)
    # checks
    if checksum == self.checksum:
      resultLogger.info("Check of file '%s' was successful.", path)
      self.logger.debug("Check of file '%s' was successful.", path)
    else:
      resultLogger.critical("File '%s' has no integrity anymore.", path)
      self.logger.critical("File '%s' has no integrity anymore.", path)

  def __str__(self):
    """
    String representation of a checksum line.
    """
    return '{0} {1} {2} {3} {4} {5}'.format(self.checksum, self.filename, \
    self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
    self.creationDateChecksum)

# -----------------------------------------------------------------------------
# Tests
if __name__ == '__main__':
  # Simple smoke test: read (or create) the checksumfile of a directory. The
  # directory may be passed as first command line argument.
  testdir = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    # CsFile requires the directory of the checksumfile and the source
    # directory; both are the same here
    csfile = CsFile(testdir, testdir)
    csfile.read()
  except CsFileError as err:
    err.display()
    # signal the failure to the calling process
    sys.exit(1)

# ----- END OF csfile.py -----