csfile.py 14.1 KB
Newer Older
1
#!/usr/bin/env python
Daniel Armbruster's avatar
Daniel Armbruster committed
2
## @file csfile.py
Daniel Armbruster's avatar
Daniel Armbruster committed
3
4
# @brief  Provide a module to read, write and process csback
# checksumfiles.
Daniel Armbruster's avatar
Daniel Armbruster committed
5
6
7
8
9
10
11
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
Daniel Armbruster's avatar
Daniel Armbruster committed
12
13
# Purpose: Provide a module to read, write and process csback
# checksumfiles.
Daniel Armbruster's avatar
Daniel Armbruster committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
32
# Copyright (c) 2011-2012 by Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
33
34
35
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
36
# 01/01/2012  V0.1.1  finished implementation
37
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
Daniel Armbruster's avatar
Daniel Armbruster committed
38
39
# 
# =============================================================================
40
""" CsFile module to handle checksum files. """
Daniel Armbruster's avatar
Daniel Armbruster committed
41

Daniel Armbruster's avatar
Daniel Armbruster committed
42
43
import os
import re
44
import sys
45
import pwd
Daniel Armbruster's avatar
Daniel Armbruster committed
46
import hashlib
Daniel Armbruster's avatar
Daniel Armbruster committed
47
import logging
Daniel Armbruster's avatar
Daniel Armbruster committed
48
from datetime import datetime
Daniel Armbruster's avatar
Daniel Armbruster committed
49

50
# Module metadata.
__version__ = "V0.1"
__subversion__ = "$Id$"
# The license header of this file places csback under the GNU General Public
# License version 3 (or, at the user's option, any later version); the tag
# below has to agree with that header.
__license__ = "GPLv3"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

59
# Read-size factor: CsFile.update() hands it to CsLine.generate() and
# CsLine.check() reads it directly. Both multiply it with the block_size of
# the hash function to get the number of bytes read per iteration.
chunkSize = 128 * 1024 # 128 * 1024 = 131072

# Prefix of the logger names used by this module (".CsFile" / ".CsLine" are
# appended to it).
csfileLoggerName = ""
# Dictionary passed as 'extra' argument to the logging.LoggerAdapter instances
# created by CsFile and CsLine.
csfileLogInfo = dict()
Daniel Armbruster's avatar
Daniel Armbruster committed
63

Daniel Armbruster's avatar
Daniel Armbruster committed
64
65
66
67
68
69
70
71
72
73
74
# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Generate the set of subdirectories of path using os.walk(). Note that path
  itself is not part of the result.

  @param path         Root directory the traversal starts at.
  @param regexes      Iterable of regular expressions (pattern strings or
                      compiled patterns). A subdirectory is excluded if its
                      path (root joined with the directory name) matches one
                      of them from the beginning of the string (re.match).
  @param followLinks  Handed to os.walk() as followlinks argument.
  @return Set holding the paths of the remaining subdirectories.
  @throws CsFileError if an OSError is raised while collecting or filtering.
  """
  subDirs = set()
  try:
    # collect subdirectories (os.walk() is used without an onerror handler)
    for root, dirNames, _ in os.walk(path, followlinks=followLinks):
      subDirs.update(os.path.join(root, name) for name in dirNames)
    # exclude directories matching regexes
    for regex in regexes:
      pattern = re.compile(regex)
      subDirs = set(sub for sub in subDirs if pattern.match(sub) is None)
  except OSError as err:
    # err.filename may be None; str.format() copes with that whereas string
    # concatenation would raise a TypeError hiding the original problem.
    raise CsFileError(83, "[Errno {0}] {1}: {2}".format( \
      err.errno, err.strerror, err.filename))
  return subDirs

88
89
90
91
92
93
94
def hasCsFile(path):
  """
  Tell whether the directory path holds a checksumfile.

  @param path  Directory to inspect.
  @return True if a regular file named CsFile.filename exists directly in
          path, otherwise False.
  """
  candidate = os.path.join(path, CsFile.filename)
  return os.path.isfile(candidate)

95
96
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  Attributes:
  msg   Description of the failure.
  line  Line number handed over by the code raising the exception.
  """
  def __init__(self, line, msg):
    # Forward the message to the base class. Otherwise str(err), err.args and
    # the last line of a traceback stay empty.
    super(CsFileError, self).__init__(msg)
    self.msg = msg
    self.line = line

  def display(self):
    """
    Write the message and the line number of the error to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

108
109
# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksumfile. A checksumfile
  possesses the ability to take the files for generating the checksums from a
  different sourcedirectory which can be configured with the srcpath variable.

  A checksumfile usually contains checksumlines (type CsLine) of files.
  Generally this includes the files of the subdirectories in srcpath, too.

  Attributes:
  filedir      Directory holding the checksumfile (named CsFile.filename).
  srcpath      Directory the files to be checksummed are taken from.
  recursive    If True update() descends into subdirectories of srcpath.
  followLinks  Handed to os.walk() as followlinks by a recursive update().
  logger       logging.LoggerAdapter used for all messages of the instance.
  """

  # name of the checksumfile inside filedir
  filename = ".cs"

  def __init__(self, filedir, srcpath, hashfunc='sha256', recursive=True, \
    followLinks=False):
    """
    @param filedir      Directory of the checksumfile.
    @param srcpath      Directory of the files the checksums are built from.
    @param hashfunc     Name of the hash function passed to new CsLine objects.
    @param recursive    Include the files of subdirectories while updating.
    @param followLinks  Follow symbolic links during a recursive update.
    """
    self.filedir = filedir
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcpath = srcpath
    self.recursive = recursive
    # honour the argument (it used to be overwritten with a constant False)
    self.followLinks = followLinks
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  def _ioError(self, line, err):
    """
    Build the CsFileError reporting the IOError err for source line 'line'.
    """
    # str.format() also copes with a missing (None) err.filename
    return CsFileError(line, "[Errno {0}] {1}: {2}".format( \
      err.errno, err.strerror, err.filename))

  def _listSourceFiles(self):
    """
    Return the set of paths of the files located in srcpath. Subdirectories
    are only taken into account if self.recursive is set.
    """
    if self.recursive:
      self.logger.debug("Fetching files recursively.")
      found = set()
      for root, _, names in os.walk(self.srcpath, \
        followlinks=self.followLinks):
        found.update(os.path.join(root, name) for name in names)
      return found
    self.logger.debug("Fetching files not recursively.")
    candidates = (os.path.join(self.srcpath, name) \
      for name in os.listdir(self.srcpath))
    return set(path for path in candidates if os.path.isfile(path))

  def _sourcePathOf(self, path):
    """
    Map the path of a checksum line to the corresponding path below srcpath.
    Only a leading filedir is exchanged; a plain str.replace() would rewrite
    every occurrence of filedir inside the path (fatal e.g. for filedir '.').
    """
    if path.startswith(self.filedir):
      return self.srcpath + path[len(self.filedir):]
    return path

  def read(self):
    """
    Read a checksumfile. If filedir does not contain a checksumfile yet an
    empty one is created.

    @throws CsFileError if filedir does not exist or the checksumfile cannot
            be created or read.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(134, "Invalid directory path.")
    path = os.path.join(self.filedir, CsFile.filename)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in '%s'", self.filedir)
      try:
        open(path, 'w').close()
      except IOError as err:
        raise self._ioError(142, err)
      # an empty file holds no lines; drop lines of a previous read
      self.__cslines = []
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile '%s'",path)
    try:
      # 'with' closes the file even if parsing a line fails
      with open(path) as csfile:
        # skip blank lines and comment lines (starting with '#')
        self.__cslines = [CsLine(line.split()) for line in csfile \
          if len(line.rstrip()) and line[0] != '#']
    except IOError as err:
      raise self._ioError(154, err)
    self.logger.debug("Finished reading checksumfile '%s'", path)

  def write(self):
    """
    Write the entire checksumfile. The previous content is replaced by the
    checksum lines currently held by this object.

    @throws CsFileError if a held line is not a CsLine or writing fails.
    """
    path = os.path.join(self.filedir, CsFile.filename)
    # validate before opening: opening truncates the file
    for csline in self.__cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(173, "Argument must be of type CsLine.")
    try:
      self.logger.debug("Start writing checksumfile '%s'",path)
      with open(path, 'w') as csfile:
        for csline in self.__cslines:
          self.logger.debug("Writing line: '%s'",str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise self._ioError(175, err)
    self.logger.debug("Finished writing checksumfile '%s'", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    @param cslines  Sequence of CsLine objects. An empty sequence is a no-op.
    @throws CsFileError if an element is not a CsLine or writing fails.
    """
    path = os.path.join(self.filedir, CsFile.filename)
    if 0 == len(cslines):
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    # validate first so that a bad element does not leave a partial append
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(197, "Argument must be of type CsLine.")
    try:
      self.logger.debug("Start appending to checksumfile '%s'", path)
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line '%s'", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise self._ioError(199, err)
    self.logger.debug("Finished appending to checksumfile '%s'", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    @param regexes  List of regular expressions. Files whose path matches one
                    of them (re.match) are not registered. The list is not
                    modified. Defaults to no exclusions.
    @throws CsFileError if regexes is not a list or reading/appending fails.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(211, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.path for csline in self.__cslines)
    # fetch files (pathes)
    newFiles = self._listSourceFiles()
    # Never register the checksumfile itself. The path is compared literally:
    # using it as a regular expression breaks as soon as srcpath contains
    # regex metacharacters.
    newFiles.discard(os.path.join(self.srcpath, CsFile.filename))
    # exclude files matching regexes
    for regex in regexes:
      pattern = re.compile(regex)
      newFiles = set(name for name in newFiles \
        if pattern.match(name) is None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible line order)
    cslines = []
    for name in sorted(newFiles):
      csline = CsLine(name, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    # NOTE(review): the appended lines are written to the file only; they are
    # not added to the lines held in memory until the next read() - confirm
    # that callers re-read before calling check() or write().
    self.append(cslines)
    path = os.path.join(self.filedir, CsFile.filename)
    self.logger.debug("Update of checksumfile '%s' finished.", path)

  def check(self, regexes, beTolerant=False):
    """
    Check a checksum file which means:
    Calculate checksum of a file which is located in self.srcpath and check
    results. Files in the checksumfile matching one of the regexes are excluded.

    If the third argument (beTolerant) is set to True the checking process will
    be successful, too, if a file listed in a checksumfile is not available
    anymore. Then the status of the checksum line will be set to 'warning'.

    Note that this function does not check if there are unregistered files in
    the directory. Adding checksum lines to the checksumfile has to be done by
    using the update function.

    @param regexes     Iterable of regular expressions matched (re.match)
                       against the path stored in each checksum line.
    @param beTolerant  Passed on to CsLine.check().
    """
    if 0 == len(self.__cslines):
      self.logger.debug( \
        "CSFILE does not contain any lines or had not been read yet.")

    # exclude those files matching regex in regexes
    self.logger.debug("Exclude files matching regexes")
    patterns = [re.compile(regex) for regex in regexes]
    # perform check
    self.logger.debug("Start checking checksums ...")
    for csline in self.__cslines:
      if any(pattern.match(csline.path) for pattern in patterns):
        continue
      path = self._sourcePathOf(csline.path)
      self.logger.debug( \
        "Performing check of csline-file '%s' with file '%s'", csline.path, \
        path)
      csline.check(path, beTolerant)
    self.logger.debug("Finished checking checksums.")

  def display(self):
    """
    Display the content of the checksum file to stdout, one checksum line per
    output line.
    """
    if 0 == len(self.__cslines):
      self.logger.info("CSFILE does not contain any lines.")
    for line in self.__cslines:
      # write() only accepts strings, hence the explicit conversion
      sys.stdout.write(str(line) + '\n')

297
298
# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  Attributes (all strings, written in this order by __str__):
  checksum                  Hex digest of the file content.
  path                      Path of the registered file.
  hashfunc                  Name of the hash function (see hashlib.new()).
  creationDateFile          Formatted os.path.getctime() of the file.
  creationLocationChecksum  Node name (os.uname()[1]) the checksum was
                            generated on.
  creationDateChecksum      Date the checksum was generated.
  dateLastCheck             Date of the last check.
  statusLastCheck           'ok', 'warning' or 'error'.
  """

  # layout of all dates stored in a checksum line
  _dateFormat = "%Y/%m/%d-%H:%M:%S"

  def __init__(self, *args):
    """
    Two ways of construction are supported:
    CsLine(fields)          fields is a list holding (at least) the eight
                            attribute values in the order listed in the class
                            documentation, e.g. a split line of a checksumfile.
    CsLine(path, hashfunc)  Two strings; the remaining attributes stay empty
                            until generate() is called.

    @throws CsFileError for any other kind of arguments.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if len(args) > 0 and isinstance(args[0], list):
      argList = args[0]
      if len(argList) < 8:
        # an incomplete line would otherwise surface as a bare IndexError
        raise CsFileError(325, "Invalid argument(s).")
      (self.checksum, self.path, self.hashfunc, self.creationDateFile, \
        self.creationLocationChecksum, self.creationDateChecksum, \
        self.dateLastCheck, self.statusLastCheck) = argList[:8]
    elif len(args) > 1 and isinstance(args[0], str) \
      and isinstance(args[1], str):
      self.path = args[0]
      self.hashfunc = args[1]
      self.checksum = ''
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      # the exception has to be raised, not only constructed
      raise CsFileError(325, "Invalid argument(s).")

  def _digest(self, stream, hashfunc, chunkSize):
    """
    Feed the content of the binary stream to hashfunc and return the hex
    digest. chunkSize * hashfunc.block_size bytes are read per iteration.
    """
    blockSize = chunkSize * hashfunc.block_size
    for data in iter(lambda: stream.read(blockSize), b''):
      hashfunc.update(data)
    return hashfunc.hexdigest()

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    @param chunkSize  Factor multiplied with the block size of the hash
                      function to get the number of bytes read at once.
    @throws CsFileError if the file cannot be opened or read.
    """
    self.logger.debug("Calculating checksum for '%s'", self.path)
    # generate checksum
    try:
      hashfunc = hashlib.new(self.hashfunc)
      # 'with' closes the file even if reading fails halfway
      with open(self.path, 'rb') as stream:
        self.checksum = self._digest(stream, hashfunc, chunkSize)
    except IOError as err:
      raise CsFileError(344, "[Errno {0}] {1}: {2}".format( \
        err.errno, err.strerror, err.filename))

    # set remaining data
    created = datetime.fromtimestamp(os.path.getctime(self.path))
    self.creationDateFile = created.strftime(CsLine._dateFormat)
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime(CsLine._dateFormat)
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file located in src. In case no file had been found in src
    and the beTolerant flag had been set to True the status of the line will be
    set to 'warning' else to 'error'. If the checksums match the status is set
    to 'ok' else to 'error', too.

    @param src         Path of the file to compare the stored checksum with.
    @param beTolerant  Downgrade a file that cannot be opened to 'warning'.
    """
    # NOTE(review): dateLastCheck is not touched here - confirm whether the
    # caller is expected to update it after a check.
    hashfunc = hashlib.new(self.hashfunc)
    try:
      stream = open(src, 'rb')
    except IOError:
      if beTolerant:
        self.statusLastCheck = 'warning'
        self.logger.warning("While checking file '%s' does not exist.", src)
      else:
        self.statusLastCheck = 'error'
        self.logger.error("While checking file '%s' does not exist.", src)
      return
    # calculate checksum; the file is closed even if reading fails
    with stream:
      checksum = self._digest(stream, hashfunc, chunkSize)
    # checks
    if checksum == self.checksum:
      self.statusLastCheck = 'ok'
      self.logger.debug("Check of file '%s' was successful.", src)
    else:
      self.statusLastCheck = 'error'
      self.logger.critical("File '%s' has no integrity anymore.", src)

  def __str__(self):
    """
    String representation of a checksum line: the eight attributes separated
    by single blanks.
    """
    fields = (self.checksum, self.path, self.hashfunc, \
      self.creationDateFile, self.creationLocationChecksum, \
      self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(*fields)
    
401
# -----------------------------------------------------------------------------
402
# Tests
403
if __name__ == '__main__':
  # Minimal self test: read (or create) the checksumfile of a directory. The
  # directory may be passed as first command line argument.
  testDir = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    # CsFile requires both the checksumfile directory and the source path
    csfile = CsFile(testDir, testDir)
    csfile.read()
  except CsFileError as err:
    err.display()
    # signal the failure to the calling process
    sys.exit(1)
Daniel Armbruster's avatar
Daniel Armbruster committed
410
  
Daniel Armbruster's avatar
Daniel Armbruster committed
411
# ----- END OF csfile.py -----