csfile.py 14.1 KB
Newer Older
1
#!/usr/bin/env python
Daniel Armbruster's avatar
Daniel Armbruster committed
2
## @file csfile.py
Daniel Armbruster's avatar
Daniel Armbruster committed
3
4
# @brief  Provide a module to read, write and handle csback
# checksumfiles.
Daniel Armbruster's avatar
Daniel Armbruster committed
5
6
7
8
9
10
11
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
Daniel Armbruster's avatar
Daniel Armbruster committed
12
13
# Purpose: Provide a module to read, write and handle csback
# checksumfiles.
Daniel Armbruster's avatar
Daniel Armbruster committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
32
# Copyright (c) 2011-2012 by Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
33
34
35
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
36
# 01/01/2012  V0.1.1  finished implementation
37
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
Daniel Armbruster's avatar
Daniel Armbruster committed
38
39
# 
# =============================================================================
40
""" CsFile module to handle checksum files. """
Daniel Armbruster's avatar
Daniel Armbruster committed
41

Daniel Armbruster's avatar
Daniel Armbruster committed
42
43
import os
import re
44
import sys
45
import pwd
Daniel Armbruster's avatar
Daniel Armbruster committed
46
import hashlib
Daniel Armbruster's avatar
Daniel Armbruster committed
47
import logging
Daniel Armbruster's avatar
Daniel Armbruster committed
48
from datetime import datetime
Daniel Armbruster's avatar
Daniel Armbruster committed
49

50
__version__ = "V0.1"
__subversion__ = "$Id$"
# The license notice in the header of this file grants the GNU General Public
# License version 3 (or any later version).
__license__ = "GPLv3"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

# Factor for the size of a single read while a file is hashed. CsLine
# multiplies this value with the block size of the hash function in use, i.e.
# the number of bytes requested per read is chunkSize * <hash>.block_size.
chunkSize = 1024 * 128

# csfileLoggerName is the prefix of the logger names used by this module
# (csfileLoggerName+".CsFile" and csfileLoggerName+".CsLine"). csfileLogInfo is
# the extra dictionary handed to logging.LoggerAdapter. Both are evaluated
# whenever a CsFile or CsLine instance is created.
csfileLoggerName = ''
csfileLogInfo = {}
Daniel Armbruster's avatar
Daniel Armbruster committed
63

Daniel Armbruster's avatar
Daniel Armbruster committed
64
65
66
67
68
69
70
71
72
73
74
# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Generate a set of the subdirectories of path using os.walk(). Note that path
  itself is not part of the result.

  path        -- directory the walk starts at
  regexes     -- iterable of regular expressions; a subdirectory is removed
                 from the result if re.match() succeeds on its path (walk root
                 joined with the directory name) for at least one of them
  followLinks -- handed to os.walk() as followlinks argument

  Returns a set of paths. Raises CsFileError if an OSError occurs.
  """
  try:
    # collect subdirectories
    subDirs = set()
    for root, dirs, _files in os.walk(path, followlinks=followLinks):
      subDirs.update(os.path.join(root, name) for name in dirs)
    # exclude directories matching regexes
    for regex in regexes:
      pattern = re.compile(regex)
      subDirs = set(subDir for subDir in subDirs \
        if pattern.match(subDir) is None)
  except OSError as err:
    # %-formatting instead of string concatenation: strerror and filename of
    # an OSError may be None which would raise a TypeError while the message
    # is put together and hide the actual error
    raise CsFileError(83, "[Errno %s] %s: %s" \
      % (err.errno, err.strerror, err.filename))
  return subDirs

88
89
90
91
92
def hasCsFile(path):
  """
  Tell whether the directory path holds a checksumfile.

  Returns True if a regular file named CsFile.basename+CsFile.csSuffix exists
  directly in path, otherwise False.
  """
  csFilename = CsFile.basename + CsFile.csSuffix
  candidate = os.path.join(path, csFilename)
  return os.path.isfile(candidate)
94

95
96
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  line -- line number the raising code reports as origin of the error
  msg  -- description of the error
  """
  def __init__(self, line, msg):
    # initialize the Exception base class, too. Otherwise args stays empty and
    # an uncaught CsFileError is reported without any message.
    super(CsFileError, self).__init__(line, msg)
    self.msg = msg
    self.line = line

  def __str__(self):
    """
    String representation of the error which is the message itself.
    """
    return str(self.msg)

  def display(self):
    """
    Write the message and the line the error was triggered in to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

108
109
# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksumfile. A checksumfile
  possesses the ability to take the files for generating the checksums from a
  different sourcedirectory which can be configured with the srcpath variable.

  A checksumfile usually contains checksumlines (type CsLine) of files.

  filedir  -- directory the checksumfile itself is located in
  srcpath  -- directory holding the files the checksums are calculated for
  hashfunc -- name of the hash function used for newly registered files
  """
  # The name of a checksumfile is basename+csSuffix. Files whose names start
  # with basename+resultSuffix followed by '.1', '.2' or '.3' are never
  # registered by update().
  basename = "checksumfile"
  csSuffix = ".cs"
  resultSuffix = ".result"

  def __init__(self, filedir, srcpath, hashfunc='sha256'):
    self.filedir = filedir
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcpath = srcpath
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  @staticmethod
  def _ioErrorMessage(err):
    """
    Put together the message text for an IOError.
    """
    # %-formatting copes with strerror or filename being None whereas string
    # concatenation would raise a TypeError
    return "[Errno %s] %s: %s" % (err.errno, err.strerror, err.filename)

  def read(self):
    """
    Read a checksumfile. If there is no checksumfile in filedir yet an empty
    one will be created.

    Empty lines and lines starting with '#' are skipped. Raises CsFileError if
    filedir does not exist, if the checksumfile is not accessible or if it
    contains a line which can not be converted to a CsLine.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(134, "Invalid directory path.")
    path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in '%s'", self.filedir)
      try:
        with open(path, 'w'):
          pass
      except IOError as err:
        raise CsFileError(142, CsFile._ioErrorMessage(err))
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile '%s'",path)
    try:
      # the with statement closes the file even if the creation of a CsLine
      # fails
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split(), self.srcpath) \
          for line in csfile if len(line.rstrip()) and line[0] != '#']
    except IOError as err:
      raise CsFileError(154, CsFile._ioErrorMessage(err))
    except IndexError:
      # a line with too few fields
      raise CsFileError(154, "Malformed line in checksumfile '%s'." % path)
    self.logger.debug("Finished reading checksumfile '%s'", path)

  # The write function will be obsolete now.
  """
  def write(self):
    path = os.path.join(self.filedir, CsFile.filename)
    try:
      self.logger.debug("Start writing checksumfile '%s'",path)
      csfile = open(path, 'w')
      for csline in self.__cslines:
        self.logger.debug("Writing line: '%s'",str(csline))
        if isinstance(csline, CsLine):
          csfile.write(str(csline) + '\n')
        else:
          raise CsFileError(173, "Argument must be of type CsLine.")
    except IOError as err:
      raise CsFileError(175, "[Errno "+str(err.errno)+"] "+err.strerror+": " \
        +err.filename)
    else:
      csfile.close()
      self.logger.debug("Finished writing checksumfile '%s'", path)
  """

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    cslines -- sequence of CsLine objects

    Raises CsFileError if cslines contains an object which is not a CsLine (in
    this case nothing is written at all) or if writing fails.
    """
    path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
    if 0 == len(cslines):
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    # validate before the file is touched to avoid partially written lists
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(197, "Argument must be of type CsLine.")
    try:
      self.logger.debug("Start appending to checksumfile '%s'", path)
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line '%s'", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(199, CsFile._ioErrorMessage(err))
    self.logger.debug("Finished appending to checksumfile '%s'", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    regexes -- list of regular expressions; files in srcpath whose names match
               one of them (re.match) are not registered. The list is not
               modified.

    The checksumfile itself and the files named
    basename+resultSuffix+'.1' ... '.3' are always excluded.
    Raises CsFileError if regexes is not a list.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(211, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.filename for csline in self.__cslines)
    # Work on a copy: the list of the caller must not be changed. The literal
    # parts of the additional patterns are escaped because a '.' is supposed
    # to match a dot only.
    patterns = list(regexes)
    patterns.append(re.escape(CsFile.basename+CsFile.csSuffix))
    patterns.append(re.escape(CsFile.basename+CsFile.resultSuffix)+r'\.[1-3]')
    # fetch files which are neither excluded nor registered
    newFiles = set()
    for name in os.listdir(self.srcpath):
      if not os.path.isfile(os.path.join(self.srcpath, name)):
        continue
      if name in registeredFiles:
        continue
      if any(re.match(pattern, name) is not None for pattern in patterns):
        continue
      newFiles.add(name)
    # generate cslines of newFiles (sorted to obtain a reproducible order of
    # the lines in the checksumfile)
    cslines = []
    for name in sorted(newFiles):
      csline = CsLine(name, self.srcpath, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)
    path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
    self.logger.debug("Update of checksumfile '%s' finished.", path)

  def check(self, regexes, beTolerant=False):
    """
    Check a checksum file which means:
    Calculate checksum of a file which is located in self.srcpath and check
    results. Checksum lines matching one of the regexes are excluded.

    If the third argument (beTolerant) is set to True the checking process will
    be successful, too, if a file listed in a checksumfile is not available
    anymore. Then the status of the checksum line will be set to 'warning'.

    Note that this function does not check if there are unregistered files in
    the directory. Adding checksum lines to the checksumfile has to be done by
    using the update function.
    """
    if 0 == len(self.__cslines):
      self.logger.debug( \
        "CSFILE does not contain any lines or had not been read yet.")

    # exclude those files matching regex in regexes
    self.logger.debug("Exclude files matching regexes")
    regexes = list(regexes)
    # NOTE(review): the regexes are applied to csline.path which is the
    # directory of the file, whereas update() applies them to the bare
    # filename -- confirm which one is intended.
    selected = [csline for csline in self.__cslines \
      if not any(re.match(regex, csline.path) is not None \
      for regex in regexes)]
    # perform check
    self.logger.debug("Start checking checksums ...")
    for csline in selected:
      # The file to check is located in srcpath. (csline.path is a directory,
      # hence replacing filedir by srcpath within it never results in the path
      # of the file.)
      path = os.path.join(self.srcpath, csline.filename)
      self.logger.debug( \
        "Performing check of csline-file '%s' with file '%s'", csline.path, \
        path)
      csline.check(path, beTolerant)
    self.logger.debug("Finished checking checksums.")

  def display(self):
    """
    Display the content of the checksum file to stdout, one checksum line per
    line.
    """
    if 0 == len(self.__cslines):
      self.logger.info("CSFILE does not contain any lines.")
    for line in self.__cslines:
      # write() only accepts strings, so convert the CsLine explicitly
      sys.stdout.write(str(line) + '\n')
287

288
289
# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  A checksum line consists of the fields checksum, filename, hashfunc,
  creationDateFile, creationLocationChecksum, creationDateChecksum,
  dateLastCheck and statusLastCheck. Additionally path holds the directory the
  file is located in.
  """
  def __init__(self, *args):
    """
    Two ways of construction are supported:

    CsLine(fields, path)
      fields is the list of the fields of a line of a checksumfile, path the
      directory of the file.
    CsLine(filename, path, hashfunc)
      Creates a line for a file without a checksum yet. Call generate() to
      establish the checksum and the remaining data.

    Raises CsFileError for any other arguments or if fields consists of less
    than 8 entries.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if len(args) >= 2 and isinstance(args[0], list) and \
      isinstance(args[1], str):
      fields = args[0]
      if len(fields) < 8:
        raise CsFileError(298, "Checksum line consists of less than 8 fields.")
      self.checksum = fields[0]
      # The six trailing fields never contain blanks. So everything between
      # the checksum and those fields belongs to the filename, which may have
      # been split up because it contains blanks. (Presumes the list was
      # generated with str.split(); runs of blanks can not be restored.)
      self.filename = ' '.join(fields[1:-6])
      (self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
        self.creationDateChecksum, self.dateLastCheck, \
        self.statusLastCheck) = fields[-6:]
      self.path = args[1]
    elif len(args) >= 3 and isinstance(args[0], str) and \
      isinstance(args[1], str) and isinstance(args[2], str):
      self.checksum = ''
      self.filename = args[0]
      self.path = args[1]
      self.hashfunc = args[2]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      # the exception has to be raised; just creating it would leave an
      # object without attributes behind
      raise CsFileError(325, "Invalid argument(s).")

  def _digest(self, stream, chunkSize):
    """
    Return the hexadecimal digest of the data read from the binary file
    object stream. Per read chunkSize * <block size of the hash function>
    bytes are requested.
    """
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    data = stream.read(blockSize)
    while data:
      hashfunc.update(data)
      data = stream.read(blockSize)
    return hashfunc.hexdigest()

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize -- factor for the size of a single read (see _digest)

    Raises CsFileError if the file in path can not be read.
    """
    # generate checksum
    path = os.path.join(self.path, self.filename)
    self.logger.debug("Calculating checksum for '%s'", path)
    try:
      # the with statement closes the file in case of an error, too
      with open(path, 'rb') as stream:
        self.checksum = self._digest(stream, chunkSize)
    except IOError as err:
      raise CsFileError(344, "[Errno %s] %s: %s" \
        % (err.errno, err.strerror, err.filename))

    # set remaining data
    # NOTE(review): os.path.getctime() is documented to return the time of the
    # last metadata change on Unix systems, not the creation time.
    self.creationDateFile = \
      datetime.fromtimestamp(os.path.getctime(path)).strftime( \
      "%Y/%m/%d-%H:%M:%S")
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime("%Y/%m/%d-%H:%M:%S")
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file src. If src is a directory the file named
    self.filename within this directory is taken. In case the file can not be
    opened and the beTolerant flag had been set to True the status of the line
    will be set to 'warning' else to 'error'. If the checksums match the status
    is set to 'ok' else to 'error', too.
    """
    # NOTE(review): dateLastCheck is not touched here -- confirm whether it
    # should be set to the current time.
    if os.path.isdir(src):
      src = os.path.join(src, self.filename)
    try:
      stream = open(src, 'rb')
    except IOError:
      if beTolerant:
        self.statusLastCheck = 'warning'
        self.logger.warning("While checking file '%s' does not exist.", src)
      else:
        self.statusLastCheck = 'error'
        self.logger.error("While checking file '%s' does not exist.", src)
      return
    # calculate checksum
    with stream:
      checksum = self._digest(stream, chunkSize)
    # checks
    if checksum == self.checksum:
      self.statusLastCheck = 'ok'
      self.logger.debug("Check of file '%s' was successful.", src)
    else:
      self.statusLastCheck = 'error'
      self.logger.critical("File '%s' has no integrity anymore.", src)

  def __str__(self):
    """
    String representation of a checksum line: the eight fields separated by a
    single blank.
    """
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, \
    self.filename, self.hashfunc, self.creationDateFile, \
    self.creationLocationChecksum, self.creationDateChecksum, \
    self.dateLastCheck, self.statusLastCheck)
Daniel Armbruster's avatar
Daniel Armbruster committed
396
    
397
# -----------------------------------------------------------------------------
398
# Tests
399
if __name__ == '__main__':
  # Simple self-test: read the checksumfile of a directory (an empty one is
  # created if there is none). The directory is taken from the first command
  # line argument and defaults to the current working directory. It serves as
  # both the location of the checksumfile and the source directory, since
  # CsFile requires the two of them.
  directory = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()
  try:
    csfile = CsFile(directory, directory)
    csfile.read()
  except CsFileError as err:
    err.display()
    # report the failure to the calling process
    sys.exit(1)
Daniel Armbruster's avatar
Daniel Armbruster committed
406
  
Daniel Armbruster's avatar
Daniel Armbruster committed
407
# ----- END OF csfile.py -----