csfile.py 13.6 KB
Newer Older
1
#!/usr/bin/env python
Daniel Armbruster's avatar
Daniel Armbruster committed
2
## @file csfile.py
Daniel Armbruster's avatar
Daniel Armbruster committed
3
4
# @brief  Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
5
6
7
8
9
10
11
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
Daniel Armbruster's avatar
Daniel Armbruster committed
12
13
# Purpose: Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
32
# Copyright (c) 2011-2012 by Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
33
34
35
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
36
# 01/01/2012  V0.1.1  finished implementation
37
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
Daniel Armbruster's avatar
Daniel Armbruster committed
38
39
# 
# =============================================================================
40
""" CsFile module to handle checksum files. """
Daniel Armbruster's avatar
Daniel Armbruster committed
41

Daniel Armbruster's avatar
Daniel Armbruster committed
42
43
import os
import re
44
import sys
45
import pwd
Daniel Armbruster's avatar
Daniel Armbruster committed
46
import hashlib
Daniel Armbruster's avatar
Daniel Armbruster committed
47
import logging
Daniel Armbruster's avatar
Daniel Armbruster committed
48
from datetime import datetime
Daniel Armbruster's avatar
Daniel Armbruster committed
49

50
# Module metadata. License and copyright are kept in sync with the file
# header, which places csback under the GNU GPL version 3 or (at your option)
# any later version and states the copyright years 2011-2012.
__version__ = "V0.1"
__subversion__ = "$Id$"
__license__ = "GPLv3+"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2011-2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

# Multiplier used when hashing files: data is read in pieces of
# chunkSize * <block_size of the hash function> bytes, i.e. the effective
# read size is larger than 128 KiB.
chunkSize = 1024 * 128

# Prefix of the logger names used by this module (".CsFile" / ".CsLine" are
# appended to it when the loggers are requested).
csfileLoggerName = ''
# Extra context dictionary handed to logging.LoggerAdapter.
csfileLogInfo = {}
Daniel Armbruster's avatar
Daniel Armbruster committed
63

Daniel Armbruster's avatar
Daniel Armbruster committed
64
65
66
67
68
69
70
71
72
73
74
# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Collect the subdirectories below path using os.walk().

  path itself is not part of the result.

  Arguments:
    path        -- directory at which the walk starts
    regexes     -- iterable of regular expressions; every subdirectory whose
                   full path matches one of them (re.match, i.e. anchored at
                   the start of the path) is dropped. None is treated like an
                   empty iterable.
    followLinks -- passed to os.walk() as followlinks

  Returns a set of directory paths, each one joined with the root os.walk()
  reported for it.

  Raises CsFileError if an OSError surfaces while walking.
  NOTE(review): os.walk() is called without an onerror handler; according to
  its documentation listing errors are ignored in that case -- confirm whether
  unreadable directories should really be skipped silently.
  """
  try:
    subDirs = set(os.path.join(root, name)
      for root, dirs, _ in os.walk(path, followlinks=followLinks)
      for name in dirs)
  except OSError as err:
    # str() guards against strerror/filename being None, which would turn
    # the error report itself into a TypeError
    raise CsFileError(83, "[Errno "+str(err.errno)+"] "+str(err.strerror) \
      +": "+str(err.filename))
  # exclude directories matching regexes
  for regex in (regexes or ()):
    subDirs -= set(subDir for subDir in subDirs \
      if re.match(regex, subDir) is not None)
  return subDirs

88
89
90
91
92
93
94
def hasCsFile(path):
  """
  Tell whether the directory path holds a checksumfile, i.e. a regular file
  carrying the name stored in CsFile.filename. Returns True or False.
  """
  candidate = os.path.join(path, CsFile.filename)
  return os.path.isfile(candidate)

95
96
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  Attributes:
    line -- number identifying the place in the module which raised the error
    msg  -- description of the error
  """
  def __init__(self, line, msg):
    # Initialise the base class as well so that str(err), err.args and
    # tracebacks carry the message instead of being empty.
    Exception.__init__(self, msg)
    self.msg = msg
    self.line = line

  def display(self):
    """
    Write the error message and the triggering line number to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

108
109
# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksumfile. A checksumfile
  possesses the ability to take the files for generating the checksums from a
  different sourcedirectory which can be configured with the srcpath variable.

  A checksumfile usually contains checksumlines (type CsLine) of files.
  Generally this includes the files of the subdirectories in srcpath, too.

  Attributes:
    filedir -- directory containing the checksumfile
    srcpath -- directory whose files are registered and checked
    logger  -- logging.LoggerAdapter used for all messages of the instance
  """

  # name of the checksumfile within filedir
  filename = ".cs"

  def __init__(self, filedir, srcpath, hashfunc='sha256'):
    self.filedir = filedir
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcpath = srcpath
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  @staticmethod
  def _formatIOError(err):
    """
    Build the message text for an IOError. str() is applied to every part
    because strerror and filename may be None.
    """
    return "[Errno "+str(err.errno)+"] "+str(err.strerror)+": " \
      +str(err.filename)

  def read(self):
    """
    Read a checksumfile.

    If filedir does not contain a checksumfile yet an empty one is created.
    Otherwise every line which is neither empty nor starts with '#' is split
    at whitespace and converted to a CsLine.

    Raises CsFileError if filedir does not exist or if the checksumfile cannot
    be created or opened.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(122, "Invalid directory path.")
    path = os.path.join(self.filedir, CsFile.filename)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in '%s'", self.filedir)
      try:
        open(path, 'w').close()
      except IOError as err:
        raise CsFileError(130, CsFile._formatIOError(err))
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile '%s'",path)
    try:
      # the with statement closes the file even if parsing a line fails
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split()) for line in csfile \
          if len(line.rstrip()) and line[0] != '#']
    except IOError as err:
      raise CsFileError(142, CsFile._formatIOError(err))
    self.logger.debug("Finished reading checksumfile '%s'", path)

  def write(self):
    """
    Write the entire checksumfile, one checksum line per text line. An
    existing checksumfile is overwritten.

    Raises CsFileError if the file cannot be written or if a stored line is
    not of type CsLine.
    """
    path = os.path.join(self.filedir, CsFile.filename)
    self.logger.debug("Start writing checksumfile '%s'",path)
    try:
      with open(path, 'w') as csfile:
        for csline in self.__cslines:
          self.logger.debug("Writing line: '%s'",str(csline))
          if not isinstance(csline, CsLine):
            raise CsFileError(161, "Argument must be of type CsLine.")
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(163, CsFile._formatIOError(err))
    self.logger.debug("Finished writing checksumfile '%s'", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    cslines is a list of CsLine objects. The lines are only written to the
    file; the lines held in memory by this object are left untouched.

    Raises CsFileError if the file cannot be written or if an element of
    cslines is not of type CsLine.
    """
    path = os.path.join(self.filedir, CsFile.filename)
    if 0 == len(cslines):
      # The file is opened in append mode nevertheless, which creates it in
      # case it does not exist yet.
      self.logger.debug("Empty list passed. Nothing to append.")
    self.logger.debug("Start appending to checksumfile '%s'", path)
    try:
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line '%s'", str(csline))
          if not isinstance(csline, CsLine):
            raise CsFileError(184, "Argument must be of type CsLine.")
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(186, CsFile._formatIOError(err))
    self.logger.debug("Finished appending to checksumfile '%s'", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    regexes is a list of regular expressions (default: no expressions). Files
    in srcpath whose path matches one of them (re.match) are not registered.
    The list passed by the caller is not modified. The checksumfile itself is
    never registered.

    Raises CsFileError if regexes is not a list and whatever read(), append()
    and CsLine.generate() raise. An OSError of os.listdir(srcpath) is passed
    on unchanged.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(198, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.path for csline in self.__cslines)
    # fetch files (paths)
    newFiles = set()
    for name in os.listdir(self.srcpath):
      candidate = os.path.join(self.srcpath, name)
      if os.path.isfile(candidate):
        newFiles.add(candidate)
    # Exclude the checksumfile by exact path comparison. Using its path as a
    # regular expression would let '.' match any character and would also
    # drop files which merely start with a matching prefix.
    newFiles.discard(os.path.join(self.srcpath, CsFile.filename))
    # exclude files matching regexes
    for regex in regexes:
      newFiles -= set(newFile for newFile in newFiles \
        if re.match(regex, newFile) is not None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible line order)
    cslines = []
    for newFile in sorted(newFiles):
      csline = CsLine(newFile, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)
    path = os.path.join(self.filedir, CsFile.filename)
    self.logger.debug("Update of checksumfile '%s' finished.", path)

  def check(self, regexes, beTolerant=False):
    """
    Check a checksum file which means:
    Calculate checksum of a file which is located in self.srcpath and check
    results. Files in the checksumfile matching one of the regexes are excluded.

    If the third argument (beTolerant) is set to True the checking process will
    be successful, too, if a file listed in a checksumfile is not available
    anymore. Then the status of the checksum line will be set to 'warning'.

    Only the checksum lines held in memory (see read()) are checked; the
    results are stored in these lines and are not written to the checksumfile
    by this function.

    Note that this function does not check if there are unregistered files in
    the directory. Adding checksum lines to the checksumfile has to be done by
    using the update function.
    """
    if 0 == len(self.__cslines):
      self.logger.debug( \
        "CSFILE does not contain any lines or had not been read yet.")

    # exclude those files matching regex in regexes
    self.logger.debug("Exclude files matching regexes")
    excluded = set()
    for regex in regexes:
      excluded.update(csline for csline in self.__cslines \
        if re.match(regex, csline.path) is not None)
    # perform check
    self.logger.debug("Start checking checksums ...")
    for csline in self.__cslines:
      if csline in excluded:
        continue
      # the registered path is mapped from filedir to srcpath
      path = csline.path.replace(self.filedir, self.srcpath)
      self.logger.debug( \
        "Performing check of csline-file '%s' with file '%s'", csline.path, \
        path)
      csline.check(path, beTolerant)
    self.logger.debug("Finished checking checksums.")

  def display(self):
    """
    Display the content of the checksum file to stdout, one checksum line per
    text line.
    """
    if 0 == len(self.__cslines):
      self.logger.info("CSFILE does not contain any lines.")
    for line in self.__cslines:
      # sys.stdout.write() only accepts strings, hence the conversion
      sys.stdout.write(str(line) + '\n')

284
285
# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  A checksum line consists of eight fields: checksum, path, hashfunc,
  creationDateFile, creationLocationChecksum, creationDateChecksum,
  dateLastCheck and statusLastCheck.
  """
  def __init__(self, *args):
    """
    Create a checksum line either from
      - a list holding (at least) the eight fields of a line in the order
        given in the class description, or
      - two strings: the path of the file and the name of the hash function.
        The remaining fields are initialised with empty strings.

    Raises CsFileError for any other combination of arguments.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if args and isinstance(args[0], list):
      fields = args[0]
      if len(fields) < 8:
        raise CsFileError(290, "Invalid argument(s).")
      (self.checksum, self.path, self.hashfunc, self.creationDateFile, \
        self.creationLocationChecksum, self.creationDateChecksum, \
        self.dateLastCheck, self.statusLastCheck) = fields[:8]
    elif len(args) >= 2 and isinstance(args[0], str) \
        and isinstance(args[1], str):
      self.checksum = ''
      self.path = args[0]
      self.hashfunc = args[1]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      # the exception has to be raised, constructing it alone has no effect
      raise CsFileError(290, "Invalid argument(s).")

  def _hexdigest(self, stream, chunkSize):
    """
    Return the hex digest of the data read from the open binary file stream.
    The data is read in pieces of chunkSize * <block_size of hashfunc> bytes.
    """
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    data = stream.read(blockSize)
    while data:
      hashfunc.update(data)
      data = stream.read(blockSize)
    return hashfunc.hexdigest()

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize is multiplied with the block size of the hash function to get
    the number of bytes read at once.

    Raises CsFileError if the file cannot be opened or read.
    """
    self.logger.debug("Calculating checksum for '%s'", self.path)
    # generate checksum
    try:
      with open(self.path, 'rb') as infile:
        self.checksum = self._hexdigest(infile, chunkSize)
    except IOError as err:
      raise CsFileError(309, "[Errno "+str(err.errno)+"] " \
        +str(err.strerror)+": "+str(err.filename))

    # set remaining data
    self.creationDateFile = \
      datetime.fromtimestamp(os.path.getctime(self.path)).strftime( \
      "%Y/%m/%d-%H:%M:%S")
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime("%Y/%m/%d-%H:%M:%S")
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file located in src. In case the file in src cannot be
    opened and the beTolerant flag had been set to True the status of the line
    will be set to 'warning' else to 'error'. If the checksums match the status
    is set to 'ok' else to 'error', too.

    The read size is derived from the module level variable chunkSize.
    NOTE(review): dateLastCheck is not refreshed by this function -- confirm
    whether that is intended.
    """
    try:
      infile = open(src, 'rb')
    except IOError:
      if beTolerant:
        self.statusLastCheck = 'warning'
        self.logger.warning("While checking file '%s' does not exist.", src)
      else:
        self.statusLastCheck = 'error'
        self.logger.error("While checking file '%s' does not exist.", src)
      return
    # calculate checksum; the with statement closes the file in any case
    with infile:
      checksum = self._hexdigest(infile, chunkSize)
    # checks
    if checksum == self.checksum:
      self.statusLastCheck = 'ok'
      self.logger.debug("Check of file '%s' was successful.", src)
    else:
      self.statusLastCheck = 'error'
      self.logger.critical("File '%s' has no integrity anymore.", src)

  def __str__(self):
    """
    String representation of a checksum line: the eight fields separated by
    single blanks.
    """
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, self.path, \
    self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
    self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
    
388
# -----------------------------------------------------------------------------
389
# Tests
390
if __name__ == '__main__':
  # Minimal self-test: read (or create) the checksumfile of a directory.
  # The directory may be passed as first command line argument.
  testDir = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    # CsFile requires the checksumfile directory and the source path
    testFile = CsFile(testDir, testDir)
    testFile.read()
  except CsFileError as err:
    err.display()
    # report the failure to the calling process
    sys.exit(1)
Daniel Armbruster's avatar
Daniel Armbruster committed
397
  
Daniel Armbruster's avatar
Daniel Armbruster committed
398
# ----- END OF csfile.py -----