csfile.py 12.8 KB
Newer Older
1
#!/usr/bin/env python
Daniel Armbruster's avatar
Daniel Armbruster committed
2
## @file csfile.py
Daniel Armbruster's avatar
Daniel Armbruster committed
3
4
# @brief  Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
5
6
7
8
9
10
11
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
Daniel Armbruster's avatar
Daniel Armbruster committed
12
13
# Purpose: Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
32
# Copyright (c) 2011-2012 by Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
33
34
35
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
36
# 01/01/2012  V0.1.1  finished implementation
37
# 04/01/2012  V0.1.3  implemented a debug mode using the python logging module
Daniel Armbruster's avatar
Daniel Armbruster committed
38
39
# 
# =============================================================================
40
""" CsFile module to handle checksum files. """
Daniel Armbruster's avatar
Daniel Armbruster committed
41

Daniel Armbruster's avatar
Daniel Armbruster committed
42
43
import os
import re
44
import sys
45
import pwd
Daniel Armbruster's avatar
Daniel Armbruster committed
46
import hashlib
Daniel Armbruster's avatar
Daniel Armbruster committed
47
import logging
Daniel Armbruster's avatar
Daniel Armbruster committed
48
from datetime import datetime
Daniel Armbruster's avatar
Daniel Armbruster committed
49

50
# Module metadata. License and copyright agree with the header of this file
# (GNU GPL version 3 or any later version, copyright 2011-2012).
__version__ = "V0.1"
__subversion__ = "$Id$"
__license__ = "GPLv3+"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2011-2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

59
# Read-size multiplier used while hashing: CsLine reads
# chunkSize * <block_size of the hash function> bytes per read() call, so this
# is a number of hash blocks, not a number of bytes.
chunkSize = 128 * 1024

# Prefix of the logger names (".CsFile" / ".CsLine" are appended) and the
# extra dictionary handed to logging.LoggerAdapter by both classes.
# NOTE(review): presumably overwritten by the importing application before
# any CsFile/CsLine instance is created - confirm against the callers.
csfileLoggerName = str()
csfileLogInfo = dict()
Daniel Armbruster's avatar
Daniel Armbruster committed
63

Daniel Armbruster's avatar
Daniel Armbruster committed
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Collect the subdirectories below path using os.walk().

  path itself is never part of the result. Every collected directory whose
  joined path matches one of regexes is removed again. Matching is done with
  re.match(), i.e. anchored at the start of the path only, so a pattern
  without a trailing '$' also removes every path it is a prefix of.

  Arguments:
  path        -- root directory of the search
  regexes     -- iterable of regular expressions (strings or compiled
                 patterns) describing the directories to exclude
  followLinks -- handed to os.walk() as its followlinks argument

  Returns a set of directory paths (os.path.join(root, name)).

  Raises CsFileError if an OSError occurs while walking the tree.
  """
  subDirs = set()
  try:
    # collect subdirectories
    for root, dirs, _files in os.walk(path, True, None, followLinks):
      subDirs.update(os.path.join(root, name) for name in dirs)
  except OSError as err:
    # strerror and filename may be None; never let the error report itself
    # fail with a TypeError.
    detail = "[Errno " + str(err.errno) + "] " + str(err.strerror)
    if err.filename is not None:
      detail += ": " + str(err.filename)
    raise CsFileError(83, detail)
  # exclude directories matching regexes
  for regex in regexes:
    pattern = re.compile(regex)
    subDirs = set(name for name in subDirs if pattern.match(name) is None)
  return subDirs

88
89
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  Attributes:
  line -- number identifying the place in csfile.py that raised the error
  msg  -- description of the error
  """
  def __init__(self, line, msg):
    # Keep (line, msg) as the exception arguments so that copying/pickling
    # can rebuild the exception with the same signature.
    super(CsFileError, self).__init__(line, msg)
    self.msg = msg
    self.line = line

  def __str__(self):
    """
    Return the plain message so that str(err) and tracebacks are readable
    instead of showing the repr of the (line, msg) tuple.
    """
    return str(self.msg)

  def display(self):
    """
    Write the error message and the triggering line number to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

101
102
# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksumfile. A checksumfile
  possesses the ability to take the files for generating the checksums from a
  different source directory which can be configured with the srcpath
  variable.

  Attributes:
  filedir  -- directory which contains the checksumfile
  filename -- name of the checksumfile inside filedir (".cs")
  srcpath  -- directory whose files are registered by update()
  logger   -- logging.LoggerAdapter used for all messages of this class
  """
  def __init__(self, filedir, srcpath, hashfunc='sha256'):
    self.filedir = filedir
    self.filename = ".cs"
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.srcpath = srcpath
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsFile"), csfileLogInfo)

  @staticmethod
  def _formatIOError(err):
    """
    Build the "[Errno N] message: filename" text used for CsFileError. The
    filename part is left out if the exception does not carry a filename.
    """
    text = "[Errno " + str(err.errno) + "] " + str(err.strerror)
    if err.filename is not None:
      text += ": " + str(err.filename)
    return text

  def _writeLines(self, path, mode, cslines, typeErrLine, ioErrLine):
    """
    Write cslines to path (opened with mode), one line per entry.

    All entries are validated before the file is opened so that a wrong entry
    can neither truncate an existing checksumfile (mode 'w') nor leave a
    partially written one behind.

    Raises CsFileError(typeErrLine, ...) if an entry is not a CsLine and
    CsFileError(ioErrLine, ...) if the file could not be written.
    """
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(typeErrLine, "Argument must be of type CsLine.")
    try:
      # the with statement closes the file on success and on failure
      with open(path, mode) as csfile:
        for csline in cslines:
          self.logger.debug("Writing line: %s", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(ioErrLine, self._formatIOError(err))

  def read(self):
    """
    Read a checksumfile.

    If filedir does not contain a checksumfile yet, an empty one is created
    and the lines currently held in memory are left untouched. Otherwise the
    lines held in memory are replaced by the content of the file; empty lines
    and lines starting with '#' are skipped.

    Raises CsFileError if filedir does not exist or if the checksumfile
    could not be created or read.
    """
    if not os.access(self.filedir, os.F_OK):
      raise CsFileError(122, "Invalid directory path.")
    path = os.path.join(self.filedir, self.filename)
    # no checksumfile available -> create new file
    if not os.path.isfile(path):
      self.logger.info("Creating checksumfile in %s", self.filedir)
      try:
        with open(path, 'w'):
          pass
      except IOError as err:
        raise CsFileError(130, self._formatIOError(err))
      return
    # checksumfile available -> read file
    self.logger.debug("Start reading checksumfile %s", path)
    try:
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split()) for line in csfile \
          if line.strip() and not line.lstrip().startswith('#')]
    except IOError as err:
      raise CsFileError(142, self._formatIOError(err))
    self.logger.debug("Finished reading checksumfile: %s", path)

  def write(self):
    """
    Write the entire checksumfile.

    The file is rewritten from the lines held in memory.

    Raises CsFileError if a line is not a CsLine or if writing fails.
    """
    path = os.path.join(self.filedir, self.filename)
    self.logger.debug("Start writing checksumfile: %s", path)
    self._writeLines(path, 'w', self.__cslines, 161, 163)
    self.logger.debug("Finished writing checksumfile: %s", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksumfile.

    Only the file is extended. The lines held in memory are not changed; call
    read() to refresh them.

    Raises CsFileError if an entry of cslines is not a CsLine or if writing
    fails.
    """
    path = os.path.join(self.filedir, self.filename)
    if 0 == len(cslines):
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    self.logger.debug("Start appending to checksumfile: %s", path)
    self._writeLines(path, 'a', cslines, 184, 186)
    self.logger.debug("Finished appending to checksumfile: %s", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    Every regular file directly inside srcpath which is not registered yet
    gets a freshly generated checksum line. Files whose path matches one of
    regexes (re.match(), anchored at the start of the path) are skipped, and
    so is a file named like the checksumfile inside srcpath.

    Arguments:
    regexes -- list of regular expressions; the list is not modified

    Raises CsFileError if regexes is not a list and passes on the errors of
    read(), CsLine.generate() and append().
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(198, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.path for csline in self.__cslines)
    # fetch files (paths)
    newFiles = set()
    for name in os.listdir(self.srcpath):
      candidate = os.path.join(self.srcpath, name)
      if os.path.isfile(candidate):
        newFiles.add(candidate)
    # Never register the checksumfile itself. This is an exact comparison:
    # using the path as a regular expression would also drop unrelated files,
    # because '.' matches any character and re.match() only anchors the start.
    newFiles.discard(os.path.join(self.srcpath, self.filename))
    # exclude files matching regexes
    for regex in regexes:
      pattern = re.compile(regex)
      newFiles = set(name for name in newFiles if pattern.match(name) is None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible file order)
    cslines = []
    for newFile in sorted(newFiles):
      csline = CsLine(newFile, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)
    path = os.path.join(self.filedir, self.filename)
    self.logger.debug("Update of checksumfile: %s finished.", path)

  def check(self, srcDir, beTolerant=False):
    """
    Check a checksum file which means:
    1. read checksum file
    2. calculate checksum of file which is located in srcDir and check results
    3. write the result to the checksum file

    If the third argument (beTolerant) is set to True the checking process will
    be successful, too, if a file listed in a checksumfile is not available
    anymore. Then the status of the checksum line will be set to 'warning'.

    Note that this function does not perform a check if there are unregistered
    files in the directory. Adding checksum lines to the checksumfile has to be
    done by the update function.
    """
    self.logger.debug("Start checking checksums.")
    self.read()
    if 0 == len(self.__cslines):
      self.logger.debug("CSFILE does not contain any lines.")
    for csline in self.__cslines:
      # only the file name of the registered path is looked up in srcDir
      source = os.path.join(srcDir, os.path.basename(csline.path))
      self.logger.debug("Performing check of file with source: %s", source)
      csline.check(source, beTolerant)
    self.write()

  def displayLines(self):
    """
    Display the content of the checksum file at stdout, one line per
    registered file.

    Raises CsFileError if no lines are held in memory.
    """
    if not len(self.__cslines):
      raise CsFileError(258, "CSFILE does not contain any lines.")
    for line in self.__cslines:
      sys.stdout.write(str(line) + '\n')

# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  A line consists of eight fields which are written separated by blanks:
  checksum, path, hashfunc, creationDateFile, creationLocationChecksum,
  creationDateChecksum, dateLastCheck and statusLastCheck.
  """
  def __init__(self, *args):
    """
    Create a checksum line either from a list holding the eight fields of a
    line (as read from a checksumfile) or from a path and the name of a hash
    function (both strings); in the latter case all other fields are empty
    until generate() is called.

    Raises CsFileError if the arguments fit neither form.
    """
    self.logger = logging.LoggerAdapter(logging.getLogger( \
                    csfileLoggerName+".CsLine"), csfileLogInfo)
    if len(args) >= 1 and isinstance(args[0], list):
      argList = args[0]
      if len(argList) < 8:
        raise CsFileError(272, "Invalid checksum line: expected 8 fields " \
          "but got " + str(len(argList)) + ".")
      self.checksum = argList[0]
      self.path = argList[1]
      self.hashfunc = argList[2]
      self.creationDateFile = argList[3]
      self.creationLocationChecksum = argList[4]
      self.creationDateChecksum = argList[5]
      self.dateLastCheck = argList[6]
      self.statusLastCheck = argList[7]
    elif len(args) >= 2 and isinstance(args[0], str) \
        and isinstance(args[1], str):
      self.checksum = ''
      self.path = args[0]
      self.hashfunc = args[1]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      raise CsFileError(290, "Invalid argument(s).")

  @staticmethod
  def _hexdigest(stream, hashName, chunkSize):
    """
    Return the hex digest of everything readable from stream, using the hash
    function hashName. The stream is read in pieces of chunkSize times the
    block size of the hash function.
    """
    hashfunc = hashlib.new(hashName)
    blockSize = chunkSize * hashfunc.block_size
    data = stream.read(blockSize)
    while data:
      hashfunc.update(data)
      data = stream.read(blockSize)
    return hashfunc.hexdigest()

  @staticmethod
  def _timestamp(moment):
    """
    Format a datetime the way it is stored in a checksum line.
    """
    return moment.strftime("%Y/%m/%d-%H:%M:%S")

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    Arguments:
    chunkSize -- number of hash blocks to read at once

    Raises CsFileError if the file could not be opened or read.
    """
    self.logger.debug("Calculating checksum for file: %s", self.path)
    # generate checksum
    try:
      # the with statement closes the file even if reading fails
      with open(self.path, 'rb') as stream:
        self.checksum = self._hexdigest(stream, self.hashfunc, chunkSize)
    except IOError as err:
      detail = "[Errno " + str(err.errno) + "] " + str(err.strerror)
      if err.filename is not None:
        detail += ": " + str(err.filename)
      raise CsFileError(309, detail)

    # set remaining data
    self.creationDateFile = self._timestamp( \
      datetime.fromtimestamp(os.path.getctime(self.path)))
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = self._timestamp(datetime.now())
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file located in src. In case no file had been found in src
    and the beTolerant flag had been set to True the status of the line will be
    set to 'warning' else to 'error'. If the checksums match the status is set
    to 'ok' else to 'error', too. The date of the last check is set to the
    current time in every case.
    """
    try:
      stream = open(src, 'rb')
    except IOError:
      if beTolerant:
        self.statusLastCheck = 'warning'
        self.logger.warning("While checking file %s does not exist", src)
      else:
        self.statusLastCheck = 'error'
        self.logger.error("While checking file %s does not exist", src)
    else:
      # calculate checksum
      try:
        checksum = self._hexdigest(stream, self.hashfunc, chunkSize)
      finally:
        stream.close()
      # checks
      if checksum == self.checksum:
        self.statusLastCheck = 'ok'
        self.logger.debug("Check of file: %s was successful.", src)
      else:
        # a mismatch must not keep the status of an earlier check
        self.statusLastCheck = 'error'
        self.logger.critical("File %s has no integrity anymore.", src)
    self.dateLastCheck = self._timestamp(datetime.now())

  def __str__(self):
    """
    String representation of a checksum line: the eight fields separated by
    single blanks.
    """
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, self.path, \
    self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
    self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
    
365
# -----------------------------------------------------------------------------
366
# Tests
367
if __name__ == '__main__':
  # Minimal self test: read (or create) the checksumfile of a directory. The
  # directory may be given as first command line argument; without argument
  # the original test directory is used. CsFile needs the source path as
  # second argument, here it is the directory itself.
  testDir = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    csfile = CsFile(testDir, testDir)
    csfile.read()
  except CsFileError as err:
    err.display()
    # report the failure to the calling shell
    sys.exit(1)
Daniel Armbruster's avatar
Daniel Armbruster committed
374
  
Daniel Armbruster's avatar
Daniel Armbruster committed
375
# ----- END OF csfile.py -----