#!/usr/bin/env python
## @file csfile.py
# @brief  Provide a module to read, write and handle csback checksum files.
#
# -----------------------------------------------------------------------------
#
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
#
# Purpose: Provide a module to read, write and handle csback checksum files.
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
#
# Copyright (c) 2011 by Daniel Armbruster
#
# REVISIONS and CHANGES
# 15/09/2011  V0.1    Daniel Armbruster
# 01/01/2012  V0.1.1  finished implementation
# 02/01/2012  V0.1.2  implemented debugMode
#
# =============================================================================
""" CsFile module to handle checksum files. """

import hashlib
import logging
import os
import re
import sys
from datetime import datetime

__version__ = "V0.1"
__subversion__ = "$Id$"
# The file header places csback under the GNU GPL version 3 (or later).
__license__ = "GPLv3"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

# Multiplier for the read size while hashing: CsLine multiplies it with the
# block_size of the hash object to get the number of bytes per read() call.
chunkSize = 1024 * 128 # 128kB

# Name of the parent logger and the extra context passed to
# logging.LoggerAdapter by the classes of this module.
# NOTE: identifiers with two leading underscores are name-mangled when they
# are referenced inside a class body; look them up via globals() there.
__moduleLoggerName = ''
__logInfo = {}

# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Collect all subdirectories below path using os.walk().

  path itself is not part of the result. Every directory is reported as the
  walk root joined with the directory name. A directory is dropped from the
  result if re.match() (anchored at the start of the joined path) succeeds for
  at least one of the passed regular expressions.

  path        -- directory the walk starts from
  regexes     -- iterable of regular expressions used to exclude directories
  followLinks -- passed to os.walk() as its followlinks argument

  Returns a set of directory paths. Raises CsFileError if walking the tree
  raises an OSError.
  """
  subDirs = set()
  try:
    # collect subdirectories
    for root, dirNames, _fileNames in os.walk(path, True, None, followLinks):
      subDirs.update(os.path.join(root, name) for name in dirNames)
  except OSError as err:
    # str(err) also works when err.filename is None; CsFileError takes the
    # line number first and the message second.
    raise CsFileError(82, str(err))
  # exclude directories matching regexes
  patterns = [re.compile(regex) for regex in regexes]
  return set(subDir for subDir in subDirs \
    if not any(pattern.match(subDir) for pattern in patterns))

# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  line -- number identifying the place in the source which raised the error
  msg  -- description of the error
  """
  def __init__(self, line, msg):
    # Pass the message on to Exception; otherwise str(err) and tracebacks
    # would show an empty message.
    Exception.__init__(self, msg)
    self.msg = msg
    self.line = line

  def __str__(self):
    return "{0} (line {1})".format(self.msg, self.line)

  def display(self):
    """
    Write the error message and the triggering line to stderr.
    """
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksum file.

  The checksum file is named '.cs' and is located in the directory passed as
  filepath. Each line which is neither empty nor starts with '#' is parsed
  into a CsLine. The file is created (empty) on construction if it does not
  exist yet.
  """
  def __init__(self, filepath, hashfunc='sha256'):
    """
    filepath -- directory containing (or going to contain) the checksum file
    hashfunc -- hashlib algorithm name used for newly registered files

    Raises CsFileError if the checksum file cannot be created.
    """
    self.__filepath = filepath
    self.__filename = ".cs"
    self.__cslines = []
    self.__hashfunc = hashfunc
    # Identifiers with two leading underscores are name-mangled inside a
    # class body (__logInfo would become _CsFile__logInfo and raise a
    # NameError), hence the module globals are fetched by their string key.
    moduleGlobals = globals()
    loggerName = moduleGlobals.get('__moduleLoggerName', '') + ".CsFile"
    self.logger = logging.LoggerAdapter(logging.getLogger(loggerName), \
      moduleGlobals.get('__logInfo', {}))
    self.createFile()

  def __path(self):
    """
    Return the path of the checksum file.
    """
    return os.path.join(self.__filepath, self.__filename)

  def createFile(self):
    """
    Create an empty checksum file unless it exists already.

    Raises CsFileError if the file cannot be created.
    """
    path = self.__path()
    if os.path.isfile(path):
      return
    try:
      # Append mode creates the file but never truncates one which showed up
      # after the isfile() test.
      open(path, 'a').close()
    except IOError as err:
      raise CsFileError(99, str(err))

  def read(self):
    """
    Read a checksum file.

    Empty lines and lines starting with '#' are skipped; every other line is
    split at whitespace and handed to CsLine. Raises CsFileError if the file
    cannot be read.
    """
    path = self.__path()
    self.logger.debug("Start reading checksumfile %s", path)
    try:
      with open(path) as csfile:
        self.__cslines = [CsLine(line.split()) for line in csfile \
          if line.rstrip() and not line.startswith('#')]
    except IOError as err:
      # Maybe better to create the file here and keep on going.
      # Will be managed later during further dev.
      raise CsFileError(118, str(err))
    self.logger.debug("Finished reading checksumfile: %s", path)

  def write(self):
    """
    Write the entire checksumfile.

    Raises CsFileError if an entry is not a CsLine or if writing fails.
    """
    path = self.__path()
    # Validate first: mode 'w' truncates the file, so an invalid entry found
    # halfway through would otherwise destroy the existing content.
    for csline in self.__cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(140, "Argument must be of type CsLine.")
    self.logger.debug("Start writing checksumfile: %s", path)
    try:
      with open(path, 'w') as csfile:
        for csline in self.__cslines:
          self.logger.debug("Writing line: %s", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(142, str(err))
    self.logger.debug("Finished writing checksumfile: %s", path)

  def append(self, cslines):
    """
    Append checksum lines to the checksum file.

    cslines -- list of CsLine objects; an empty list is a no-op

    Raises CsFileError if an entry is not a CsLine or if writing fails.
    """
    path = self.__path()
    if not cslines:
      self.logger.debug("Empty list passed. Nothing to append.")
      return
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(167, "Argument must be of type CsLine.")
    self.logger.debug("Start appending to checksumfile: %s", path)
    try:
      with open(path, 'a') as csfile:
        for csline in cslines:
          self.logger.debug("Writing line: %s", str(csline))
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise CsFileError(169, str(err))
    self.logger.debug("Finished appending to checksumfile: %s", path)

  def update(self, regexes=None):
    """
    Update a checksum file. Also includes appending of not registered files to
    checksum file in current directory.

    regexes -- list of regular expressions; a file whose path (directory
               joined with the file name) is matched by re.match() is not
               registered. The passed list is not modified.

    Raises CsFileError if regexes is not a list or if reading/appending fails.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(182, "Pass regular expressions in a list.")
    self.logger.debug("Updating checksumfile ...")
    # fetch cslines in current csfile
    self.read()
    self.logger.debug("Fetching files not registered yet.")
    registeredFiles = set(csline.path for csline in self.__cslines)
    # fetch files (pathes)
    newFiles = set()
    for name in os.listdir(self.__filepath):
      candidate = os.path.join(self.__filepath, name)
      if os.path.isfile(candidate):
        newFiles.add(candidate)
    # The checksum file never registers itself. It is compared literally:
    # used as a regular expression its '.' would match any character and the
    # prefix match would also hide files like '.cshrc'.
    newFiles.discard(self.__path())
    # exclude files matching regexes
    for regex in regexes:
      newFiles -= set(path for path in newFiles \
        if re.match(regex, path) is not None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible file order)
    cslines = []
    for path in sorted(newFiles):
      csline = CsLine(path, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)

  def check(self, srcDir, beTolerant=False):
    """
    Check a checksum file which means:
    1. read checksum file
    2. calculate checksum of file which is located in srcDir and check results
    3. write the result to the checksum file

    If the third argument (beTolerant) is set to True the checking process will
    be successful, too, if a file listed in a checksumfile is not available
    anymore. Then the status of the checksum line will be set to 'warning'.

    Note that this function does not perform a check if there are unregistered
    files in the directory. Adding checksum lines to the checksumfile is done
    by the update function.
    """
    self.logger.debug("Start checking checksums.")
    self.read()
    if not self.__cslines:
      self.logger.debug("CSFILE does not contain any lines.")
    for csline in self.__cslines:
      # Only the file name of the registered path is looked up in srcDir.
      source = os.path.join(srcDir, os.path.basename(csline.path))
      self.logger.debug("Performing check of file with source: %s", source)
      csline.check(source, beTolerant)
    self.write()

  def displayLines(self):
    """
    Display the content of the checksum file at stdout.

    Shows the lines currently held in memory. Raises CsFileError if there are
    none.
    """
    if not self.__cslines:
      raise CsFileError(193, "CSFILE does not contain any lines.")
    for line in self.__cslines:
      sys.stdout.write(str(line) + '\n')

# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  A line consists of eight whitespace separated fields: checksum, path,
  hashfunc, creationDateFile, creationLocationChecksum, creationDateChecksum,
  dateLastCheck and statusLastCheck.
  """
  def __init__(self, *args):
    """
    Either pass a list holding the eight fields of a line (in the order listed
    in the class documentation; additional items are ignored) or pass the path
    of a file and the name of a hashlib algorithm as two strings. In the
    second case all remaining fields are empty until generate() is called.

    Raises CsFileError for any other argument combination.
    """
    # Identifiers with two leading underscores are name-mangled inside a
    # class body (__logInfo would become _CsLine__logInfo and raise a
    # NameError), hence the module globals are fetched by their string key.
    moduleGlobals = globals()
    loggerName = moduleGlobals.get('__moduleLoggerName', '') + ".CsLine"
    self.logger = logging.LoggerAdapter(logging.getLogger(loggerName), \
      moduleGlobals.get('__logInfo', {}))
    if args and isinstance(args[0], list) and len(args[0]) >= 8:
      (self.checksum, self.path, self.hashfunc, self.creationDateFile, \
        self.creationLocationChecksum, self.creationDateChecksum, \
        self.dateLastCheck, self.statusLastCheck) = args[0][:8]
    elif len(args) >= 2 and isinstance(args[0], str) \
        and isinstance(args[1], str):
      self.checksum = ''
      self.path = args[0]
      self.hashfunc = args[1]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      raise CsFileError(273, "Invalid argument")

  def __hexdigest(self, stream, chunkSize):
    """
    Return the hex digest of the data read from the binary stream. The data
    is read in pieces of chunkSize times the block size of the hash object.
    """
    hashfunc = hashlib.new(self.hashfunc)
    blockSize = chunkSize * hashfunc.block_size
    data = stream.read(blockSize)
    while data:
      hashfunc.update(data)
      data = stream.read(blockSize)
    return hashfunc.hexdigest()

  def __now(self):
    """
    Return the current local time in the timestamp format of a line.
    """
    return datetime.now().strftime("%Y/%m/%d-%H:%M:%S")

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize -- multiplied with the block size of the hash object to get the
                 number of bytes per read

    Raises CsFileError if the file cannot be read.
    """
    self.logger.debug("Calculating checksum for file: %s", self.path)
    # generate checksum
    try:
      with open(self.path, 'rb') as stream:
        self.checksum = self.__hexdigest(stream, chunkSize)
    except IOError as err:
      raise CsFileError(293, str(err))
    # set remaining data
    self.creationDateFile = \
      datetime.fromtimestamp(os.path.getctime(self.path)).strftime( \
      "%Y/%m/%d-%H:%M:%S")
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = self.__now()
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src, beTolerant=False):
    """
    Check a checksum line. The checksum of the line will be compared with the
    checksum of the file located in src. In case no file can be opened at src
    and the beTolerant flag is set to True the status of the line is set to
    'warning' else to 'error'. If the checksums match the status is set to
    'ok' else to 'error', too. The date of the last check is set to the
    current time in every case.
    """
    try:
      stream = open(src, 'rb')
    except IOError:
      if beTolerant:
        self.statusLastCheck = 'warning'
        self.logger.warning("While checking file %s does not exist", src)
      else:
        self.statusLastCheck = 'error'
        self.logger.error("While checking file %s does not exist", src)
    else:
      # calculate checksum (chunkSize is the module level setting here)
      try:
        checksum = self.__hexdigest(stream, chunkSize)
      finally:
        stream.close()
      # checks
      if checksum == self.checksum:
        self.statusLastCheck = 'ok'
      else:
        # Without this assignment a previous 'ok' would survive the mismatch.
        self.statusLastCheck = 'error'
        self.logger.critical("File %s has no integrity anymore.", src)
    self.dateLastCheck = self.__now()

  def __str__(self):
    """
    String representation of a checksum line.
    """
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, self.path, \
      self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
      self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
    
# -----------------------------------------------------------------------------
# Tests
if __name__ == '__main__':
  # Smoke test: open (and create if missing) the checksum file of a directory
  # and read it. The directory may be passed as first command line argument;
  # without an argument the former hard-coded directory is used.
  directory = sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/'
  try:
    debugMode = True
    csfile = CsFile(directory)
    csfile.read()
  except CsFileError as err:
    err.display()
    # Report the failure to the calling process; a bare sys.exit() would
    # exit with status 0.
    sys.exit(1)

# ----- END OF csfile.py -----