csfile.py 12.5 KB
Newer Older
1
#!/usr/bin/env python
Daniel Armbruster's avatar
Daniel Armbruster committed
2
## @file csfile.py
Daniel Armbruster's avatar
Daniel Armbruster committed
3
4
# @brief  Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
5
6
7
8
9
10
11
# 
# -----------------------------------------------------------------------------
# 
# $Id$
# @author Daniel Armbruster
# \date 15/09/2011
# 
Daniel Armbruster's avatar
Daniel Armbruster committed
12
13
# Purpose: Provide a module to read, write and process csback
# checksum files.
Daniel Armbruster's avatar
Daniel Armbruster committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
# 
# Copyright (c) 2011 by Daniel Armbruster
# 
# REVISIONS and CHANGES 
# 15/09/2011  V0.1    Daniel Armbruster
Daniel Armbruster's avatar
Daniel Armbruster committed
36
37
# 01/01/2012  V0.1.1  finished implementation
# 02/01/2012  V0.1.2  implemented debugMode
Daniel Armbruster's avatar
Daniel Armbruster committed
38
39
# 
# =============================================================================
40
""" CsFile module to handle checksum files. """
Daniel Armbruster's avatar
Daniel Armbruster committed
41

Daniel Armbruster's avatar
Daniel Armbruster committed
42
43
import os
import re
44
import sys
Daniel Armbruster's avatar
Daniel Armbruster committed
45
import hashlib
Daniel Armbruster's avatar
Daniel Armbruster committed
46
import logging
Daniel Armbruster's avatar
Daniel Armbruster committed
47
from datetime import datetime
Daniel Armbruster's avatar
Daniel Armbruster committed
48

49
# NOTE(review): the revision history in the file header goes up to V0.1.2
# while this string still says V0.1 -- confirm which one is current.
__version__ = "V0.1"
__subversion__ = "$Id$"
# NOTE(review): the license text in the file header grants "version 3 of the
# License, or (at your option) any later version" while this string says
# GPLv2 -- confirm which one is intended.
__license__ = "GPLv2"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"

# -----------------------------------------------------------------------------
# global variables

# Read-size factor for hashing: CsLine multiplies this value with the block
# size of the selected hash function to get the number of bytes per read.
chunkSize = 1024 * 128 # 128kB
# If True, CsReport progress messages are written to stderr.
debugMode = False
Daniel Armbruster's avatar
Daniel Armbruster committed
60

Daniel Armbruster's avatar
Daniel Armbruster committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -----------------------------------------------------------------------------
# functions

def getSubDirectories(path, regexes, followLinks=False):
  """
  Collect all subdirectories below path using os.walk().

  path itself is not part of the result. Every collected directory whose
  full path matches one of the regular expressions in regexes is removed from
  the result. Matching is done with re.match(), i.e. anchored at the start of
  the path only.

  path        -- root directory of the walk
  regexes     -- iterable of regular expressions applied to the full paths
  followLinks -- passed to os.walk() as its followlinks argument

  Returns a set of directory paths.
  Raises CsFileError if an OSError occurs while walking the tree.
  """
  subDirs = set()
  try:
    # collect subdirectories
    for root, dirNames, _ in os.walk(path, True, None, followLinks):
      subDirs.update(os.path.join(root, name) for name in dirNames)
  except OSError as err:
    # CsFileError expects (line, msg); strerror/filename may be None, hence
    # the explicit str() conversions.
    raise CsFileError(80, "[Errno "+str(err.errno)+"] "+str(err.strerror)+ \
      ": "+str(err.filename))
  # exclude directories matching regexes
  for regex in regexes:
    subDirs -= set(d for d in subDirs if re.match(regex, d) is not None)
  return subDirs

85
86
# -----------------------------------------------------------------------------
class CsFileError(Exception):
  """
  Exception class of csfile module.

  line -- number reported as "triggered in line" (call sites pass hard-coded
          integers)
  msg  -- error message
  """
  def __init__(self, line, msg):
    self.msg = msg
    self.line = line

  def __str__(self):
    """Return the message together with the triggering line number."""
    return str(self.msg) + " (triggered in line: " + str(self.line) + ")"

  def display(self):
    """Write the error message and the triggering line number to stderr."""
    sys.stderr.write("csfile (ERROR): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")

class CsReport(CsFileError):
  """
  Auxiliary class for debugging.

  Carries the same (line, msg) pair as CsFileError but is displayed with a
  REPORT prefix instead of an ERROR prefix.
  """
  def display(self):
    """Write the report message and the reporting line number to stderr."""
    sys.stderr.write("csfile (REPORT): " + str(self.msg) + "\n")
    sys.stderr.write("triggered in line: " + str(self.line) + "\n")
105
106
107

# -----------------------------------------------------------------------------
class CsFile:
  """
  Provides an interface to handle a csback checksum file.

  The checksum file is called '.cs' and is located in the directory filepath.
  An empty file is created on construction if it does not exist yet. Lines
  read from or appended to the file are kept in memory as CsLine objects.
  """
  def __init__(self, filepath, hashfunc='sha256'):
    """
    filepath -- directory containing the checksum file
    hashfunc -- name of the hash function used for files registered by
                update()

    Raises CsFileError if the checksum file cannot be created.
    """
    self.__filepath = filepath
    self.__filename = ".cs"
    self.__cslines = []
    self.__hashfunc = hashfunc
    self.createFile()

  def __path(self):
    """Return the full path of the checksum file."""
    return os.path.join(self.__filepath, self.__filename)

  @staticmethod
  def __ioError(line, err):
    """Convert an IOError into a CsFileError carrying the given line number."""
    # strerror and filename may be None, hence the str() conversions
    return CsFileError(line, "[Errno "+str(err.errno)+"] "+ \
      str(err.strerror)+": "+str(err.filename))

  def createFile(self):
    """
    Create an empty checksum file unless it already exists.

    Raises CsFileError if the file cannot be created.
    """
    path = self.__path()
    if os.path.isfile(path):
      return
    try:
      # append mode creates the file but can never truncate existing content
      with open(path, 'a'):
        pass
    except IOError as err:
      raise self.__ioError(99, err)

  def read(self):
    """
    Read a checksum file.

    Empty lines and lines starting with '#' are skipped, every other line is
    split at whitespace and converted into a CsLine.

    Raises CsFileError if the file cannot be read or contains a line with too
    few fields.
    """
    path = self.__path()
    if debugMode:
      CsReport(111, "Start reading checksumfile: "+path).display()
    try:
      with open(path) as csfile:
        records = [line.split() for line in csfile \
          if line.strip() and not line.startswith('#')]
    except IOError as err:
      # Maybe better to create the file here and keep on going.
      # Will be managed later during further dev.
      raise self.__ioError(118, err)
    try:
      self.__cslines = [CsLine(record) for record in records]
    except IndexError:
      # a line with fewer fields than CsLine needs
      raise CsFileError(118, "Malformed line in checksumfile: "+path)
    if debugMode:
      CsReport(123, "Finished reading checksumfile: "+path).display()

  def write(self):
    """
    Write the entire checksumfile.

    The file is overwritten with the lines currently held in memory.

    Raises CsFileError if a held line is not a CsLine or if writing fails.
    """
    path = self.__path()
    # validate before opening: opening with 'w' truncates the file, so a late
    # failure would leave a partially written checksum file behind
    for csline in self.__cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(140, "Argument must be of type CsLine.")
    if debugMode:
      CsReport(132, "Start writing checksumfile: "+path).display()
    try:
      with open(path, 'w') as csfile:
        for csline in self.__cslines:
          if debugMode:
            CsReport(136, "Writing line: "+ str(csline)).display()
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise self.__ioError(142, err)
    if debugMode:
      CsReport(147, "Finished writing checksumfile: "+path).display()

  def append(self, cslines):
    """
    Append checksum lines to the checksum file.

    cslines -- iterable of CsLine objects

    The appended lines are also added to the lines held in memory so that a
    following write() does not drop them.

    Raises CsFileError if an element is not a CsLine or if writing fails.
    """
    path = self.__path()
    cslines = list(cslines)
    if not cslines:
      if debugMode:
        CsReport(155, "Empty list passed. Nothing to append.").display()
      return
    # validate first so that nothing is written if one element is invalid
    for csline in cslines:
      if not isinstance(csline, CsLine):
        raise CsFileError(167, "Argument must be of type CsLine.")
    if debugMode:
      CsReport(159, "Start appending to checksumfile: "+path).display()
    try:
      with open(path, 'a') as csfile:
        for csline in cslines:
          if debugMode:
            CsReport(163, "Writing line: "+str(csline)).display()
          csfile.write(str(csline) + '\n')
    except IOError as err:
      raise self.__ioError(169, err)
    self.__cslines.extend(cslines)
    if debugMode:
      CsReport(173, "Finished appending to checksumfile: "+path).display()

  def update(self, regexes=None):
    """
    Update a checksum file, i.e. append a line for every regular file in the
    directory of the checksum file which is not registered yet.

    regexes -- list of regular expressions; files whose full path matches one
               of them (re.match) are not registered. The list is not
               modified.

    The checksum file itself is never registered.

    Raises CsFileError if regexes is not a list or if reading, hashing or
    appending fails.
    """
    if regexes is None:
      regexes = []
    if not isinstance(regexes, list):
      raise CsFileError(182, "Pass regular expressions in a list.")
    if debugMode:
      CsReport(184, "Updating checksumfile ...").display()
    # fetch cslines in current csfile
    self.read()
    if debugMode:
      CsReport(188, "Fetching files not registered yet.").display()
    registeredFiles = set(csline.path for csline in self.__cslines)
    # fetch files (pathes); the checksum file is excluded by exact comparison
    # because its path is not a safe regular expression ('.' matches any
    # character and re.match only anchors at the start)
    ownPath = self.__path()
    newFiles = set()
    for name in os.listdir(self.__filepath):
      candidate = os.path.join(self.__filepath, name)
      if candidate != ownPath and os.path.isfile(candidate):
        newFiles.add(candidate)
    # exclude files matching regexes
    for regex in regexes:
      newFiles -= set(candidate for candidate in newFiles \
        if re.match(regex, candidate) is not None)
    # exclude registered files
    newFiles -= registeredFiles
    # generate cslines of newFiles (sorted to get a reproducible file order)
    cslines = []
    for candidate in sorted(newFiles):
      csline = CsLine(candidate, self.__hashfunc)
      csline.generate(chunkSize)
      cslines.append(csline)
    self.append(cslines)

  def check(self, srcDir='', beTolerant=False):
    """
    Check a checksum file which means:
    1. read checksum file
    2. calculate checksum of file which is located in srcDir and check results
    3. write the result to the checksum file

    srcDir     -- if not empty, every registered file is compared with the
                  file of the same name located in srcDir instead of the file
                  at its registered path
    beTolerant -- if True a registered file which is not available anymore is
                  skipped (its line is left unchanged) instead of aborting
                  the check

    Raises CsFileError if reading, hashing or writing fails.
    """
    if debugMode:
      CsReport(220, "Start checking checksums.").display()
    self.read()
    if not self.__cslines and debugMode:
      CsReport(223, "CSFILE does not contain any lines.").display()
    for csline in self.__cslines:
      if srcDir:
        target = os.path.join(srcDir, os.path.basename(csline.path))
      else:
        target = csline.path
      if beTolerant and not os.path.isfile(target):
        if debugMode:
          CsReport(229, "Skipping file not available anymore: "+ \
            target).display()
        continue
      if not srcDir:
        if debugMode:
          CsReport(227, "Performing check of file with file itself.").display()
        csline.check()
      else:
        if debugMode:
          CsReport(232, "Performing check of file with source: "+ \
            target).display()
        csline.check(target)
    self.write()

  def displayLines(self):
    """
    Display the content of the checksum file at stdout.

    Only the lines currently held in memory are displayed, one per line.

    Raises CsFileError if no lines are held.
    """
    if not self.__cslines:
      raise CsFileError(193, "CSFILE does not contain any lines.")
    for line in self.__cslines:
      sys.stdout.write(str(line) + '\n')

# -----------------------------------------------------------------------------
class CsLine:
  """
  Class to handle a checksum and further data for a registered file.

  A line consists of eight fields: checksum, path, hashfunc,
  creationDateFile, creationLocationChecksum, creationDateChecksum,
  dateLastCheck and statusLastCheck.
  """

  # format of all date fields
  _dateFormat = "%Y/%m/%d-%H:%M:%S"

  def __init__(self, *args):
    """
    Either pass a single list holding (at least) the eight fields of a line
    in the order given in the class description, or pass the path of a file
    and the name of a hash function as two strings. In the latter case all
    remaining fields are empty until generate() is called.

    Raises CsFileError for any other kind of arguments.
    """
    if args and isinstance(args[0], list):
      if len(args[0]) < 8:
        raise CsFileError(273, "Invalid argument")
      (self.checksum, self.path, self.hashfunc, self.creationDateFile, \
        self.creationLocationChecksum, self.creationDateChecksum, \
        self.dateLastCheck, self.statusLastCheck) = args[0][:8]
    elif len(args) >= 2 and isinstance(args[0], str) \
        and isinstance(args[1], str):
      self.checksum = ''
      self.path = args[0]
      self.hashfunc = args[1]
      self.creationDateFile = ''
      self.creationLocationChecksum = ''
      self.creationDateChecksum = ''
      self.dateLastCheck = ''
      self.statusLastCheck = ''
    else:
      raise CsFileError(273, "Invalid argument")

  def _digest(self, path, chunkSize, errLine):
    """
    Return the hex digest of the file at path using the hash function of this
    line. The file is read in pieces of chunkSize times the block size of the
    hash function. errLine is the line number put into a raised CsFileError.
    """
    try:
      hashfunc = hashlib.new(self.hashfunc)
    except ValueError as err:
      # hashlib does not know a hash function of that name
      raise CsFileError(errLine, str(err))
    blockSize = chunkSize * hashfunc.block_size
    try:
      with open(path, 'rb') as stream:
        data = stream.read(blockSize)
        while data:
          hashfunc.update(data)
          data = stream.read(blockSize)
    except IOError as err:
      raise CsFileError(errLine, "[Errno "+str(err.errno)+"] "+ \
        str(err.strerror)+": "+str(err.filename))
    return hashfunc.hexdigest()

  def generate(self, chunkSize):
    """
    Generate the checksum and establish corresponding data for a file. The
    result is a fully configured checksum line.

    chunkSize -- read-size factor, see _digest()

    Raises CsFileError if the file cannot be read or the hash function is
    unknown.
    """
    if debugMode:
      CsReport(281, "Calculating checksum for file: "+self.path).display()
    # generate checksum
    self.checksum = self._digest(self.path, chunkSize, 293)
    # set remaining data
    # NOTE: os.path.getctime() is the creation time only on some platforms;
    # on Unix it is the time of the last metadata change.
    self.creationDateFile = datetime.fromtimestamp( \
      os.path.getctime(self.path)).strftime(self._dateFormat)
    self.creationLocationChecksum = os.uname()[1]
    self.creationDateChecksum = datetime.now().strftime(self._dateFormat)
    self.dateLastCheck = self.creationDateChecksum
    self.statusLastCheck = 'ok'

  def check(self, src=''):
    """
    Check a checksum line. Either compare the checksum of the line with the
    checksum of the file located at src or, if src is empty, with the
    checksum of the file located at the path of the line itself.

    Sets statusLastCheck to 'ok' or 'error' and dateLastCheck to the current
    time.

    Raises CsFileError if the file cannot be read, the hash function is
    unknown or the line does not hold a checksum.
    """
    # calculate checksum
    # Maybe better to avoid Exception here and just put the status to notice
    target = src if src else self.path
    checksum = self._digest(target, chunkSize, 328)
    # checks
    if not self.checksum:
      raise CsFileError(334, "Calculation of checksum was not successful.")
    self.dateLastCheck = datetime.now().strftime(self._dateFormat)
    if checksum == self.checksum:
      self.statusLastCheck = 'ok'
    else:
      self.statusLastCheck = 'error'

  def __str__(self):
    """
    String representation of a checksum line: the eight fields separated by
    single blanks.
    """
    return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, self.path, \
      self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
      self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
    
376
# -----------------------------------------------------------------------------
377
# Tests
378
if __name__ == '__main__':
  # Manual smoke test: create/read the checksum file of a directory with
  # debug reports enabled. The directory may be passed as first command line
  # argument; without argument the original test directory is used.
  debugMode = True
  try:
    csfile = CsFile(sys.argv[1] if len(sys.argv) > 1 else '/home/daniel/')
    csfile.read()
  except CsFileError as err:
    err.display()
    # report the failure to the caller with a non-zero exit status
    sys.exit(1)
Daniel Armbruster's avatar
Daniel Armbruster committed
386
387
  
  
Daniel Armbruster's avatar
Daniel Armbruster committed
388
# ----- END OF csfile.py -----