Commit 8949577e authored by Daniel Armbruster, committed by thomas.forbriger
Browse files

The enhancements which were considered in ticket:158 are implemented now.
Not completely tested yet.

This is a legacy commit from before 2015-05-18.
It may be incomplete as well as inconsistent.
See COPYING.legacy and README.history for details.

SVN Path:
SVN Revision: 4375
SVN UUID:     67feda4a-a26e-11df-9d6e-31afc202ad0c
parent 5e1315ef
......@@ -31,6 +31,8 @@
# 03/01/2012 V0.1 Daniel Armbruster
# 10/01/2012 V0.2 adjustments because every subdirectory now contains its own
# checksumfile.
# =============================================================================
......@@ -42,7 +44,7 @@ import logging
import csfile
import csbacklog
__version__ = "V0.1"
__version__ = "V0.2"
__subversion__ = "$Id$"
__license__ = "GPLv2"
__author__ = "Daniel Armbruster"
......@@ -86,8 +88,8 @@ def help():
-v|--verbose Be verbose.
-h|--help Display this help.
-e REGEX While checking a checksumfile exclude files if their pathes
are matching REGEX(s).
-e REGEX While checking a checksumfile(s) exclude files and
directories matching REGEX(s).
-R|--notrecursive Do not search in subdirectories of PATH.
-d|--debug Debug mode. Be really verbose.
-f|--followlinks Follow symbolic links. Only available if option -R is not
......@@ -100,13 +102,16 @@ def help():
-l|--logging Switch on logging to files. Logfiles will be located in
SOURCEPATH Optional sourcepath for comparison with files backed up in
PATH. PATH must contain the csback checksumfile.
PATH. PATH and its subdirectories (if option '-R' had not
been selected) must contain the csback checksumfile(s).
Note that the directory structure (if option '-R' is not
set) bellow SOURCEPATH should be equal to those in PATH
because otherwise the files can not be found.
because otherwise the files won't be found.
If SOURCEPATH is not passed a check of files located in
PATH with its checksumfiles will be performed.
PATH Path to perform check for.
PATH Path to perform check with its checksumfile(s). If option
'-R' had not been set the check will be performed
additionally for PATHs' subdirectories.
......@@ -127,7 +132,7 @@ def main(argv=None):
opts, args = getopt.getopt(argv[1:], "vhe:Rdftl", ["help", "verbose", \
"notrecursive", "debug", "followlinks", "tolerant", "logging"])
except getopt.GetoptError as err:
raise Usage(130,err.msg)
raise Usage(135,err.msg)
verbose = False
debugMode = False
notRecursive = False
......@@ -156,36 +161,48 @@ def main(argv=None):
elif opt in ("-l", "--logging"):
raise Usage(159,"Unhandled option chosen.")
raise Usage(164,"Unhandled option chosen.")
if verbose or debugMode:
if 1 == len(args):
sourcepath = str(args[0]).rstrip(os.sep)+os.sep
sourceDirs = [sourcepath]
inputpath = sourcepath
inputDirs = sourceDirs
elif 2 == len(args):
sourcepath = str(args[0]).rstrip(os.sep)+os.sep
sourceDirs = [sourcepath]
inputpath = str(args[1]).rstrip(os.sep)+os.sep
inputDirs = [inputpath]
raise Usage(171,"Invalid argument(s).")
raise Usage(180,"Invalid argument(s).")
# major part
logger.getLogger().info("Start checking checksums ...")
logger.getLogger().debug("Collecting subdirectories ...")
# recursive
if not notRecursive:
sourceDirs.extend( \
csfile.getSubDirectories(sourcepath, regexes, followLinks))
if sourceDirs[0] != inputDirs[0]:
inputDirs.extend( \
csfile.getSubDirectories(inputpath, regexes, followLinks))
if len(sourceDirs) != len(inputDirs):
raise Error(193, \
"Directory structure of inputpath and sourcepath different.")
if not csfile.hasCsFile(inputpath):
raise Error(175,"PATH does not contain a checksumfile.")
if notRecursive:
logger.getLogger().debug("Fetching subdirectories for regexes in %s", \
subdirs = csfile.getSubDirectories(inputpath, regexes, followLinks)
subdirs = [subdir+"*" for subdir in subdirs]
checksumfile = csfile.CsFile(inputpath, sourcepath)
checksumfile.check(regexes, beTolerant)
raise Error(197,"PATH does not contain a checksumfile.")
paths = list(zip(inputDirs, sourceDirs))
logger.getLogger().info("Start checking checksums ...")
for path in paths:
checksumfile = csfile.CsFile(path[0], path[1])
checksumfile.check(regexes, beTolerant)
except Usage as err:
......@@ -45,7 +45,7 @@ import logging
import csfile
import csbacklog
__version__ = "V0.1"
__version__ = "V0.3"
__subversion__ = "$Id$"
__license__ = "GPLv2"
__author__ = "Daniel Armbruster"
......@@ -96,7 +96,7 @@ def help():
-t|--target ROOTDIR Root target directory for checksumfile. The checksumfile
will be put to the appropiated location as the files had
in PATH or rather Path's subdirectories. So target must
in PATH or rather Paths' subdirectories. So target must
have the same subdirectory structure as PATH.
-l|--logging Switch on logging to files. Logfiles will be located in
......@@ -46,6 +46,7 @@ import pwd
import hashlib
import logging
from datetime import datetime
import csbacklog
__version__ = "V0.1"
__subversion__ = "$Id$"
......@@ -54,13 +55,17 @@ __author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"
# -----------------------------------------------------------------------------
# global variables
# variables
chunkSize = 1024 * 128 # 128kB
csfileLoggerName = ''
csfileLogInfo = {}
BASENAME = "checksumfile"
CSSUFFIX = ".cs"
RESULTSUFFIX = ".result"
# -----------------------------------------------------------------------------
# functions
......@@ -90,7 +95,7 @@ def hasCsFile(path):
Checks if path contains a checksumfile. Returns True if path contains a file
named with a CsFile filename.
return os.path.isfile(os.path.join(path, CsFile.basename+CsFile.csSuffix))
return os.path.isfile(os.path.join(path, BASENAME+CSSUFFIX))
# -----------------------------------------------------------------------------
class CsFileError(Exception):
......@@ -110,16 +115,16 @@ class CsFile:
Provides an interface to handle a csback checksumfile. A checksumfile
possesses the ability to take the files for generating the checksums from a
different sourcedirectory which can be configured with the srcpath variable.
different sourcedirectory which can be configured with the srcdir variable.
A checksumfile usually contains checksum lines (type CsLine) of files.
Generally this includes the files of the subdirectories in srcpath, too.
Generally this includes the files of the subdirectories in srcdir, too.
def __init__(self, filedir, srcpath, hashfunc='sha256'):
def __init__(self, filedir, srcdir, hashfunc='sha256'):
self.filedir = filedir
self.__cslines = []
self.__hashfunc = hashfunc
self.srcpath = srcpath
self.srcdir = srcdir
self.logger = logging.LoggerAdapter(logging.getLogger( \
csfileLoggerName+".CsFile"), csfileLogInfo)
......@@ -129,7 +134,7 @@ class CsFile:
if not os.access(self.filedir, os.F_OK):
raise CsFileError(134, "Invalid directory path.")
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
# no checksumfile available -> create new file
if os.access(self.filedir, os.F_OK) and not os.path.isfile(path):"Creating checksumfile in '%s'", self.filedir)
......@@ -145,7 +150,7 @@ class CsFile:
self.logger.debug("Start reading checksumfile '%s'",path)
csfile = open(path)
self.__cslines = [CsLine(line.split(), self.srcpath) \
self.__cslines = [CsLine(line.split(), self.srcdir) \
for line in csfile if len(line.rstrip()) and line[0] != '#']
except IOError as err:
raise CsFileError(154, "[Errno "+str(err.errno)+"] "+err.strerror+": " \
......@@ -154,32 +159,11 @@ class CsFile:
self.logger.debug("Finished reading checksumfile '%s'", path)
# The write function will be obsolete now.
def write(self):
path = os.path.join(self.filedir, CsFile.filename)
self.logger.debug("Start writing checksumfile '%s'",path)
csfile = open(path, 'w')
for csline in self.__cslines:
self.logger.debug("Writing line: '%s'",str(csline))
if isinstance(csline, CsLine):
csfile.write(str(csline) + '\n')
raise CsFileError(173, "Argument must be of type CsLine.")
except IOError as err:
raise CsFileError(175, "[Errno "+str(err.errno)+"] "+err.strerror+": " \
self.logger.debug("Finished writing checksumfile '%s'", path)
def append(self, cslines):
Append checksum lines to the checksumfile.
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
if 0 == len(cslines):
self.logger.debug("Empty list passed. Nothing to append.")
......@@ -213,38 +197,40 @@ class CsFile:
registeredFiles = set(csline.filename for csline in self.__cslines)
# fetch files
newFiles = set()
newFiles = set(file for file in os.listdir(self.srcpath) \
if os.path.isfile(os.path.join(self.srcpath, file)))
newFiles = set(file for file in os.listdir(self.srcdir) \
if os.path.isfile(os.path.join(self.srcdir, file)))
# exclude files matching regexes
for regex in regexes:
matching = set(file for file in newFiles \
if None != re.match(regex, file))
newFiles -= matching
# exclude registered files
newFiles -= registeredFiles
# generate cslines of newFiles
cslines = []
for file in newFiles:
csline = CsLine(file, self.srcpath, self.__hashfunc)
csline = CsLine(file, self.srcdir, self.__hashfunc)
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
path = os.path.join(self.filedir, BASENAME+CSSUFFIX)
self.logger.debug("Update of checksumfile '%s' finished.", path)
def check(self, regexes, beTolerant=False):
Check a checksum file which means:
Calculate checksum of a file which is located in self.srcpath and check
Calculate checksum of a file which is located in self.srcdir and check
results. Files in the checksumfile matching one of the regexes are excluded.
If the third argument (beTolerant) is set to True the checking process will
be successful, too, if a file listed in a checksumfile is not available
anymore. Then the status of the checksum line will be set to 'warning'.
anymore. Then a warning will be issued. Otherwise an error will be printed
to the result logfile. In case a checksum does not match, i.e. the file has
changed, a critical message will be printed to the result logfile.
Note that this function does not check if there are unregistered files in
the directory. Adding checksum lines to the checksumfile has to be done by
......@@ -253,7 +239,19 @@ class CsFile:
if 0 == len(self.__cslines):
self.logger.debug( \
"CSFILE does not contain any lines or had not been read yet.")
# configure handler for logger
# checksumfile.result filesize 15 kB each with 3 files
resulthandler = logging.handlers.RotatingFileHandler(os.path.join( \
self.filedir,BASENAME+RESULTSUFFIX), maxBytes=1024*15, \
format = '%(asctime)-15s %(hostname)s[%(pid)s] %(levelname)-8s %(message)s'
logInfo = {'hostname': os.uname()[1], 'pid': os.getpid()}
logger = logging.getLogger(csfileLoggerName+".CsFile.chk")
resultLogger = logging.LoggerAdapter(logger, logInfo)
# exclude those files matching regex in regexes
self.logger.debug("Exclude files matching regexes")
cslinesSet = set(self.__cslines)
......@@ -265,12 +263,12 @@ class CsFile:
self.logger.debug("Start checking checksums ...")
for csline in self.__cslines:
if csline in cslinesSet:
path = csline.path.replace(self.filedir, self.srcpath)
self.logger.debug( \
"Performing check of csline-file '%s' with file '%s'", csline.path, \
csline.check(path, beTolerant)
"Performing check of file '{0}' with csline in checksumfile '{1}'.",\
os.path.join(csline.srcdir,csline.filename), self.filedir)
csline.check(resultLogger, beTolerant)
self.logger.debug("Finished checking checksums.")
def display(self):
......@@ -281,9 +279,6 @@ class CsFile:
for line in self.__cslines:
basename = "checksumfile"
csSuffix = ".cs"
resultSuffix = ".result"
# -----------------------------------------------------------------------------
class CsLine:
......@@ -301,12 +296,12 @@ class CsLine:
self.creationDateFile = argList[3]
self.creationLocationChecksum = argList[4]
self.creationDateChecksum = argList[5]
self.path = args[1]
self.srcdir = args[1]
elif isinstance(args[0], str) and isinstance(args[1], str) and \
isinstance(args[2], str):
self.checksum = ''
self.filename = args[0]
self.path = args[1]
self.srcdir = args[1]
self.hashfunc = args[2]
self.creationDateFile = ''
self.creationLocationChecksum = ''
......@@ -320,7 +315,7 @@ class CsLine:
result is a fully configured checksum line.
# generate checksum
path = os.path.join(self.path, self.filename)
path = os.path.join(self.srcdir, self.filename)
self.logger.debug("Calculating checksum for '%s'", path)
hashfunc =
......@@ -344,7 +339,7 @@ class CsLine:
self.creationLocationChecksum = os.uname()[1]
self.creationDateChecksum ="%Y/%m/%d-%H:%M:%S")
def check(self, src, beTolerant=False):
def check(self, resultLogger, beTolerant=False):
Check a checksum line. The checksum of the line will be compared with the
checksum of the file located in src. In case no file had been found in src
......@@ -354,15 +349,16 @@ class CsLine:
hashfunc =
blockSize = chunkSize * hashfunc.block_size
path = os.path.join(self.srcdir, self.filename)
file = open(src, 'rb')
file = open(path, 'rb')
except IOError:
if beTolerant:
self.statusLastCheck = 'warning'
self.logger.warning("While checking file '%s' does not exist.", src)
resultLogger.warning("While checking: file '%s' does not exist.", path)
self.logger.warning("While checking: file '%s' does not exist.", path)
self.statusLastCheck = 'error'
self.logger.error("While checking file '%s' does not exist.", src)
resultLogger.error("While checking: file '%s' does not exist.", path)
self.logger.error("While checking: file '%s' does not exist.", path)
# calculate checksum
data =
......@@ -373,11 +369,11 @@ class CsLine:
# checks
if checksum == self.checksum:
self.statusLastCheck = 'ok'
self.logger.debug("Check of file '%s' was successful.", src)"Check of file '%s' was successful.", path)
self.logger.debug("Check of file '%s' was successful.", path)
self.statusLastCheck = 'error'
self.logger.critical("File '%s' has no integrity anymore.", src)
resultLogger.critical("File '%s' has no integrity anymore.", path)
self.logger.critical("File '%s' has no integrity anymore.", path)
def __str__(self):
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment