Commit e1f07540 authored by Daniel Armbruster, committed by thomas.forbriger
Browse files

Checksumfile generation and update process adjusted.

This is a legacy commit from before 2015-05-18.
It may be incomplete as well as inconsistent.
See COPYING.legacy and README.history for details.

Now every selected subdirectory, including the directory itself, contains a
checksumfile.



SVN Path:     http://gpitrsvn.gpi.uni-karlsruhe.de/repos/TFSoftware/trunk
SVN Revision: 4372
SVN UUID:     67feda4a-a26e-11df-9d6e-31afc202ad0c
parent a588fe94
......@@ -32,6 +32,7 @@
# REVISIONS and CHANGES
# 02/01/2012 V0.1 Daniel Armbruster
# 05/01/2012 V0.2 added logging flag to enable a file logging mechanism
# 10/01/2012 V0.3 introduced --hash flag
#
# =============================================================================
......@@ -67,7 +68,7 @@ class Usage(Error):
"\n"+__subversion__+"\nAuthor: "+__author__+ """
Usage: csbackgen [-v|--verbose] [-e REGEX [-e REGEX [...]]]
[-R|--notrecursive] [-d|--debug] [-f|--followlinks]
[-t|--target DIR] [-l|--logging] PATH
[-t|--target DIR] [-l|--logging] [--hash ARG] PATH
or: csbackgen -h|--help\n"""
sys.stderr.write("csbackgen: " + self.msg + "\n")
sys.stderr.write(usage_text)
......@@ -81,28 +82,42 @@ def help():
__subversion__+"\nAuthor: "+__author__+"""
Usage: csbackgen [-v|--verbose] [-e REGEX [-e REGEX [...]]]
[-R|--notrecursive] [-d|--debug] [-f|--followlinks]
[-t|--target DIR] [-l|--logging] PATH
[-t|--target ROOTDIR] [-l|--logging] [--hash ARG] PATH
or: csbackgen -h|--help
-------------------------------------------------------------------------------
-v|--verbose Be verbose.
-h|--help Display this help.
-e REGEX While generating a checksumfile exclude files matching
REGEX(s).
-R|--notrecursive Do not generate checksumfiles for subdirectories.
-d|--debug Debug mode. Be really verbose.
-f|--followlinks Follow symbolic links. Only available if option -R is not
set. Note that this option can lead to infinite recursion.
-t|--target Target directory for checksumfile.
-l|--logging Switch on logging to files. Logfiles will be located in
~/.csback/log/.
PATH Path to generate checksumfile for.
-v|--verbose Be verbose.
-h|--help Display this help.
-e REGEX While generating a checksumfile exclude files matching
REGEX(s).
-R|--notrecursive Do not generate checksumfiles for subdirectories of PATH.
-d|--debug Debug mode. Be really verbose.
-f|--followlinks Follow symbolic links. Only available if option -R is not
set. Note that this option can lead to infinite
recursion.
-t|--target ROOTDIR Root target directory for checksumfile. The checksumfile
will be put to the appropiated location as the files had
in PATH or rather Path's subdirectories. So target must
have the same subdirectory structure as PATH.
-l|--logging Switch on logging to files. Logfiles will be located in
~/.csback/log/.
--hash ARG Set the hash function algorithm. Valid values are:
sha224, sha256, sha384, sha512. (default: sha256)
PATH Path to generate checksumfile(s) for including its
subdirectories if option '-R' is not set.
-------------------------------------------------------------------------------
csbackgen.py will either generate a checksumfile if still no checksumfile is
available or in case there is an existing checksumfile csbackgen.py will append
the not yet registered files to the current checksumfile. In the latter case
csbackgen.py is working in its update mode.\n"""
csbackgen.py is working in its update mode.
Notice that in case PATH contains subdirectories and either option '-R' is set
or the subdirectory is excluded by a matching regular expression every
subdirectory will contain a checksumfile.\n"""
sys.stdout.write(help_text)
# -----------------------------------------------------------------------------
# global variables
VALIDHASHES = ['sha224', 'sha256', 'sha384', 'sha512']
# -----------------------------------------------------------------------------
def main(argv=None):
# configure logger
......@@ -117,7 +132,8 @@ def main(argv=None):
try:
try:
opts, args = getopt.getopt(argv[1:], "vhe:Rdft:l", ["help", "verbose", \
"notrecursive", "debug", "followlinks", "target=", "logging"])
"notrecursive", "debug", "followlinks", "target=", "logging", \
"hash="])
except getopt.GetoptError as err:
raise Usage(err.msg)
verbose = False
......@@ -127,6 +143,7 @@ def main(argv=None):
regexes = []
targetSet = False
enableLogging = False
hashfunc = 'sha256'
for opt, arg in opts:
if opt in ("-v", "--verbose"):
verbose = True
......@@ -144,9 +161,14 @@ def main(argv=None):
followLinks = True
elif opt in ("-t", "--target"):
targetSet = True
targetDirectory = str(arg)
targetDirectory = arg
elif opt in ("-l", "--logging"):
logger.configure()
elif opt in ("--hash",):
if arg in VALIDHASHES:
hashfunc = arg
else:
raise Usage("Invalid argument (hash).")
else:
raise Usage("Unhandled option chosen.")
......@@ -158,16 +180,35 @@ def main(argv=None):
srcpath = str(args[0]).rstrip(os.sep)+os.sep
else:
raise Usage("Invalid argument.")
if not os.path.isdir(srcpath):
raise Usage("PATH not a valid directory.")
# major part
logger.getLogger().debug("Start collecting subdirectories ...")
dirs = [srcpath]
if not targetSet:
targetDirectory = srcpath
if not os.path.isdir(targetDirectory):
raise Usage("Target directory not a valid directory.")
# major part
logger.getLogger().info("Start updating checksumfile in '%s'", srcpath)
csfile.CsFile(targetDirectory, srcpath=srcpath, recursive=not notRecursive, \
followLinks=followLinks).update(regexes)
targetdirs = dirs
else:
targetdirs = [targetDirectory]
if not os.path.isdir(targetDirectory):
raise Usage("Target directory not a valid directory.")
# recursive
if not notRecursive:
dirs.extend(csfile.getSubDirectories(srcpath, regexes, followLinks))
if targetSet:
targetdirs.extend([dir.replace(dirs[0],targetDirectory,1) \
for dir in dirs[1:]])
pathes = list(zip(targetdirs, dirs))
logger.getLogger().debug("Directories arranged.")
logger.getLogger().info("Start updating checksumfile(s) ...")
for path in pathes:
logger.getLogger().debug( \
"Updating checksumfile in '{0}' with files from '{1}' ...".format( \
path[0], path[1]))
csfile.CsFile(path[0], path[1], hashfunc=hashfunc).update(regexes)
except Usage as err:
err.display()
......@@ -177,7 +218,7 @@ def main(argv=None):
err.display()
return 2
else:
logger.getLogger().info("Checksumfile updated.")
logger.getLogger().info("Checksumfile(s) updated.")
return 0
......
......@@ -90,7 +90,7 @@ def hasCsFile(path):
Checks if path contains a checksumfile. Returns True if path contains a file
named with an CsFile filename.
"""
return os.path.isfile(os.path.join(path, CsFile.filename))
return os.path.isfile(os.path.join(path, CsFile.basename+CsFile.csSuffix))
# -----------------------------------------------------------------------------
class CsFileError(Exception):
......@@ -115,14 +115,11 @@ class CsFile:
A checksumfiles usually contains checksumlines (type CsLine) of files.
Generally this includes the files of the subdirectories in srcpath, too.
"""
def __init__(self, filedir, srcpath, hashfunc='sha256', recursive=True, \
followLinks=False):
def __init__(self, filedir, srcpath, hashfunc='sha256'):
self.filedir = filedir
self.__cslines = []
self.__hashfunc = hashfunc
self.srcpath = srcpath
self.recursive = recursive
self.followLinks = False
self.logger = logging.LoggerAdapter(logging.getLogger( \
csfileLoggerName+".CsFile"), csfileLogInfo)
......@@ -132,7 +129,7 @@ class CsFile:
"""
if not os.access(self.filedir, os.F_OK):
raise CsFileError(134, "Invalid directory path.")
path = os.path.join(self.filedir, CsFile.filename)
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
# no checksumfile available -> create new file
if os.access(self.filedir, os.F_OK) and not os.path.isfile(path):
self.logger.info("Creating checksumfile in '%s'", self.filedir)
......@@ -148,8 +145,8 @@ class CsFile:
try:
self.logger.debug("Start reading checksumfile '%s'",path)
csfile = open(path)
self.__cslines = [CsLine(line.split()) for line in csfile \
if len(line.rstrip()) and line[0] != '#']
self.__cslines = [CsLine(line.split(), self.srcpath) \
for line in csfile if len(line.rstrip()) and line[0] != '#']
except IOError as err:
raise CsFileError(154, "[Errno "+str(err.errno)+"] "+err.strerror+": " \
+err.filename)
......@@ -157,10 +154,9 @@ class CsFile:
csfile.close()
self.logger.debug("Finished reading checksumfile '%s'", path)
# The write function will be obsolete now.
"""
def write(self):
"""
Write the entire checksumfile.
"""
path = os.path.join(self.filedir, CsFile.filename)
try:
self.logger.debug("Start writing checksumfile '%s'",path)
......@@ -177,12 +173,13 @@ class CsFile:
else:
csfile.close()
self.logger.debug("Finished writing checksumfile '%s'", path)
"""
def append(self, cslines):
"""
Append checksum lines to the checksumfile.
"""
path = os.path.join(self.filedir, CsFile.filename)
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
if 0 == len(cslines):
self.logger.debug("Empty list passed. Nothing to append.")
else:
......@@ -213,37 +210,30 @@ class CsFile:
# fetch cslines in current csfile
self.read()
self.logger.debug("Fetching files not registered yet.")
registeredFiles = set(csline.path for csline in self.__cslines)
# fetch files (pathes)
registeredFiles = set(csline.filename for csline in self.__cslines)
# fetch files
newFiles = set()
if self.recursive:
self.logger.debug("Fetching files recursively.")
for root, dirs, files in os.walk(self.srcpath, \
followlinks=self.followLinks):
for file in files:
newFiles.add(os.path.join(root, file))
else:
self.logger.debug("Fetching files not recursively.")
newFiles = set(os.path.join(self.srcpath, file) \
for file in os.listdir(self.srcpath) \
if os.path.isfile(os.path.join(self.srcpath, file)))
newFiles = set(file for file in os.listdir(self.srcpath) \
if os.path.isfile(os.path.join(self.srcpath, file)))
# exclude files matching regexes
regexes.append(os.path.join(self.srcpath,CsFile.filename))
regexes.append(CsFile.basename+CsFile.csSuffix)
regexes.append(CsFile.basename+CsFile.resultSuffix+r'\.[1-3]')
for regex in regexes:
matching = set(file for file in newFiles \
if None != re.match(regex, file))
newFiles -= matching
regexes.remove(os.path.join(self.srcpath,CsFile.filename))
regexes.remove(CsFile.basename+CsFile.csSuffix)
regexes.remove(CsFile.basename+CsFile.resultSuffix+r'\.[1-3]')
# exclude registered files
newFiles -= registeredFiles
# generate cslines of newFiles
cslines = []
for file in newFiles:
csline = CsLine(file, self.__hashfunc)
csline = CsLine(file, self.srcpath, self.__hashfunc)
csline.generate(chunkSize)
cslines.append(csline)
self.append(cslines)
path = os.path.join(self.filedir, CsFile.filename)
path = os.path.join(self.filedir, CsFile.basename+CsFile.csSuffix)
self.logger.debug("Update of checksumfile '%s' finished.", path)
def check(self, regexes, beTolerant=False):
......@@ -291,8 +281,9 @@ class CsFile:
for line in self.__cslines:
sys.stdout.write(line)
filename = ".cs"
basename = "checksumfile"
csSuffix = ".cs"
resultSuffix = ".result"
# -----------------------------------------------------------------------------
class CsLine:
......@@ -302,20 +293,23 @@ class CsLine:
def __init__(self, *args):
self.logger = logging.LoggerAdapter(logging.getLogger( \
csfileLoggerName+".CsLine"), csfileLogInfo)
if isinstance(args[0], list):
if isinstance(args[0], list) and isinstance(args[1], str):
argList = args[0]
self.checksum = argList[0]
self.path = argList[1]
self.filename = argList[1]
self.hashfunc = argList[2]
self.creationDateFile = argList[3]
self.creationLocationChecksum = argList[4]
self.creationDateChecksum = argList[5]
self.dateLastCheck = argList[6]
self.statusLastCheck = argList[7]
elif isinstance(args[0], str) and isinstance(args[1], str):
self.path = args[1]
elif isinstance(args[0], str) and isinstance(args[1], str) and \
isinstance(args[2], str):
self.checksum = ''
self.path = args[0]
self.hashfunc = args[1]
self.filename = args[0]
self.path = args[1]
self.hashfunc = args[2]
self.creationDateFile = ''
self.creationLocationChecksum = ''
self.creationDateChecksum = ''
......@@ -329,12 +323,13 @@ class CsLine:
Generate the checksum and establish corresponding data for a file. The
result is a fully configured checksum line.
"""
self.logger.debug("Calculating checksum for '%s'", self.path)
# generate checksum
path = os.path.join(self.path, self.filename)
self.logger.debug("Calculating checksum for '%s'", path)
try:
hashfunc = hashlib.new(self.hashfunc)
blockSize = chunkSize * hashfunc.block_size
file = open(self.path, 'rb')
file = open(path, 'rb')
data = file.read(blockSize)
while data:
hashfunc.update(data)
......@@ -348,7 +343,7 @@ class CsLine:
# set remaining data
self.creationDateFile = \
datetime.fromtimestamp(os.path.getctime(self.path)).strftime( \
datetime.fromtimestamp(os.path.getctime(path)).strftime( \
"%Y/%m/%d-%H:%M:%S")
self.creationLocationChecksum = os.uname()[1]
self.creationDateChecksum = datetime.now().strftime("%Y/%m/%d-%H:%M:%S")
......@@ -394,9 +389,10 @@ class CsLine:
"""
String representation of a checksum line.
"""
return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, self.path, \
self.hashfunc, self.creationDateFile, self.creationLocationChecksum, \
self.creationDateChecksum, self.dateLastCheck, self.statusLastCheck)
return '{0} {1} {2} {3} {4} {5} {6} {7}'.format(self.checksum, \
self.filename, self.hashfunc, self.creationDateFile, \
self.creationLocationChecksum, self.creationDateChecksum, \
self.dateLastCheck, self.statusLastCheck)
# -----------------------------------------------------------------------------
# Tests
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment