#!/usr/bin/env python
## @file csbackgen.py
# @brief Generate checksum files.
#
# -----------------------------------------------------------------------------
#
# $Id$
# @author Daniel Armbruster
# \date 11/09/2011
#
# Purpose: Generate checksum files.
#
# ----
# This file is part of csback.
#
# csback is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# csback is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csback.  If not, see <http://www.gnu.org/licenses/>.
# ----
#
# Copyright (c) 2012 by Daniel Armbruster
#
# REVISIONS and CHANGES
# 02/01/2012  V0.1  Daniel Armbruster
# 05/01/2012  V0.2  added logging flag to enable a file logging mechanism
# 10/01/2012  V0.3  introduced --hash flag
# 11/01/2012  V0.4  pid lockfile handler ability added
# 17/01/2012  V0.5  implemented file selection regarding time
#
# =============================================================================

import getopt
import os
import sys
import re
import pwd
import logging
import csfile
import csbacklog
import pidlock

51
# Module metadata; these values are printed by the usage and help texts.
__version__ = "V0.5"
__subversion__ = "$Id$"
# The license notice in the file header grants the GNU GPL "either version 3
# of the License, or (at your option) any later version", so advertise that.
__license__ = "GPLv3+"
__author__ = "Daniel Armbruster"
__copyright__ = "Copyright (c) 2012 by Daniel Armbruster"
56

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -----------------------------------------------------------------------------
class Error(Exception):
  """
  Base exception of csbackgen carrying a printable message in `msg`.
  """

  def __init__(self, msg):
    # Only the message is stored; display() and the handlers read `msg`.
    self.msg = msg

  def display(self):
    """
    Write the message, prefixed with the program name, to stderr.
    """
    report = "".join(("csbackgen (ERROR): ", self.msg, "\n"))
    sys.stderr.write(report)


class Usage(Error):
  """
  Raised for invalid command lines; display() prints the message followed by
  a short usage synopsis.
  """

  def display(self):
    """
    Write the error message and the usage synopsis to stderr.
    """
    # The synopsis is kept in sync with the one printed by help().
    usage_text = "Version: "+__version__+"\nLicense: "+__license__+ \
      "\n"+__subversion__+"\nAuthor: "+__author__+ """
 Usage: csbackgen [-v|--verbose] [-e REGEX [-e REGEX [...]]]
                  [-R|--notrecursive] [-d|--debug] [-f|--followlinks]
                  [-t|--target ROOTDIR] [-l|--logging] [-H|--hash ARG]
                  [-L|--lock] [-D|--daystart] [--atime N] [--mtime N]
                  [--ctime N] [--amin N] [--mmin N] [--cmin N] PATH
    or: csbackgen -h|--help\n"""
    sys.stderr.write("csbackgen: " + self.msg + "\n")
    sys.stderr.write(usage_text)

# -----------------------------------------------------------------------------
def help():
  """
  Print the version banner, the usage synopsis and the full option
  description to stdout. Returns None.
  """
  help_text = "Version: "+__version__+"\nLicense: "+__license__+"\n"+ \
    __subversion__+"\nAuthor: "+__author__+"""
 Usage: csbackgen [-v|--verbose] [-e REGEX [-e REGEX [...]]]
                  [-R|--notrecursive] [-d|--debug] [-f|--followlinks]
                  [-t|--target ROOTDIR] [-l|--logging] [-H|--hash ARG]
                  [-L|--lock] [-D|--daystart] [--atime N] [--mtime N]
                  [--ctime N] [--amin N] [--mmin N] [--cmin N] PATH
    or: csbackgen -h|--help
-------------------------------------------------------------------------------
 -v|--verbose         Be verbose.
 -h|--help            Display this help.
 -e REGEX             While generating a checksumfile exclude files matching
                      REGEX(s).
 -R|--notrecursive    Do not generate checksumfiles for subdirectories of PATH.
 -d|--debug           Debug mode. Be really verbose.
 -f|--followlinks     Follow symbolic links. Only available if option -R is not
                      set. Note that this option can lead to infinite
                      recursion.
 -t|--target ROOTDIR  Root target directory for checksumfile. The checksumfile
                      will be put to the appropriate location as the files had
                      in PATH or rather PATH's subdirectories. So target must
                      have the same subdirectory structure as PATH.
 -l|--logging         Switch on logging to files. Logfiles will be located in
                      /var/log/ .
 -H|--hash ARG        Set the hash function algorithm. Valid values are:
                      sha224, sha256, sha384, sha512. (default: sha256)
 -L|--lock            Lock the directories working at. This flag is useful in
                      case csbackgen was run simultaneously with other csback
                      processes working in the same directory. Setting this
                      option avoids checksumfile access problems which might
                      occur.
 -D|--daystart        Measure times (for --amin, --atime, --cmin, --ctime,
                      --mmin, and --mtime) from the beginning of today rather
                      than from 24 hours ago.
 --amin N             Exclude files which were last accessed N minutes ago.
 --atime N            Exclude files which were last accessed N*24 hours ago.
                      To match --atime +1, a file has to have been accessed at
                      least two days ago.
 --cmin N             Exclude files which status was last changed N minutes ago.
 --ctime N            Exclude files which status was last changed N*24 hours
                      ago.
 --mmin N             Exclude files which data was last modified N minutes ago.
 --mtime N            Exclude files which data was last modified N*24 hours
                      ago.
 PATH                 Path to generate checksumfile(s) for including its
                      subdirectories if option '-R' is not set.
-------------------------------------------------------------------------------

1. General
==========
csbackgen.py will either generate a checksumfile if still no checksumfile is
available or in case there is an existing checksumfile csbackgen.py will append
the not yet registered files to the current checksumfile. In the latter case
csbackgen.py is working in its update mode.
Notice that in case PATH contains subdirectories every subdirectory will
contain its own checksumfile unless option '-R' is set or the subdirectory is
excluded by a matching regular expression or time specification.

2. Time specification
=====================
For file selection regarding time numeric arguments can be passed as follows:

    +N     for greater/older than N,

    -N     for less/younger than N,

    N      for exactly N.

a) Notice if several +N (for greater than N) or -N (for less than N) values are
passed with one parameter the last one will overwrite the previous values.
b) --cmin and --ctime arguments are evaluated platform dependent i.e. time of
most recent metadata change on Unix, or the time of creation on Windows.
c) If times for file selection are specified here the matching files are
excluded, in contrast to the Unix find command where they are included.\n"""
  sys.stdout.write(help_text)

162
163
164
165
# -----------------------------------------------------------------------------
# global variables
# Hash algorithm names accepted as argument of the -H|--hash option.
VALIDHASHES = [
  "sha224",
  "sha256",
  "sha384",
  "sha512",
]

166
167
# -----------------------------------------------------------------------------
def main(argv=None):
  """
  Parse the command line and generate or update the checksumfile(s).

  argv -- complete argument vector with the program name at index 0. If None
          is passed sys.argv is used.

  Returns 0 if all checksumfiles were updated and 2 if a Usage error, a
  pidlock.PidLockError or a csfile.CsFileError occurred. With -h|--help the
  help text is printed and the function leaves through sys.exit(0).
  """
  # configure logger
  logger = csbacklog.CsbackLog('csbackgen')

  console = logging.StreamHandler()
  console.setFormatter(logging.Formatter( \
    '%(name)-8s [%(lineno)d]: %(levelname)-8s %(message)s'))

  if argv is None:
    argv = sys.argv
  try:
    try:
      opts, args = getopt.getopt(argv[1:], "vhe:Rdft:lLH:D", ["help",
        "verbose", "notrecursive", "debug", "followlinks", "target=",
        "logging", "hash=", "lock", "daystart", "amin=", "atime=",
        "cmin=", "ctime=", "mmin=", "mtime="])
    except getopt.GetoptError as err:
      raise Usage(err.msg)
    verbose = False
    debugMode = False
    notRecursive = False
    followlinks = False
    regexes = []
    targetSet = False
    hashfunc = 'sha256'
    pidLock = False
    timeDict = {'daystart': False, 'hasaTimes': False, 'hascTimes': False, \
      'hasmTimes': False, 'amin': [], 'atime': [], 'cmin': [], 'ctime': [], \
      'mmin': [], 'mtime': []}
    for opt, arg in opts:
      if opt in ("-v", "--verbose"):
        verbose = True
      elif opt in ("-h", "--help"):
        help()
        sys.exit(0)
      elif opt in ("-R", "--notrecursive"):
        notRecursive = True
      elif opt == "-e":
        regexes.append(arg)
      elif opt in ("-d", "--debug"):
        debugMode = True
      elif opt in ("-f", "--followlinks"):
        followlinks = True
      elif opt in ("-t", "--target"):
        targetSet = True
        targetDirectory = arg.rstrip(os.sep)+os.sep
      elif opt in ("-l", "--logging"):
        logger.configure()
      elif opt in ("-H", "--hash"):
        if arg in VALIDHASHES:
          hashfunc = arg
        else:
          raise Usage("Invalid argument (hash).")
      elif opt in ("-L", "--lock"):
        pidLock = True
      elif opt in ("-D", "--daystart"):
        timeDict['daystart'] = True
      elif opt in ("--amin", "--atime", "--cmin", "--ctime", "--mmin", \
          "--mtime"):
        # "--amin" -> key 'amin' and flag 'hasaTimes'; likewise for c and m
        key = opt[2:]
        timeDict['has'+key[0]+'Times'] = True
        timeDict[key].append(arg)
      else:
        raise Usage("Unhandled option chosen.")

    # Choose the console level once all options are known so that the result
    # does not depend on the order of -v and -d: debug always wins.
    if debugMode:
      console.setLevel(logging.DEBUG)
    elif verbose:
      console.setLevel(logging.INFO)
    if verbose or debugMode:
      logger.addHandler(console)

    # fetch remaining commandline arguments
    if 1 == len(args):
      srcpath = str(args[0]).rstrip(os.sep)+os.sep
    else:
      raise Usage("Invalid argument.")
    if not os.path.isdir(srcpath):
      raise Usage("PATH not a valid directory.")

    # major part
    logger.getLogger().debug("Start collecting subdirectories ...")
    dirs = [srcpath]
    # if no target path for checksumfiles had been passed generate the
    # checksumfiles in the srcpath
    if not targetSet:
      targetDirectory = srcpath
      # same list object on purpose: extending dirs below extends targetdirs
      targetdirs = dirs
    # otherwise use the target directory path passed
    else:
      targetdirs = [targetDirectory]
      if not os.path.isdir(targetDirectory):
        raise Usage("Target directory not a valid directory.")
    # generate checksumfiles for subdirectories (recursive mode)
    # so collect all available subdirectories here (excluding subdirectories)
    if not notRecursive:
      dirs.extend(csfile.getSubDirectories(srcpath, regexes, followlinks))
      # if a target directory had been passed extend the list of
      # target directories replacing the trunk of the source directory with the
      # target directory. Remember that this is only valid for recursive mode
      if targetSet:
        targetdirs.extend([subdir.replace(dirs[0],targetDirectory,1) \
          for subdir in dirs[1:]])

    # create a list of tuples now so that each target directory has its
    # corresponding source directory
    pathes = list(zip(targetdirs, dirs))
    logger.getLogger().debug("Directories arranged.")

    logger.getLogger().info("Start updating checksumfile(s) ...")
    # for all available pathes generate or rather update the checksumfile (after
    # performing pid lockfile checkings)
    for path in pathes:
      logger.getLogger().info( \
        "Updating checksumfile in '{0}' with files from '{1}' ...".format( \
        path[0], path[1]))
      lock = pidlock.PidLocker(path[0])
      checksumfile = csfile.CsFile(path[0], path[1], hashfunc=hashfunc, \
        timeDict=timeDict, followlinks=followlinks)

      # lock directory currently working in (use pid locking module) if possible
      # - else terminate raising an exception
      if lock.lockValid():
        raise pidlock.PidLockError(-1, \
          "Directory '{0}' locked.".format(path[0]))
      elif pidLock:
        pid = os.getpid()
        lock.announce(pid)
        # always withdraw our own lock, even if the update fails, so that the
        # directory is not left locked by this process
        try:
          checksumfile.update(regexes)
        finally:
          lock.cancel(pid)
      else:
        checksumfile.update(regexes)
      logger.getLogger().info( \
        "Finished updating checksumfile in '{0}'.".format(path[0]))

  except Usage as err:
    err.display()
    return 2
  except pidlock.PidLockError as err:
    logger.getLogger().critical("{0}".format(err.msg))
    err.display()
    return 2
  except csfile.CsFileError as err:
    logger.getLogger().error("{0}".format(err.msg))
    err.display()
    return 2
  else:
    logger.getLogger().info("Checksumfile(s) updated.")
    return 0
    

# -----------------------------------------------------------------------------
if __name__ == "__main__":
  # Script entry point: main()'s return value becomes the exit status.
  exit_status = main()
  sys.exit(exit_status)

# ----- END OF csbackgen.py -----