Commit 7b549203 authored by thomas.forbriger's avatar thomas.forbriger

[TASK] (util): recode csbucscheck.sh

improve implementation of csbucscheck.sh by
- providing proper command line reading
- providing proper usage information
- providing comments in the code
- providing additional command line options to select an output directory and
  checksum binary executable
parent 644126b3
......@@ -25,6 +25,8 @@ csbucomparecsfiles.sh
csbucscheck.sh
Recurse into a read-only subdirectory tree and verify checksums.
This provides a complete and independent verification of the consistency,
completeness and correctness of a csback supervised data archive.
csbusortdirlist.py
Collect all directories being checksum-supervised in wiki formatted tables.
......
......@@ -18,76 +18,225 @@
# 29/01/2014 V1.2 Christopher Sanz
# remove link to script cscomp.sh and include it in
# <csback_cscheck.sh>
# 13/06/2016 V1.3 Thomas Forbriger
# implement different type of checksums
#
# ============================================================================
#
# script version, printed by the usage function
VERSION="13/06/2016 V1.3"
# checksum file basename
CHECKSUMFILEBASE="checksumfile"
# name of csback checksum files
CHECKSUMFILE="${CHECKSUMFILEBASE}.cs"
# current working directory
HERE=$(pwd)
# base name of temporary directory used for intermediate lists and for report
# files (can be overridden by option -r)
TMPBASE="$HERE/report"
# default checksum binary executable (can be overridden by option -b)
SHABIN=/usr/bin/sha512sum
# ============================================================================
# shell functions
# ===============
# usage function
#######################################
# Print the short usage summary.
# Globals:   VERSION (read)
# Arguments: none
# Outputs:   synopsis and one-paragraph description on stdout
#######################################
usage() {
  # unquoted delimiter: $VERSION is expanded when the function is called
  cat << EOF
This is <csbucscheck.sh>
version: $VERSION
usage: csbucscheck.sh [-v] [-d] [-r dir] [-b bin] dir
or: csbucscheck.sh -h
Recurse into a csback maintained data directory tree and check all files
against their checksums and test checksum files for completeness.
EOF
}
# ----------------------------------------------------------------------------
# detailed usage function (description of options and arguments)
#######################################
# Print the description of all options and arguments.
# Globals:   TMPBASE, SHABIN, CHECKSUMFILE (read; current values are shown
#            as the defaults)
# Arguments: none
# Outputs:   one line of help text per printf argument on stdout
#######################################
longusage() {
  printf '%s\n' \
    '-h print detailed usage information' \
    '-v be verbose' \
    '-d print debug output' \
    '-r dir base directory for report' \
    'if not provided, a subdirectory in the current directory will' \
    'be created; the default is' \
    "$TMPBASE" \
    '-b bin binary executable to be used to verify checksums' \
    "default: $SHABIN" \
    'dir root directory of the csback supervised tree to be checked' \
    "The name of checksumfiles to be searched for is: $CHECKSUMFILE"
}
# ============================================================================
# read command line
# =================
verbose=0
debug=0

#######################################
# Evaluate the command line options.
# Globals:   verbose, debug, hflag, TMPBASE, SHABIN (written)
#            OPTIND (left pointing at the first non-option argument)
# Arguments: the complete command line of the script
# Outputs:   usage information for -h or for an unknown option
# Returns:   does not return for -h (exit 0) or an unknown option (exit 2)
#######################################
parse_options() {
  local opt
  while getopts 'hvdr:b:' opt
  do
    case "${opt}" in
      h) hflag=1
         usage; longusage; exit 0;;
      v) verbose=1;;
      d) debug=1;;
      # getopts delivers the option's argument in OPTARG; the positional
      # parameters must not be shifted while getopts is iterating over them
      r) TMPBASE=$OPTARG;;
      b) SHABIN=$OPTARG;;
      *) echo -e "ERROR: Unimplemented option chosen.\n"
         usage; exit 2;;
    esac
  done
}

# fetch commandline arguments
parse_options "$@"
# drop the options just parsed; only positional arguments remain
shift $((OPTIND - 1))
# number of positional arguments that must remain after option parsing
MINPARAMS=1
# too few arguments: print the short usage and stop with status 2
(( $# >= MINPARAMS )) || { usage; exit 2; }
# TMPBASE itself need not exist yet (it is created with mkdir -p further
# down), but its parent directory must exist and be writable
TMPBASEDIR=$(dirname -- "$TMPBASE")
if [[ ! -d "$TMPBASEDIR" || ! -w "$TMPBASEDIR" ]]
then
  echo "report output base directory $TMPBASEDIR is not a directory"
  echo "or is not writable"
  exit 2
fi
# root directory of the tree to be checked (first positional argument)
DATADIR=$1
# report directory; the name combines the script name, the name of the data
# directory and the process id of this run
TMPDIR=$TMPBASE/report_$(basename -- "$0" .sh)_$(basename -- "$DATADIR")_$$
mkdir -p -- "$TMPDIR"
# mkdir may have failed, or the path may exist without being writable
if [[ ! -d "$TMPDIR" || ! -w "$TMPDIR" ]]
then
  echo "report output directory $TMPDIR is not a directory"
  echo "or is not writable"
  exit 2
fi
# refuse to start if the selected checksum program cannot be executed
if [[ ! -x "$SHABIN" ]]
then
  echo "$SHABIN is not executable"
  exit 2
fi
# ============================================================================
# start action
# ============
LOGLIST=
HERE=$(pwd)
nmcs=0 # counter for missing checksums
nmf=0 # counter for missing files
nc=0 # counter for examined files, accumulated over all directories

#######################################
# test 1
# ------
# Test if there is an entry in the checksum file for every file in the
# directory.
# Globals:   CHECKSUMFILE, CHECKSUMFILEBASE, TMPDIR (read)
#            nmcs, nc (updated)
# Arguments: $1 - directory containing a checksum file
# Outputs:   one line per file without entry and a summary on stdout;
#            appends to missingCS.log and cscompVerbose.log in TMPDIR
#######################################
check_completeness() {
  local dir=$1
  local name
  # extract all file names from checksum file (second blank-separated field)
  cut -d' ' -f2 "$dir/$CHECKSUMFILE" > "$TMPDIR/chksm.tmp"
  # collect all names of files in the current directory
  find "$dir" -maxdepth 1 -type f -printf "%f\n" > "$TMPDIR/lsfile.tmp"
  # cycle through list filename by filename
  while IFS= read -r name
  do
    # the checksum entry is considered as missing, if no corresponding
    # filename was found and if the file name does not match the csback
    # report file name pattern; -F -x compares the whole line literally, so
    # names containing regular expression characters are handled correctly
    if [[ "$name" != "${CHECKSUMFILEBASE}".* ]] \
      && ! grep -qFx -- "$name" "$TMPDIR/chksm.tmp"
    then
      echo "$dir/$name has no entry in $dir/$CHECKSUMFILE"
      echo "$dir/$name" >> "$TMPDIR/missingCS.log"
      echo "$dir/$name has no entry in $dir/$CHECKSUMFILE" \
        >> "$TMPDIR/cscompVerbose.log"
      nmcs=$((nmcs+1))
    fi
    nc=$((nc+1))
  done < "$TMPDIR/lsfile.tmp"
  # summary report
  echo "$nc files checked"
  echo "$dir checked, total $nc files checked" >> "$TMPDIR/cscompVerbose.log"
}

#######################################
# test 2
# ------
# Check file contents against checksum for all entries of a checksum file.
# Globals:   CHECKSUMFILE, SHABIN, TMPDIR (read); LOGLIST (appended to)
# Arguments: $1 - path of the checksum file
# Outputs:   progress message on stdout; the output of the checksum program
#            is written to a log file in TMPDIR
#######################################
verify_checksums() {
  local csfile=$1
  local dir nlines logfile
  dir=$(dirname -- "$csfile")
  nlines=$(wc -l < "$csfile")
  if (( nlines > 0 ))
  then
    echo " verify $nlines entries in $dir/$CHECKSUMFILE"
    # log file name: path of the checksum file with '/' replaced by '_'
    logfile=$TMPDIR/$(printf '%s\n' "$csfile" | tr '/' '_')
    # Only the first two fields (checksum, file name) are passed on,
    # separated by two blanks as in the checksum program's own output.
    # The subshell keeps the directory change local; the redirection is set
    # up before the cd, so a relative TMPDIR remains valid.
    (
      cd "$dir" \
        && tr -s ' ' < "$(basename -- "$csfile")" \
          | cut -f 1,2 -d ' ' \
          | sed -e 's/ /  /' \
          | "$SHABIN" -c
    ) > "$logfile"
    LOGLIST+=" $logfile"
  else
    echo " checksum file $dir/$CHECKSUMFILE is empty"
  fi
}

# ----------------------------------------------------------------------------
# cycle through all checksum files
# --------------------------------
while IFS= read -r FILE
do
  echo "check $FILE"
  check_completeness "$(dirname -- "$FILE")"
  verify_checksums "$FILE"
done < <(find "$DATADIR" -name "$CHECKSUMFILE")
# ----------------------------------------------------------------------------
# collect and evaluate reports
# ----------------------------
# delete intermediate files; they do not exist if no checksum file was found
rm -f -- "$TMPDIR/lsfile.tmp" "$TMPDIR/chksm.tmp"
# report checksum verification errors
echo "deviant results:"
if [[ -n "${LOGLIST// /}" ]]
then
  # LOGLIST is a blank-separated list of log files, word splitting is
  # intended here; every line not ending in OK is shown and archived
  # shellcheck disable=SC2086
  grep -v 'OK$' $LOGLIST | tee "$TMPDIR/deviant.log"
  # shellcheck disable=SC2086
  nentries=$(cat $LOGLIST | wc -l)
else
  # without any log file grep and cat would wait for input on stdin
  : > "$TMPDIR/deviant.log"
  nentries=0
fi
# report missing file names in checksum files
echo "$nmcs missing checksums in checksumfiles" >> "$TMPDIR/missingCS.log"
echo "list of missing checksums:"
cat "$TMPDIR/missingCS.log"
# report statistics and report location
echo "number of checked entries: $nentries"
echo "results are archived in $TMPDIR"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment