#!/usr/bin/env python
# this is <csbusortdirlist.py>
# ----------------------------------------------------------------------------
# 
# Copyright (c) 2016 by Thomas Forbriger (BFO Schiltach) 
# 
# read a directory list and create a sorted table
# 
# REVISIONS and CHANGES 
#    10/06/2016   V1.0   Thomas Forbriger
# 
# ============================================================================
#
"""
This program takes a directory list as created by csbulistcsfiles.sh and
creates a sorted table which should support a completeness check.
"""
# ----------------------------------------------------------------------------
# version of this program (printed by usage())
__version__ = '2016-06-10'
# author of this program
__author__ = 'Thomas Forbriger'
# ----------------------------------------------------------------------------
import sys
import os
import string
import getopt

# ----------------------------------------------------------------------------

# process exit status used when the program terminates due to an error
ERR_ERROR = 100

# global switch: debugging messages are printed if True
DEBUG = False

# global switch: verbose messages are printed if True
VERBOSE = False

# file name of checksum files as it appears in the path names
CHECKSUMFILE = 'checksumfile.cs'

# ============================================================================
# elementary functions
# --------------------

def usage():
    """
    Print the program version followed by a two-line command synopsis.

    The program name shown in the synopsis is taken from sys.argv[0].
    """
    program = sys.argv[0]
    synopsis = (
        'Usage: ' + program + ' [-v] [-D] [-o] listfile tablefile',
        '   or: ' + program + ' --help|-h',
    )
    print(__version__)
    for text in synopsis:
        print(text)

# ----------------------------------------------------------------------------
def help():
    """
    Print detailed usage information (online help).

    The short synopsis is printed first by calling usage(). It is followed
    by a description of the program and of its command line arguments and
    by a hint at the pydoc documentation.
    """
    usage()
    # the help text is printed verbatim; it must not contain anything but the
    # text intended for the user
    print(
"""
This program takes a directory list like created by csbulistcsfiles.sh and
creates a sorted table which should support a completeness check. Pathnames
are expected to contain the string 'checksumfile.cs'.

  listfile  list of subdirectory path names
  tablefile name of file to write table to
  -v        be verbose
  -o        overwrite output file
  -D        produce debug output

""")
    print('Call "pydoc csbusortdirlist" to learn more')

# ----------------------------------------------------------------------------
def debugout(m):
    """
    Print message m with the prefix "DEBUG: " if the global DEBUG is set.
    """
    if not DEBUG:
        return
    print("DEBUG: " + m)

# ----------------------------------------------------------------------------
def verbose(m):
    """
    Print message m if the global VERBOSE is set; do nothing otherwise.
    """
    if not VERBOSE:
        return
    print(m)

# ============================================================================
class Basetable():
    """
    Base class for table objects.

    Attributes:
      pathname    path name the table was created from
      collection  name of the collection; 'Unknown' in the base class
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # keep the path name so that it remains available on the object
        self.pathname = pathname
        # the base class does not derive a collection from the path name
        self.collection = 'Unknown'

# ============================================================================
class SEEDlinktable(Basetable):
    """
    Class to hold a seedlink data structure
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # run the base class constructor explicitly; without this call the
        # attributes set up by Basetable (e.g. collection) would be missing
        Basetable.__init__(self, pathname)

# ============================================================================
class UIPCtable(Basetable):
    """
    Class to hold a UIPC data structure
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # run the base class constructor explicitly; without this call the
        # attributes set up by Basetable (e.g. collection) would be missing
        Basetable.__init__(self, pathname)

# ============================================================================
class DL1table(Basetable):
    """
    Class to hold a DL1 data structure
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # run the base class constructor explicitly; without this call the
        # attributes set up by Basetable (e.g. collection) would be missing
        Basetable.__init__(self, pathname)

# ============================================================================
class HPMOtable(Basetable):
    """
    Class to hold a HPMO data structure
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # run the base class constructor explicitly; without this call the
        # attributes set up by Basetable (e.g. collection) would be missing
        Basetable.__init__(self, pathname)

# ============================================================================
class Unknowntable(Basetable):
    """
    Class to hold an unknown data structure
    """

    def __init__(self, pathname):
        """
        Constructor: initialize from path name

        pathname: path name to initialize the table from
        """
        # run the base class constructor explicitly; without this call the
        # attributes set up by Basetable (e.g. collection) would be missing
        Basetable.__init__(self, pathname)

# ============================================================================
class PathLine():
    """
    Class to hold and parse a single input line.

    Attributes set by the constructor:
      datatype    'seedlink', 'DL1', 'HPMO', 'UIPC' or 'Unknown'
      collection  collection taken from the path name or 'Unknown'
      year        year taken from the path name or 'Unknown'
      channel     channel taken from the path name or 'Unknown'
      pathname    the token of the line which contains the checksum file
                  name; the complete line if there is no such token
    """

    # ------------------------------------------------------------------------

    def __init__(self, line):
        """
        Constructor takes an input line string

        The line is split at whitespace. Every token which contains the
        checksum file name (CHECKSUMFILE) is taken as path name. The data
        type is selected by the first of the strings 'seedlink', 'DL1',
        'HPMO' and 'UIPC' (tested in this order) contained in the token.
        If several tokens contain the checksum file name, they are evaluated
        in turn and later tokens overwrite the values set by earlier ones.
        """
        self.datatype = 'Unknown'
        self.collection = 'Unknown'
        self.year = 'Unknown'
        self.channel = 'Unknown'
        self.pathname = line
        for s in line.split():
            # use membership tests rather than find() > 0: a match at the
            # very beginning of the token (index 0) has to count as well,
            # e.g. for relative path names like 'seedlink/2016/...'
            if CHECKSUMFILE not in s:
                continue
            self.pathname = s
            if 'seedlink' in s:
                self._parse_seedlink(s)
            elif 'DL1' in s:
                self._parse_dl1(s)
            elif 'HPMO' in s:
                self._parse_hpmo(s)
            elif 'UIPC' in s:
                self._parse_uipc(s)

    @staticmethod
    def _keys_from(path, marker):
        """
        Return the '/'-separated components of path beginning at marker.
        """
        return path[path.index(marker):].split('/')

    def _parse_seedlink(self, path):
        """
        Set year (keys[1]), collection (keys[2]:keys[3]) and channel
        (keys[4]) from a seedlink path name.
        """
        self.datatype = 'seedlink'
        keys = self._keys_from(path, 'seedlink')
        if len(keys) > 2:
            self.year = keys[1]
        if len(keys) > 4:
            self.collection = keys[2] + ':' + keys[3]
        if len(keys) > 5:
            self.channel = keys[4]

    def _parse_dl1(self, path):
        """
        Set collection (keys[1]:keys[2]), year (keys[3]) and channel
        (keys[4]) from a DL1 path name.
        """
        self.datatype = 'DL1'
        keys = self._keys_from(path, 'DL1')
        if len(keys) > 2:
            self.collection = keys[1] + ':' + keys[2]
        if len(keys) > 4:
            self.year = keys[3]
        if len(keys) > 5:
            self.channel = keys[4]

    def _parse_hpmo(self, path):
        """
        Set collection (keys[1]) and, for collection HPMO_SNIFF only, year
        (keys[3]) and channel (characters 4 to 5 of keys[4]) from a HPMO
        path name.
        """
        self.datatype = 'HPMO'
        # the components are counted from the directory 'HPMO/'; a path name
        # may contain 'HPMO' without that directory (e.g. only 'HPMO_SNIFF'),
        # in which case index() would raise ValueError
        if 'HPMO/' not in path:
            return
        keys = self._keys_from(path, 'HPMO/')
        if len(keys) > 2:
            self.collection = keys[1]
            if keys[1] == 'HPMO_SNIFF':
                if len(keys) > 4:
                    self.year = keys[3]
                if len(keys) > 5:
                    self.channel = keys[4][4:6]

    def _parse_uipc(self, path):
        """
        Set year (first four characters of keys[1]), collection (keys[2])
        and channel (keys[3], 'std' if the path is too short) from a UIPC
        path name.
        """
        self.datatype = 'UIPC'
        keys = self._keys_from(path, 'UIPC')
        if len(keys) > 2:
            self.year = keys[1][0:4]
        if len(keys) > 3:
            self.collection = keys[2]
        if len(keys) > 4:
            self.channel = keys[3]
        else:
            self.channel = 'std'

    # ------------------------------------------------------------------------

    def report(self):
        """
        Report contents to stdout

        Prints one line with data type, collection, year and channel and a
        second, indented line with the path name.
        """
        print('type: %s  collection: %s  year: %s  channel: %s' %
                (self.datatype, self.collection, self.year, self.channel))
        print('  %s' % self.pathname)

# ============================================================================
# processing functions
# --------------------
def readlist(filename):
    """
    Read a list of subdirectory path names and create table objects

    filename: name of the file to read; each line is parsed by PathLine

    Every input line is reported to stdout (PathLine.report) and a table
    object of the class matching the data type of the line is created.

    Returns the dictionary intended to hold the tables. Storing the tables
    is not implemented yet, hence the dictionary is currently always empty.
    """
    debugout("entered function readlist")
    verbose('read file %s' % filename)

    # table class to be used for each known data type
    tableclasses = {
        'seedlink': SEEDlinktable,
        'DL1': DL1table,
        'HPMO': HPMOtable,
        'UIPC': UIPCtable,
    }

    tables = dict()

    # the with statement makes sure the input file gets closed
    with open(filename, 'r') as listfile:
        for line in listfile:
            inputline = line.rstrip('\n')
            pathline = PathLine(inputline)
            pathline.report()
            tableclass = tableclasses.get(pathline.datatype)
            if tableclass is None:
                verbose("input line of unknown data type:")
                # report the line without its trailing newline
                verbose(inputline)
                table = Unknowntable(pathline.pathname)
            else:
                debugout(pathline.datatype)
                debugout(pathline.pathname)
                table = tableclass(pathline.pathname)
            # TODO: store the table, e.g. tables[table.collection] = table

    debugout("finished function readlist")
    return tables

# ============================================================================
def main(argv=None):
    """
    The main function

    argv: complete argument vector including the program name in argv[0];
          sys.argv is used if argv is None

    Evaluates the command line, reads the input list and opens the output
    file. Terminates through sys.exit with status 0 after printing help or
    usage information and with status ERR_ERROR if the output file exists
    and overwriting was not requested. An IOError raised when opening the
    output file is passed on to the caller.
    """
    global VERBOSE
    global DEBUG

    # honour an argument vector passed by the caller
    if argv is None:
        argv = sys.argv

    (opts, args) = getopt.gnu_getopt(argv[1:], 'voDh', ['help'])

    overwrite = False

    for (option, value) in opts:
        if option == '-v':
            VERBOSE = True
        elif option == '-D':
            DEBUG = True
        elif option == '-o':
            overwrite = True
        elif option in ('-h', '--help'):
            help()
            sys.exit(0)
        else:
            # safeguard only: getopt itself rejects options not listed above
            usage()
            sys.exit(0)

    if len(args) < 2:
        usage()
        sys.exit(0)

    inputfilename = args[0]
    outputfilename = args[1]

    # the return value is not used yet; no table is written so far
    tabledata = readlist(inputfilename)

    # check for existing file: we do not want to overwrite an existing file
    # unless requested explicitly
    if not overwrite:
        if os.path.exists(outputfilename):
            print('ERROR: output file %s exists' % outputfilename)
            sys.exit(ERR_ERROR)
        verbose('create new file %s' % outputfilename)

    # report only if verbose output was requested
    verbose('write to file %s' % outputfilename)

    # try to open for output
    try:
        output = open(outputfilename, 'w')
    except IOError:
        print('ERROR: could not open file %s for output' % outputfilename)
        raise
    # nothing is written to the file yet; do not leave it open
    output.close()

# ----------------------------------------------------------------------------

if __name__ == '__main__':
    # run the program only if executed as a script and pass the return value
    # of main() to the interpreter as exit status
    status = main()
    sys.exit(status)

# ----- END OF csbusortdirlist.py -----