Coverage for python/lsst/daf/persistence/fsScanner.py: 16%

64 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-07 09:51 +0000

1# 

2# LSST Data Management System 

3# Copyright 2008, 2009, 2010 LSST Corporation. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23 

24"""This module provides the FsScanner class.""" 

25 

26import glob 

27import os 

28import re 

29import sys 

30 

31 

class FsScanner:
    """Scan a filesystem location for paths matching a template.

    The template is turned into a glob pattern (to find candidate paths) and
    a regular expression with one named group per template field (to
    decompose each path into typed field values).
    """

    def __init__(self, pathTemplate):
        """Build the glob pattern and regular expression for a template.

        Parameters
        ----------
        pathTemplate : str
            A Python %-style format string with named substitutions, of the
            kind that could generate a path from a dictionary of fields.
            Hex conversions (%x or %X) are not handled.

            Example:
            %(field)s/%(visit)d/%(exposure)d/raw-%(visit)d-e%(exposure)03d-c%(ccd)03d-a%(amp)03d.fits

        Notes
        -----
        Fields may appear multiple times; the second and subsequent
        appearances have "_{number}" appended to disambiguate them (the
        number is a counter shared by all repeated fields, starting at 0),
        although it is typically assumed that they will all be identical.

        Trailing brackets (and their contents) can be used to indicate which
        HDU from a file should be used. They are not included in the
        filename search.

        Sets ``globString``, ``reString`` and ``fields`` (a dict mapping each
        field name to ``dict(pos=..., fieldType=...)``).
        """
        # Trim any trailing brackets (HDU selector) off the path template.
        if pathTemplate.endswith(']'):
            pathTemplate = pathTemplate[0:pathTemplate.rfind('[')]

        # Matches one named substitution: group 1 is the field name,
        # group 2 the conversion character.
        fmt = re.compile(r'%\((\w+)\).*?([dioueEfFgGcrs])')

        # Change template into a globbable path specification.
        self.globString = fmt.sub('*', pathTemplate)

        # Change template into a regular expression.
        self.fields = {}
        reParts = []
        last = 0
        n = 0
        for pos, m in enumerate(fmt.finditer(pathTemplate)):
            fieldName = m.group(1)
            if fieldName in self.fields:
                fieldName += "_%d" % (n,)
                n += 1

            # Literal text between substitutions must match itself exactly,
            # so escape regex metacharacters such as '.', '+', '(' and ')'.
            reParts.append(re.escape(pathTemplate[last:m.start(0)]))
            last = m.end(0)

            conversion = m.group(2)
            if conversion in 'crs':
                fieldType = str
                reParts.append(r'(?P<' + fieldName + '>.+)')
            elif conversion in 'eEfFgG':
                fieldType = float
                reParts.append(r'(?P<' + fieldName + r'>[\d.eE+-]+)')
            else:
                fieldType = int
                reParts.append(r'(?P<' + fieldName + r'>[\d+-]+)')

            self.fields[fieldName] = dict(pos=pos, fieldType=fieldType)

        reParts.append(re.escape(pathTemplate[last:]))
        self.reString = "".join(reParts)

    def getFields(self):
        """Return the list of fields that will be returned from matched
        paths, in template order."""
        return sorted(self.fields, key=lambda name: self.fields[name]['pos'])

    def isNumeric(self, name):
        """Return true if the given field contains a number."""
        return self.fields[name]['fieldType'] in (float, int)

    def isInt(self, name):
        """Return true if the given field contains an integer."""
        return self.fields[name]['fieldType'] == int

    def isFloat(self, name):
        """Return true if the given field contains a float."""
        return self.fields[name]['fieldType'] == float

    def processPath(self, location):
        """Scan a location and decompose the paths that match the template.

        Parameters
        ----------
        location : str
            Directory to scan; the glob pattern is evaluated relative to it.

        Returns
        -------
        dict
            Path info ``{path: {key: value, ...}, ...}`` with paths relative
            to ``location``, e.g.
            {'0239622/instcal0239622.fits.fz': {'visit_0': 239622, 'visit': 239622}}
            Integer and float fields are converted to ``int`` / ``float``.

        Raises
        ------
        ValueError
            If text captured for a numeric field cannot be converted
            (for example ``1-2`` for an integer field).

        Notes
        -----
        Paths found by the glob that do not match the regular expression are
        reported on stderr and skipped. The working directory of the process
        is changed temporarily while globbing and is always restored.
        """
        curdir = os.getcwd()
        os.chdir(location)
        try:
            pathList = glob.glob(self.globString)
        finally:
            # Only the glob needs the changed directory; restore it even if
            # the glob fails so the caller's cwd is never left altered.
            os.chdir(curdir)

        ret = {}
        for path in pathList:
            m = re.search(self.reString, path)
            if not m:
                print("Warning: unmatched path: %s" % (path,), file=sys.stderr)
                continue
            dataId = m.groupdict()
            for f in self.fields:
                if self.isInt(f):
                    dataId[f] = int(dataId[f])
                elif self.isFloat(f):
                    dataId[f] = float(dataId[f])
            ret[path] = dataId
        return ret