Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes that transform (part of) a Gen2 filename template into a regular 

22expression that we can use to extract Gen2 data IDs from files. 

23""" 

24from __future__ import annotations 

25 

26__all__ = ["PathElementParser"] 

27 

28 

29import logging 

30from abc import ABC, abstractmethod 

31import re 

32from typing import ClassVar, Dict, Optional 

33 

34 

class FormattableRegEx(ABC):
    """Abstract interface for objects that produce a regular expression from
    a template and a data ID.

    `PathElementParser` uses this interface so that it does not need to know
    whether the regex for a path element has to include values from a data ID
    extracted from parent path elements.
    """

    @abstractmethod
    def format(self, dataId: dict) -> re.Pattern:
        """Return a regular expression with values from the given data ID
        substituted into it.

        Parameters
        ----------
        dataId : `dict`
            Dictionary whose entries may be used to format the regular
            expression.  It may contain entries that are not used.

        Returns
        -------
        regex : `re.Pattern`
            The regular expression to match against.
        """
        raise NotImplementedError()

56 

57 

class FixedRegEx(FormattableRegEx):
    """A `FormattableRegEx` that performs no formatting and always hands back
    the same regular expression.

    Parameters
    ----------
    regex : `re.Pattern`
        The fixed regular expression to return.
    """

    __slots__ = ("regex",)

    def __init__(self, regex: re.Pattern):
        self.regex = regex

    def __str__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.regex})"

    def format(self, dataId: dict) -> re.Pattern:
        # Docstring inherited from FormattableRegEx.
        # ``dataId`` is intentionally unused: nothing is substituted here.
        return self.regex

77 

78 

class SubstitutableRegEx(FormattableRegEx):
    """An implementation of `FormattableRegEx` formed from a concatenation of
    actual regular expression terms and %-style format strings.

    Terms are joined in the order in which they were added.
    """

    __slots__ = ("_terms",)

    def __init__(self):
        # Each entry is a ``(text, isSubstitution)`` pair: ``text`` is either
        # a regular expression fragment (``False``) or a %-style template
        # that is formatted with a data ID and then escaped (``True``).
        self._terms = []

    def addRegexTerm(self, regex: str) -> None:
        """Add a regular expression term.

        Parameters
        ----------
        regex : `str`
            Regular expression fragment, included verbatim in the result.
        """
        self._terms.append((regex, False))

    def addSubstitutionTerm(self, template: str) -> None:
        """Add a %-style format template term.

        Parameters
        ----------
        template : `str`
            A %-style format string (e.g. ``"%(visit)07d"``) that is
            formatted with the data ID passed to `format`; the formatted
            text is escaped so that it is matched literally.
        """
        self._terms.append((template, True))

    def format(self, dataId: dict) -> re.Pattern:
        # Docstring inherited from FormattableRegEx.
        # Substitution terms are formatted and then escaped so the resulting
        # values match literally; regex terms are used as they are.
        return re.compile("".join(re.escape(s % dataId) if isSub else s
                                  for s, isSub in self._terms))

    def simplify(self) -> FormattableRegEx:
        """Return a possibly-simplified version of this object.

        If `addSubstitutionTerm` was never called, this returns a simple
        `FixedRegEx` holding the already-compiled concatenation of all terms;
        otherwise it returns ``self``.

        Returns
        -------
        regex : `FormattableRegEx`
            Either a new `FixedRegEx` or this object.
        """
        if not any(isSub for _, isSub in self._terms):
            return FixedRegEx(re.compile("".join(s for s, _ in self._terms)))
        else:
            return self

113 

114 

class PathElementParser:
    """An object that matches Gen2 file names and extracts Gen2 data IDs.

    Parameters
    ----------
    template : `str`
        Either a full Gen2 path template or the part of one that corresponds
        to a single path element (a subdirectory or file name).
    allKeys : `dict` [`str`, `type`]
        A dictionary that provides types for all Gen2 data ID keys that are
        substituted into the given template.  Additional key-value pairs may
        be present and will be ignored.
    previousKeys : `dict` [`str`, `type`], optional
        A dictionary containing key strings and types for Gen2 data ID keys
        that have been extracted from previous path elements of the same
        template.  Values for these keys must be provided via the
        ``lastDataId`` argument when calling `parse`.

    Raises
    ------
    KeyError
        Raised if the template substitutes a key that is present in neither
        ``previousKeys`` nor ``allKeys``.
    """
    def __init__(self, template: str, allKeys: Dict[str, type], *,
                 previousKeys: Optional[Dict[str, type]] = None):
        self.template = template
        self.keys = {}
        # Build the regex term by term, iterating over each %-tagged
        # substitution string in the template.
        regex = SubstitutableRegEx()
        last = 0
        for match in self.TEMPLATE_RE.finditer(self.template):
            # Copy the literal text between the last substitution and this
            # one, escaping it so it is matched verbatim.
            regex.addRegexTerm(re.escape(self.template[last:match.start()]))
            # Pull out the data ID key from the name used in the
            # substitution string.  Use that and the substitution
            # type to come up with the pattern to use in the regex.
            name = match.group("name")
            if name == "patch":
                pattern = r"\d+,\d+"
            elif match.group("type") in "id":  # integers
                pattern = r"0*\d+"
            else:
                pattern = ".+"
            if name in self.keys:
                # Second (or later) occurrence within this element: require
                # the same text that the first occurrence's named group
                # matched.  The backreference syntax is ``(?P=name)``, with
                # no angle brackets around the name.
                regex.addRegexTerm(r"(?P=%s)" % name)
            elif previousKeys and name in previousKeys:
                # Key is new to this part of the template, but it appeared
                # in some previous part of the template.  We'll format the
                # original substitution string with the data ID from that
                # previous step later.
                regex.addSubstitutionTerm(match.group(0))
            else:
                # Key is new; create a named group so that a data ID value
                # can be extracted from it.
                regex.addRegexTerm(r"(?P<%s>%s)" % (name, pattern))
                self.keys[name] = allKeys[name]
            # Remember the end of this match.
            last = match.end()
        # Append anything remaining after the last substitution string.
        regex.addRegexTerm(re.escape(self.template[last:]))
        # If there are no substitutions, join and compile into a single regex
        # now.
        self.regex = regex.simplify()

    __slots__ = ("keys", "template", "regex")

    TEMPLATE_RE: ClassVar[re.Pattern] = re.compile(r"\%\((?P<name>\w+)\)[^\%]*?(?P<type>[idrs])")
    """Regular expression that matches a single substitution in
    Gen2 CameraMapper template, such as "%(tract)04d".
    """

    def __str__(self):
        return f"{type(self).__name__}({self.regex})"

    def parse(self, name: str, lastDataId: dict, *, log: Optional[logging.Logger] = None) -> Optional[dict]:
        """Parse the path element.

        Parameters
        ----------
        name : `str`
            The path name to parse.
        lastDataId : `dict`
            The cumulative Gen2 data ID obtained by calling `parse` on parsers
            for parent directories of the same path.
        log : `logging.Logger`, optional
            Log to use to report warnings and debug information.

        Returns
        -------
        dataId : `dict` or `None`
            Gen2 data ID that combines key-value pairs obtained from this path
            with those from ``lastDataId``.  `None` if ``name`` is not matched
            by this parser.  If the keys extracted are inconsistent with those
            in ``lastDataId``, a warning is sent to ``log`` (if one was given)
            and `None` is returned.
        """
        m = self.regex.format(lastDataId).fullmatch(name)
        if m is None:
            return None
        # Convert each extracted string to the type registered for its key.
        newDataId = {k: v(m.group(k)) for k, v in self.keys.items()}
        for commonKey in lastDataId.keys() & newDataId.keys():
            if newDataId[commonKey] != lastDataId[commonKey]:
                if log is not None:
                    log.warning("Inconsistent value %s=%r when parsing %r with %r.",
                                commonKey, newDataId[commonKey], name, lastDataId)
                return None
        # Any keys shared with ``lastDataId`` were just verified to be equal,
        # so this only adds the keys extracted from parent path elements.
        newDataId.update(lastDataId)
        return newDataId

    keys: Dict[str, type]
    """Dictionary mapping Gen2 data ID key to the type of its associated
    value, covering only those keys that can be extracted from this path
    element.
    """

    template: str
    """The portion of the original Gen2 filename template that this parser was
    constructed with.
    """

    regex: "FormattableRegEx"
    """An object whose ``format`` method, given the data ID extracted from
    parent path elements, returns the regular expression that can be used to
    match the path element and populate the Gen2 data ID items whose keys are
    in ``keys``.
    """

240 """