Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Classes that transform (part of) a Gen2 filename template into a regular 

22expression that we can use to extract Gen2 data IDs from files. 

23""" 

24from __future__ import annotations 

25 

26__all__ = ["PathElementParser"] 

27 

28 

29from abc import ABC, abstractmethod 

30import re 

31from typing import ClassVar, Dict, Optional 

32 

33from lsst.log import Log 

34 

35 

class FormattableRegEx(ABC):
    """Abstract source of regular expressions that may depend on a data ID.

    `PathElementParser` works through this interface so that it does not have
    to care whether the regex for a path element must embed values from a
    data ID extracted from parent path elements or not.
    """

    @abstractmethod
    def format(self, dataId: dict) -> re.Pattern:
        """Produce the regular expression appropriate for a data ID.

        Parameters
        ----------
        dataId : `dict`
            Mapping whose entries are available for substitution into the
            regular expression.  Entries that are not needed are allowed.

        Returns
        -------
        regex : `re.Pattern`
            The regular expression, after any substitutions.
        """
        raise NotImplementedError

57 

58 

class FixedRegEx(FormattableRegEx):
    """A `FormattableRegEx` whose regular expression never changes.

    Parameters
    ----------
    regex : `re.Pattern`
        Regular expression handed back, unmodified, by every call to
        `format`.
    """

    __slots__ = ("regex",)

    def __init__(self, regex: re.Pattern):
        self.regex = regex

    def format(self, dataId: dict) -> re.Pattern:
        # Docstring inherited from FormattableRegEx.
        # The data ID is ignored; there is nothing to substitute.
        return self.regex

75 

76 

class SubstitutableRegEx(FormattableRegEx):
    """An implementation of `FormattableRegEx` formed from a concatenation of
    actual regular expression terms and %-style format strings.

    Terms are concatenated in the order in which they were added.  Regular
    expression terms are used verbatim; substitution terms are formatted
    with the data ID passed to `format` and then escaped so that they match
    literally.
    """

    __slots__ = ("_terms",)

    def __init__(self):
        # Each entry is ``(text, isSub)``.  ``isSub`` is `True` for %-style
        # templates that `format` fills in from a data ID, and `False` for
        # text that is already regular expression source.
        self._terms = []

    def addRegexTerm(self, regex: str) -> None:
        """Add a regular expression term.

        Parameters
        ----------
        regex : `str`
            Regular expression source text, inserted verbatim into the
            pattern built by `format`.
        """
        self._terms.append((regex, False))

    def addSubstitutionTerm(self, template: str) -> None:
        """Add a %-style format template term.

        Parameters
        ----------
        template : `str`
            A %-style format string that `format` fills in from its data ID
            argument; the formatted result is escaped before being added to
            the pattern.
        """
        self._terms.append((template, True))

    def format(self, dataId: dict) -> re.Pattern:
        # Docstring inherited from FormattableRegEx.
        return re.compile("".join(re.escape(s % dataId) if isSub else s
                                  for s, isSub in self._terms))

    def simplify(self) -> FormattableRegEx:
        """Return a possibly-simplified version of this object.

        Returns
        -------
        regex : `FormattableRegEx`
            If `addSubstitutionTerm` was never called, a `FixedRegEx` holding
            the regular expression compiled once from all terms; otherwise
            ``self``.
        """
        if not any(isSub for _, isSub in self._terms):
            return FixedRegEx(re.compile("".join(s for s, _ in self._terms)))
        return self

111 

112 

class PathElementParser:
    """An object that matches Gen2 file names and extracts Gen2 data IDs.

    Parameters
    ----------
    template : `str`
        Either a full Gen2 path template or the part of one that corresponds
        to a single path element (a subdirectory or file name).
    allKeys : `dict` [`str`, `type`]
        A dictionary that provides types for all Gen2 data ID keys that are
        substituted into the given template.  Additional key-value pairs may
        be present and will be ignored.
    previousKeys : `dict` [`str`, `type`], optional
        A dictionary containing key strings and types for Gen2 data ID keys
        that have been extracted from previous path elements of the same
        template.  Values for these keys must be provided via the
        ``lastDataId`` argument when calling `parse`.

    Raises
    ------
    KeyError
        Raised if the template substitutes a key that appears in neither
        ``previousKeys`` nor ``allKeys``.
    """

    __slots__ = ("keys", "template", "regex")

    TEMPLATE_RE: ClassVar[re.Pattern] = re.compile(r"\%\((?P<name>\w+)\)[^\%]*?(?P<type>[idrs])")
    """Regular expression that matches a single substitution in
    Gen2 CameraMapper template, such as "%(tract)04d".
    """

    def __init__(self, template: str, allKeys: Dict[str, type], *,
                 previousKeys: Optional[Dict[str, type]] = None):
        self.template = template
        self.keys = {}
        regex = SubstitutableRegEx()
        # Walk over each %-tagged substitution string in the template,
        # remembering where the previous one ended.
        last = 0
        for match in self.TEMPLATE_RE.finditer(self.template):
            # Copy the literal text between the last substitution and this
            # one, escaped so that it only matches itself.
            regex.addRegexTerm(re.escape(self.template[last:match.start()]))
            # Pull out the data ID key from the name used in the
            # substitution string.  Use that and the substitution type to
            # come up with the pattern to use in the regex.
            name = match.group("name")
            if name == "patch":
                pattern = r"\d+,\d+"
            elif match.group("type") in ("i", "d"):  # integers
                pattern = r"0*\d+"
            else:
                pattern = ".+"
            if name not in self.keys:
                if previousKeys and name in previousKeys:
                    # Key is new to this part of the template, but it
                    # appeared in some previous part of the template.  Keep
                    # the original %-style text so it can be formatted later
                    # with the data ID from that previous step.
                    regex.addSubstitutionTerm(match.group(0))
                else:
                    # Key is new; create a named group so a data ID value
                    # can be extracted from it.
                    regex.addRegexTerm(r"(?P<%s>%s)" % (name, pattern))
                    self.keys[name] = allKeys[name]
            else:
                # A second occurrence within this element must repeat the
                # text captured by the first.  The backreference syntax is
                # ``(?P=name)``, with no angle brackets.
                regex.addRegexTerm(r"(?P=%s)" % name)
            # Remember the end of this match.
            last = match.end()
        # Append anything remaining after the last substitution string.
        regex.addRegexTerm(re.escape(self.template[last:]))
        # If there are no deferred substitutions, this joins and compiles
        # everything into a single regex now.
        self.regex = regex.simplify()

    def parse(self, name: str, lastDataId: dict, *, log: Optional[Log] = None) -> Optional[dict]:
        """Parse the path element.

        Parameters
        ----------
        name : `str`
            The path name to parse.
        lastDataId : `dict`
            The cumulative Gen2 data ID obtained by calling `parse` on
            parsers for parent directories of the same path.
        log : `Log`, optional
            Log to use to report warnings and debug information.

        Returns
        -------
        dataId : `dict` or `None`
            Gen2 data ID that combines key-value pairs obtained from this path
            with those from ``lastDataId``.  `None` if ``name`` is not matched
            by this parser.  If the keys extracted are inconsistent with those
            in ``lastDataId``, a warning is sent to ``log`` (when one is
            given) and `None` is returned.
        """
        m = self.regex.format(lastDataId).fullmatch(name)
        if m is None:
            return None
        # Convert each captured string with the type registered for its key.
        newDataId = {k: v(m.group(k)) for k, v in self.keys.items()}
        for commonKey in lastDataId.keys() & newDataId.keys():
            if newDataId[commonKey] != lastDataId[commonKey]:
                if log is not None:
                    log.warn("Inconsistent value %s=%r when parsing %r with %r.",
                             commonKey, newDataId[commonKey], name, lastDataId)
                return None
        newDataId.update(lastDataId)
        return newDataId

    keys: Dict[str, type]
    """Dictionary mapping Gen2 data ID key to the type of its associated
    value, covering only those keys that can be extracted from this path
    element.
    """

    template: str
    """The portion of the original Gen2 filename template that this parser was
    constructed with.
    """

    regex: FormattableRegEx
    """An object whose ``format`` method returns the regular expression used
    to match the path element and populate the Gen2 data ID items whose keys
    are in ``keys``.
    """

235 """