Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37 TYPE_CHECKING 

38) 

39 

40import lsst.afw.fits 

41from lsst.log import Log 

42from lsst.daf.butler import ( 

43 DataCoordinate, 

44 DatasetRef, 

45 DatasetType, 

46 FileDataset, 

47) 

48from ..translators import Translator, makeCalibrationLabel 

49from .parser import PathElementParser 

50from .scanner import PathElementHandler, DirectoryScanner 

51 

52if TYPE_CHECKING: 52 ↛ 53line 52 didn't jump to line 53, because the condition on line 52 was never true

53 from lsst.daf.butler import FormatterParameter 

54 

55 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match consumes the entry without producing any datasets; the
        # whole name must match (fullmatch), not just a prefix.
        return self._pattern.fullmatch(name) is not None

98 

99 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementParser.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            # Parser did not match this entry; let other handlers try it.
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementParser.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

159 

160 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)

197 

198 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Descend by default: an empty data ID means the path so far is a
        # fixed string, and a data ID we cannot (even partially) translate
        # gives us nothing to filter on.  Only a successful partial
        # translation lets the predicate veto the descent.
        descend = True
        if nextDataId2:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                descend = predicate(dataId3)
        if descend:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Delegate to the children, always asking for a partial match: the
        # data ID we have corresponds to a different level than the one the
        # child handlers operate at.  Return the first success, if any.
        candidates = (child.translate(dataId2, partial=True, log=log) for child in self.scanner)
        return next((result for result in candidates if result is not None), None)

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

258 

259 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # A file terminates the recursion, so we require a full (non-partial)
        # translation here.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            ref = DatasetRef(self._datasetType, dataId3)
            found = FileDataset(refs=[ref], path=path, formatter=self._formatter)
            datasets[self._datasetType].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        # Partial IDs cannot be validated against the full dimension graph,
        # so standardize against the universe instead.
        if partial:
            kwargs = dict(universe=self._datasetType.dimensions.universe)
        else:
            kwargs = dict(graph=self._datasetType.dimensions)
        return DataCoordinate.standardize(rawDataId3, **kwargs)

307 

308 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            """Return the CCDNUM of each non-primary HDU in ``filename``."""
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            try:
                # NOTE: The primary header (HDU=0) does not contain detector data.
                detectors = []
                for i in range(1, fitsData.countHdus()):
                    fitsData.setHdu(i)
                    metadata = fitsData.readMetadata()
                    detectors.append(metadata['CCDNUM'])
                return detectors
            finally:
                # Close explicitly rather than relying on garbage collection;
                # a repo scan can touch many files.
                fitsData.closeFile()

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)