Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37 TYPE_CHECKING 

38) 

39 

40import lsst.afw.fits 

41from lsst.log import Log 

42from lsst.daf.butler import ( 

43 DataCoordinate, 

44 DatasetRef, 

45 DatasetType, 

46 FileDataset, 

47) 

48from ..translators import Translator, makeCalibrationLabel 

49from .parser import PathElementParser 

50from .scanner import PathElementHandler, DirectoryScanner 

51 

if TYPE_CHECKING:

53 from lsst.daf.butler import FormatterParameter 

54 

55 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: an ignore pattern extracts no data ID keys.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full-name match consumes the entry (returning `True` stops other
        # handlers from seeing it) while adding no datasets; `fullmatch`
        # returns `None` when there is no match, hence the identity test.
        return self._pattern.fullmatch(name) is not None

95 

96 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Extend the data ID accumulated from parent directories
        # (``self.lastDataId2``, set by the caller before invoking us) with
        # whatever this path element's template contributes.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            # Name does not match this handler's template; let the scanner
            # try other handlers.
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More extracted keys means a more specific template, hence a higher
        # rank.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

153 

154 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with a matched entry
    other than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Intentionally produce no datasets; warn only if configured to.
        message = self._message
        if message is not None:
            log.warn("Skipping %s: %s", path, message)

191 

192 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending: an empty data ID means the path so far is a
        # fixed string, and an untranslatable one gives the predicate nothing
        # to veto.
        descend = True
        if nextDataId2:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                descend = predicate(dataId3)
        if descend:
            # Seed each child handler with the data ID accumulated so far
            # before scanning the subdirectory's entries.
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Delegate to the child handlers and return the first successful
        # translation.  We always request a partial match because the data ID
        # we hold corresponds to a shallower level than the one the children
        # operate at.
        attempts = (child.translate(dataId2, partial=True, log=log)
                    for child in self.scanner)
        return next((dataId3 for dataId3 in attempts if dataId3 is not None), None)

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

252 

253 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # At a file we must have a complete data ID, so this is a full
        # (non-partial) translation.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        datasets[self._datasetType].append(
            FileDataset(refs=[ref], path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        dimensions = self._datasetType.dimensions
        if partial:
            # A partial data ID cannot be checked against the dataset type's
            # dimension graph, only standardized within the universe.
            return DataCoordinate.standardize(rawDataId3, universe=dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=dimensions)

298 

299 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Partial translation: the detector dimension is filled in per-HDU
        # below, so the file-level data ID cannot be complete yet.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            # Collect the CCDNUM header value of every extension HDU.
            # NOTE(review): the Fits object is never explicitly closed here —
            # presumably released on garbage collection; confirm whether an
            # explicit close/context manager is needed.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            # One DatasetRef per detector HDU, all sharing the same file.
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Unlike the base class, a full translation is never possible because
        # the Gen2 'ccdnum' must be ignored (each HDU supplies its own).
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)