
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37 TYPE_CHECKING 

38) 

39 

40import lsst.afw.fits 

41from lsst.daf.butler import ( 

42 DataCoordinate, 

43 DatasetRef, 

44 DatasetType, 

45 FileDataset, 

46) 

47from ..translators import Translator, makeCalibrationLabel 

48from .parser import PathElementParser 

49from .scanner import PathElementHandler, DirectoryScanner 

50 

if TYPE_CHECKING:

52 from lsst.daf.butler import FormatterParameter 

53 

54 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: an ignore pattern extracts no data ID keys.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full-pattern match consumes the entry (it is simply ignored);
        # return the match result directly instead of branching to
        # literal True/False.
        return self._pattern.fullmatch(name) is not None

97 

98 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Try to extend the data ID accumulated so far with this path
        # element; delegate to the subclass hook only on a successful parse.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        matched = nextDataId2 is not None
        if matched:
            self.handle(path, nextDataId2, datasets, predicate=predicate)
        return matched

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More extracted keys means a more specific (higher-rank) handler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

156 

157 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Intentionally a no-op aside from the optional warning: matching
        # here stops other handlers from processing this entry.
        if self._message is not None:
            # NOTE(review): ``warn`` is a deprecated alias of ``warning`` in
            # Python's standard ``logging`` module; confirm what kind of
            # logger ``self.log`` is before renaming this call.
            self.log.warn("Skipping %s: %s", path, self._message)

194 

195 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # We matched, and there's no data ID at all yet.  That means the
            # full path so far is just a fixed string, so we should descend
            # and the match is exclusive.
            descend = True
        else:
            # If a child handler can translate the partial data ID, let the
            # predicate decide whether to descend; otherwise descend anyway.
            dataId3 = self.translate(nextDataId2, partial=True)
            descend = predicate(dataId3) if dataId3 is not None else True
        if descend:
            # Seed every child handler with the data ID accumulated so far,
            # then scan the subdirectory's entries.
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we always ask children for a partial match:
        # the data ID we have corresponds to a different (higher) level than
        # the one child handlers operate at.  First successful translation
        # wins.
        for child in self.scanner:
            dataId3 = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

255 

256 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # A file match yields a complete data ID, so translation is never
        # partial here.
        dataId3 = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        datasets[self._datasetType].append(
            FileDataset(refs=[ref], path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial)
        dimensions = self._datasetType.dimensions
        if partial:
            # A partial data ID may be missing dimensions, so standardize
            # against the universe rather than the full dimension graph.
            return DataCoordinate.standardize(rawDataId3, universe=dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=dimensions)

304 

305 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Translation is always partial here: the per-detector dimensions are
        # filled in below from the file's own headers.
        dataId3 = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            # Collect the CCDNUM header value from every extension HDU.
            # NOTE(review): the Fits handle is not explicitly closed here;
            # presumably it is released when ``fitsData`` goes out of scope —
            # confirm against the lsst.afw.fits API.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                # One DatasetRef per detector, each with its own
                # calibration label derived from the Gen2 data ID.
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            # A single FileDataset carries all per-detector refs for this file.
            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Unlike the base class, the result is always standardized against
        # the universe: the detector-level dimensions are added later by
        # ``handle``.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)