Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37 Tuple, 

38 TYPE_CHECKING 

39) 

40 

41import lsst.afw.fits 

42from lsst.daf.butler import ( 

43 DataCoordinate, 

44 DatasetRef, 

45 DatasetType, 

46 FileDataset, 

47) 

48from ..translators import Translator 

49from .parser import PathElementParser 

50from .scanner import PathElementHandler, DirectoryScanner 

51 

52if TYPE_CHECKING: 

53 from lsst.daf.butler import FormatterParameter 

54 

55 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A matched entry is consumed (True) but never yields datasets;
        # collapse the if/else-return-bool anti-idiom to one expression.
        return self._pattern.fullmatch(name) is not None

99 

100 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Try to extend the data ID accumulated so far with values parsed
        # from this path element; no match means this handler does not apply.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More data ID keys parsed from this element means a more specific
        # (higher-rank) handler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

160 

161 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            # ``warning`` rather than the deprecated ``warn`` alias.
            self.log.warning("Skipping %s: %s", path, self._message)

199 

200 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Descend by default: an empty data ID means the path so far is a
        # fixed string, and an untranslatable partial data ID gives the
        # predicate nothing to veto.  Only a translatable data ID that the
        # predicate rejects stops the recursion.
        descend = True
        if nextDataId2:
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                descend = predicate(dataId3)
        if descend:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Delegate to each child handler; the first that produces a result
        # wins.  The request is always partial because the data ID we hold
        # corresponds to a different level than the children operate at.
        for child in self.scanner:
            dataId3, calibDate = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

262 

263 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        found = FileDataset(refs=[ref], path=path, formatter=self._formatter)
        datasets[self._datasetType][calibDate].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # A partial data ID cannot be held to this dataset type's exact
            # dimensions, so standardize against the full universe instead.
            dataId3 = DataCoordinate.standardize(
                rawDataId3, universe=self._datasetType.dimensions.universe
            )
        else:
            dataId3 = DataCoordinate.standardize(
                rawDataId3, graph=self._datasetType.dimensions
            )
        return dataId3, calibDate

323 

324 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Translation is deliberately partial: the per-detector values are
        # filled in below from the file's own headers.
        dataId3, calibDate = self.translate(nextDataId2, partial=True)
        if not predicate(dataId3):
            return

        def readDetectors(filename):
            # NOTE: The primary header (HDU=0) does not contain detector data.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            found = []
            for hdu in range(1, fitsData.countHdus()):
                fitsData.setHdu(hdu)
                found.append(fitsData.readMetadata()['CCDNUM'])
            return found

        # One DatasetRef per detector HDU, all sharing a single FileDataset.
        refs = []
        for detector in readDetectors(path):
            detectorDataId3 = DataCoordinate.standardize(dataId3,
                                                         graph=self._datasetType.dimensions,
                                                         detector=detector)
            refs.append(DatasetRef(self._datasetType, detectorDataId3))
        datasets[self._datasetType][calibDate].append(
            FileDataset(refs=refs, path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dataId3 = DataCoordinate.standardize(
            rawDataId3, universe=self._datasetType.dimensions.universe
        )
        return dataId3, calibDate