Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py: 38%

128 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-09 03:03 -0700

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30import re 

31from abc import abstractmethod 

32from typing import TYPE_CHECKING, Callable, List, Mapping, Optional, Tuple 

33 

34import lsst.afw.fits 

35from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, FileDataset, Progress 

36 

37from ..translators import Translator 

38from .parser import PathElementParser 

39from .scanner import DirectoryScanner, PathElementHandler 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from lsst.daf.butler import FormatterParameter 

43 

44 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """

    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: an ignore pattern extracts no data ID keys.
        return 0

    def __call__(
        self,
        path: str,
        name: str,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full match means "this entry is handled" (by deliberately doing
        # nothing with it); return the match result directly instead of the
        # original if/else-return-True/False construct.
        return self._pattern.fullmatch(name) is not None

94 

95 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(
        self,
        path: str,
        name: str,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            # The parser did not match this entry; let other handlers try.
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More data ID keys means a more specific (higher-rank) match.
        return len(self._parser.keys)

    @abstractmethod
    def handle(
        self,
        path: str,
        nextDataId2: dict,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

166 

167 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """

    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(
        self,
        path: str,
        nextDataId2: dict,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # Nothing is extracted; we only warn (if configured) so the user
        # knows the entry was recognized but deliberately skipped.
        if self._message is not None:
            self.log.warning("Skipping %s: %s", path, self._message)

211 

212 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    progress : `Progress`, optional
        Object to use to report incremental progress.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser, progress: Optional[Progress] = None):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner(progress=progress)

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending; only a successfully-translated data ID that
        # fails the predicate can veto the recursion.
        shouldDescend = True
        if nextDataId2:
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                shouldDescend = predicate(dataId3)
        # (An empty nextDataId2 means the path so far is a fixed string, so
        # there is nothing to test against the predicate yet.)
        if not shouldDescend:
            return
        # Propagate the accumulated Gen2 data ID to every child handler
        # before scanning the subdirectory's entries.
        for child in self.scanner:
            child.lastDataId2 = nextDataId2
        self.scanner.scan(path, datasets, predicate=predicate)

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we always ask children for a partial match:
        # the data ID we hold corresponds to a different (shallower) level
        # than the one the child handlers operate at.
        for child in self.scanner:
            dataId3, calibDate = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

282 

283 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """

    def __init__(
        self,
        parser: PathElementParser,
        translator: Translator,
        datasetType: DatasetType,
        formatter: FormatterParameter = None,
    ):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # A file match always yields a full (non-partial) Gen3 data ID.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        found = FileDataset(refs=[ref], path=path, formatter=self._formatter)
        datasets[self._datasetType][calibDate].append(found)

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # A partial ID may be missing dimensions, so standardize against
            # the full universe rather than this dataset type's graph.
            dataId3 = DataCoordinate.standardize(
                rawDataId3, universe=self._datasetType.dimensions.universe
            )
        else:
            dataId3 = DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
        return dataId3, calibDate

351 

352 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # Translation is always partial here: the detector dimension is
        # filled in per-HDU below rather than from the path element.
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            # Collect the CCDNUM header value from every extension HDU in
            # the file (one detector per extension).
            fitsData = lsst.afw.fits.Fits(filename, "r")
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata["CCDNUM"])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                # Expand the partial data ID with each detector found in the
                # file, producing one DatasetRef per HDU.
                newDataId3 = DataCoordinate.standardize(
                    dataId3, graph=self._datasetType.dimensions, detector=detector
                )
                refs.append(DatasetRef(self._datasetType, newDataId3))

            # All detector refs share a single FileDataset because they live
            # in the same on-disk file.
            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Unlike the base class, a "full" translation is impossible here: the
        # path alone never determines the detector, so partial is required.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        return (
            DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe),
            calibDate,
        )