Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py: 34%

128 statements  

« prev     ^ index     » next       coverage.py v7.2.1, created at 2023-03-12 01:53 -0800

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37 Tuple, 

38 TYPE_CHECKING 

39) 

40 

41import lsst.afw.fits 

42from lsst.daf.butler import ( 

43 DataCoordinate, 

44 DatasetRef, 

45 DatasetType, 

46 FileDataset, 

47 Progress, 

48) 

49from ..translators import Translator 

50from .parser import PathElementParser 

51from .scanner import PathElementHandler, DirectoryScanner 

52 

53if TYPE_CHECKING: 

54 from lsst.daf.butler import FormatterParameter 

55 

56 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches a path entry against a regular
    expression and does nothing with it.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # An ignore pattern extracts no data ID keys, so it ranks lowest.
        return 0

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match means "consumed and ignored": report handled, add nothing.
        return self._pattern.fullmatch(name) is not None

100 

101 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            # Parser did not match this entry; let another handler try it.
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Handlers whose templates extract more data ID keys rank higher.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (e.g.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `~collections.abc.Mapping`
            Nested mapping, keyed first by `DatasetType` and then by
            calibration date, of the `list` [`FileDataset`] objects that
            found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

161 

162 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Intentionally a no-op aside from the (optional) warning.
        if self._message is not None:
            self.log.warning("Skipping %s: %s", path, self._message)

200 

201 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    progress : `Progress`, optional
        Object to use to report incremental progress.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser, progress: Optional[Progress] = None):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner(progress=progress)

    __slots__ = ("scanner",)

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        shouldRecurse = True
        if nextDataId2:
            # There is a (partial) Gen2 data ID; if it translates to a Gen3
            # data ID, let the predicate decide whether to descend.  An
            # untranslatable partial ID means we can't rule this subtree out.
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                shouldRecurse = predicate(dataId3)
        # An empty data ID means the full path so far is just a fixed string,
        # so we always descend and the match is exclusive.
        if shouldRecurse:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Delegate to the child handlers; the first successful translation
        # wins.  We always request a partial match because the data ID we
        # have corresponds to a different level than the one the child
        # handlers operate at.
        for child in self.scanner:
            dataId3, calibDate = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3, calibDate
        return None, None

265 

266 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        found = FileDataset(
            refs=[DatasetRef(self._datasetType, dataId3)],
            path=path, formatter=self._formatter
        )
        datasets[self._datasetType][calibDate].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # A partial data ID cannot be checked against the dataset type's
            # full dimension graph, so standardize against the universe only.
            dataId3 = DataCoordinate.standardize(
                rawDataId3, universe=self._datasetType.dimensions.universe
            )
        else:
            dataId3 = DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
        return dataId3, calibDate

326 

327 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # The translation is necessarily partial: one file holds many
        # detectors, so the per-detector dimension is filled in below.
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def readDetectorNumbers(filename):
            fitsFile = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            numbers = []
            for hdu in range(1, fitsFile.countHdus()):
                fitsFile.setHdu(hdu)
                numbers.append(fitsFile.readMetadata()['CCDNUM'])
            return numbers

        if predicate(dataId3):
            refs = []
            for detector in readDetectorNumbers(path):
                # Complete the partial data ID with this HDU's detector.
                fullDataId3 = DataCoordinate.standardize(
                    dataId3, graph=self._datasetType.dimensions, detector=detector
                )
                refs.append(DatasetRef(self._datasetType, fullDataId3))

            datasets[self._datasetType][calibDate].append(
                FileDataset(refs=refs, path=path, formatter=self._formatter)
            )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # A full translation would need a single 'ccdnum', which a
        # multi-extension file does not have.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dataId3 = DataCoordinate.standardize(
            rawDataId3, universe=self._datasetType.dimensions.universe
        )
        return dataId3, calibDate