Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21"""Concrete implementations of `PathElementHandler`. 

22 

23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 

24avoid a circular dependency between modules. 

25""" 

26from __future__ import annotations 

27 

28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"] 

29 

30from abc import abstractmethod 

31import re 

32from typing import ( 

33 Callable, 

34 List, 

35 Mapping, 

36 Optional, 

37) 

38 

39import lsst.afw.fits 

40from lsst.log import Log 

41from lsst.daf.butler import ( 

42 DataCoordinate, 

43 DatasetRef, 

44 DatasetType, 

45 FileDataset, 

46) 

47from ..translators import Translator, makeCalibrationLabel 

48from .parser import PathElementParser 

49from .scanner import PathElementHandler, DirectoryScanner 

50 

51 

class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches a regular expression and takes no
    action at all.

    `IgnoreHandler` exists to skip over file or directory patterns that may
    appear anywhere in the directory tree and do not correspond to any Gen2
    filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._isForFiles = isForFiles
        self._pattern = pattern

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # An ignore pattern extracts no data ID keys, so its rank is zero.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full-pattern match means "consumed and ignored"; nothing is ever
        # added to ``datasets``.
        return self._pattern.fullmatch(name) is not None

91 

92 

class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Let the parser try to match this entry and extend the data ID
        # accumulated from higher directory levels; on success, delegate the
        # actual work to the subclass `handle` hook.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More data ID keys extracted means a more specific (higher-rank)
        # match.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()

147 

148 

class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other than
    optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files and
    directories that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be skipped silently.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._message = message
        self._isForFiles = isForFiles

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Nothing is harvested from a skipped entry; just report it if a
        # message was configured.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)

185 

186 

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending; an empty data ID means the path so far is
        # just a fixed string, so there is nothing for the predicate to veto.
        scan = True
        if nextDataId2:
            # We have at least a partial Gen2 data ID; if it translates to a
            # (partial) Gen3 one, let the predicate decide whether this
            # subtree is worth scanning.  If translation fails, descend
            # anyway: a child handler may still be able to translate.
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
        if scan:
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        for child in self.scanner:
            # Since we're recursing, we're always asking for a partial match,
            # because the data ID we have corresponds to a different level
            # than the one the child handlers operate at.
            dataId3 = child.translate(dataId2, partial=True, log=log)
            if dataId3 is not None:
                return dataId3
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """

246 

247 

class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
        super().__init__(parser=parser)
        self._datasetType = datasetType
        self._translator = translator

    __slots__ = ("_translator", "_datasetType")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # At file level the data ID must be complete, so translate fully and
        # record the file only when the predicate accepts its data ID.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            ref = DatasetRef(self._datasetType, dataId3)
            datasets[self._datasetType].append(FileDataset(refs=[ref], path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        if partial:
            # A partial data ID may lack required dimensions, so standardize
            # against the universe only, not the dataset type's full graph.
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)

288 

289 

class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Translate partially: the per-detector information is not in the
        # path's Gen2 data ID, but is read from the file's HDUs below.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            # Collect the CCDNUM header value from every extension HDU.
            # NOTE(review): the Fits object is never explicitly closed here —
            # presumably it is released when ``fitsData`` goes out of scope;
            # confirm against lsst.afw.fits.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                # One DatasetRef per detector; they all share the same file,
                # so a single FileDataset carries the whole list of refs.
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Full (non-partial) translation is deliberately unsupported: the
        # detector dimension comes from the HDUs, not the Gen2 data ID.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)