Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30from abc import abstractmethod
31import re
32from typing import (
33 Callable,
34 List,
35 Mapping,
36 Optional,
37 TYPE_CHECKING
38)
40import lsst.afw.fits
41from lsst.log import Log
42from lsst.daf.butler import (
43 DataCoordinate,
44 DatasetRef,
45 DatasetType,
46 FileDataset,
47)
48from ..translators import Translator, makeCalibrationLabel
49from .parser import PathElementParser
50from .scanner import PathElementHandler, DirectoryScanner
52if TYPE_CHECKING: 52 ↛ 53line 52 didn't jump to line 53, because the condition on line 52 was never true
53 from lsst.daf.butler import FormatterParameter
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches a regular expression and takes
    no further action.

    `IgnoreHandler` exists to skip over file or directory patterns that may
    appear anywhere in the directory tree and do not correspond to any Gen2
    filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        Compiled regular expression the element name must fully match.
    isForFiles : `bool`
        If `True`, apply this handler to files; if `False`, to directories.
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full-pattern match means "handled" (i.e. ignored); nothing is
        # ever added to ``datasets``.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` subclasses that
    match a Gen2 filename template via a `PathElementParser`.

    Parameters
    ----------
    parser : `PathElementParser`
        Object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementParser.
        parsed = self._parser.parse(name, self.lastDataId2, log=log)
        if parsed is not None:
            # Delegate the real work to the subclass hook.
            self.handle(path, parsed, datasets, log=log, predicate=predicate)
            return True
        return False

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementParser.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method; external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do
    not want to (or cannot) extract Gen3 datasets from, or other
    files/directories that always appear at a fixed level in the directory
    tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is None:
            return
        log.warn("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that recurses into a subdirectory via a nested
    `DirectoryScanner`.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending: an empty data ID means the path so far is
        # just a fixed string, and a failed (None) translation gives us no
        # basis to prune either.
        scan = True
        if nextDataId2:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
        if not scan:
            return
        for child in self.scanner:
            child.lastDataId2 = nextDataId2
        self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we always ask children for a partial match:
        # the data ID we have corresponds to a different level than the one
        # the child handlers operate at.  First success wins.
        for child in self.scanner:
            result = child.translate(dataId2, partial=True, log=log)
            if result is not None:
                return result
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that recognizes files corresponding to target
    datasets and emits `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            ref = DatasetRef(self._datasetType, dataId3)
            datasets[self._datasetType].append(
                FileDataset(refs=[ref], path=path, formatter=self._formatter)
            )

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        dimensions = self._datasetType.dimensions
        if partial:
            # A partial data ID can only be standardized against the full
            # universe, not this dataset type's dimension graph.
            return DataCoordinate.standardize(rawDataId3, universe=dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=dimensions)
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """

    @staticmethod
    def _getDetectors(filename: str) -> List[int]:
        """Read the detector numbers from the non-primary HDUs of a
        multi-extension FITS file.

        Parameters
        ----------
        filename : `str`
            Path to the FITS file to inspect.

        Returns
        -------
        detectors : `list`
            ``CCDNUM`` header values, one per extension HDU.
        """
        fitsData = lsst.afw.fits.Fits(filename, 'r')
        try:
            # NOTE: The primary header (HDU=0) does not contain detector
            # data, so start at HDU=1.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors
        finally:
            # Close the file handle explicitly; the original nested-closure
            # version leaked it until garbage collection.
            fitsData.closeFile()

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # partial=True because the Gen2 data ID lacks the per-HDU detector;
        # detectors are filled in below from the file itself.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)
        if predicate(dataId3):
            refs = []
            for detector in self._getDetectors(path):
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))
            datasets[self._datasetType].append(FileDataset(refs=refs, path=path,
                                                           formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)