"""Concrete implementations of `PathElementHandler`.

The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
avoid a circular dependency between modules.
"""
from __future__ import annotations

__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]

from abc import abstractmethod
import re
from typing import Callable, List, Mapping, Optional

import lsst.afw.fits
from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
    FormatterParameter,
)
from lsst.log import Log

from ..translators import Translator, makeCalibrationLabel
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner
57 """A `PathElementHandler` that matches via a regular expression, and does
60 An `IgnoreHandler` is used to ignore file or directory patterns that can
61 occur at any level in the directory tree, and have no relation to any
62 Gen2 filename template.
66 pattern : `re.Pattern`
67 A regular expression pattern.
69 Whether this handler should be applied to files (`True`) or
70 directories (`False`).
72 def __init__(self, pattern: re.Pattern, isForFiles: bool):
77 __slots__ = (
"_pattern",
"_isForFiles")
80 return f
"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"
91 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
92 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
101 """An intermediate base class for `PathElementHandler` classes that utilize
102 a `PathElementParser` to match a Gen2 filename template.
106 parser : `PathElementParser`
107 An object that matches the path element this handler is responsible for
108 and extracts a (partial) Gen2 data ID from it.
114 __slots__ = (
"_parser",)
117 return f
"{type(self).__name__}(parser={self._parser})"
119 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
120 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
123 if nextDataId2
is None:
125 self.
handle(path, nextDataId2, datasets, log=log, predicate=predicate)
134 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
135 log: Log, predicate: Callable[[DataCoordinate], bool]):
136 """Customization hook for ``__call__``.
138 Subclasses must override this method, while external callers (i.e.
139 `DirectoryScanner` should instead invoke `__call__`.
144 Full path of the file or directory.
146 Gen2 data ID (usually partial) extracted from the path so far.
147 datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
148 Dictionary that found datasets should be added to.
149 log : `Log`, optional
150 Log to use to report warnings and debug information.
151 predicate : `~collections.abc.Callable`
152 A callable taking a single `DataCoordinate` argument and returning
153 `bool`, indicating whether that (Gen3) data ID represents one
154 that should be included in the scan.
155 formatterMap : `dict`, optional
156 Map dataset type to specialist formatter.
158 raise NotImplementedError()
162 """A `ParsedPathElementHandler` that does nothing with an entry other
163 optionally logging a warning message.
165 A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
166 want to (or cannot) extract Gen3 datasets from, or other files/directories
167 that alway appears at a fixed level in the diectory tree.
171 parser : `PathElementParser`
172 An object that matches the path element this handler is responsible for
173 and extracts a (partial) Gen2 data ID from it.
175 Whether this handler should be applied to files (`True`) or
176 directories (`False`).
177 message : `str`, optional
178 A message to log at warning level when this handler matches a path
179 entry. If `None`, matched entrie will be silently skipped.
181 def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
186 __slots__ = (
"_message",
"_isForFiles")
192 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
193 log: Log, predicate: Callable[[DataCoordinate], bool]):
196 log.warn(
"Skipping %s: %s", path, self.
_message)
200 """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.
204 parser : `PathElementParser`
205 An object that matches the path element this handler is responsible for
206 and extracts a (partial) Gen2 data ID from it.
210 The nested `DirectoryScanner` is default-constructed and should be
211 populated with child handlers after the `SubdirectoryHandler` is created.
218 __slots__ = (
"scanner",)
224 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
225 log: Log, predicate: Callable[[DataCoordinate], bool]):
233 dataId3 = self.
translate(nextDataId2, partial=
True, log=log)
234 if dataId3
is not None:
235 scan = predicate(dataId3)
240 handler.lastDataId2 = nextDataId2
241 self.
scanner.scan(path, datasets, log=log, predicate=predicate)
243 def translate(self, dataId2: dict, *, partial: bool =
False, log: Log) -> Optional[DataCoordinate]:
249 result = handler.translate(dataId2, partial=
True, log=log)
250 if result
is not None:
254 scanner: DirectoryScanner
255 """Scanner object that holds handlers for the entries of the subdirectory
256 matched by this handler (`DirectoryScanner`).
261 """A `PathElementHandler` that matches files that correspond to target
262 datasets and outputs `FileDataset` instances for them.
266 parser : `PathElementParser`
267 An object that matches the path element this handler is responsible for
268 and extracts a (partial) Gen2 data ID from it.
269 translator : `Translator`
270 Object that translates data IDs from Gen2 to Gen3.
271 datasetType : `lsst.daf.butler.DatasetType`
272 Gen3 dataset type for the datasets this handler matches.
273 formatter : `lsst.daf.butler.Formatter` or `str`, optional
274 A Gen 3 formatter class or fully-qualified name.
276 def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
277 formatter: FormatterParameter =
None):
283 __slots__ = (
"_translator",
"_datasetType",
"_formatter")
286 return f
"{type(self).__name__}({self._translator}, {self._datasetType})"
292 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
293 log: Log, predicate: Callable[[DataCoordinate], bool]):
295 dataId3 = self.
translate(nextDataId2, partial=
False, log=log)
296 if predicate(dataId3):
300 def translate(self, dataId2: dict, *, partial: bool =
False, log: Log) -> Optional[DataCoordinate]:
302 rawDataId3 = self.
_translator(dataId2, partial=partial, log=log)
304 return DataCoordinate.standardize(rawDataId3, universe=self.
_datasetType.dimensions.universe)
306 return DataCoordinate.standardize(rawDataId3, graph=self.
_datasetType.dimensions)
310 """Handler for FITS files that store image and metadata in multiple HDUs
311 per file, for example DECam raw and Community Pipeline calibrations.
315 For now, this is only used by DECam, and may need to be made more generic
316 (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
317 with other obs packages.
319 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
320 log: Log, predicate: Callable[[DataCoordinate], bool]):
321 dataId3 = self.
translate(nextDataId2, partial=
True, log=log)
323 def get_detectors(filename):
324 fitsData = lsst.afw.fits.Fits(filename,
'r')
327 for i
in range(1, fitsData.countHdus()):
329 metadata = fitsData.readMetadata()
330 detectors.append(metadata[
'CCDNUM'])
333 if predicate(dataId3):
334 detectors = get_detectors(path)
336 for detector
in detectors:
338 ccd=detector, filter=nextDataId2.get(
"filter"))
339 newDataId3 = DataCoordinate.standardize(dataId3,
342 calibration_label=label)
347 def translate(self, dataId2: dict, *, partial: bool =
False, log: Log) -> Optional[DataCoordinate]:
348 assert partial
is True,
"We always require partial, to ignore 'ccdnum'"
349 rawDataId3 = self.
_translator(dataId2, partial=partial, log=log)
350 return DataCoordinate.standardize(rawDataId3, universe=self.
_datasetType.dimensions.universe)