"""Concrete implementations of `PathElementHandler`.

The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
avoid a circular dependency between modules.
"""
from __future__ import annotations
# Public API of this module: the concrete handler implementations.
__all__ = [
    "IgnoreHandler",
    "SkipHandler",
    "SubdirectoryHandler",
    "TargetFileHandler",
]
30 from abc
import abstractmethod
41 from lsst.log
import Log
42 from lsst.daf.butler
import (
48 from ..translators
import Translator, makeCalibrationLabel
49 from .parser
import PathElementParser
50 from .scanner
import PathElementHandler, DirectoryScanner
53 from lsst.daf.butler
import FormatterParameter
57 """A `PathElementHandler` that matches via a regular expression, and does 60 An `IgnoreHandler` is used to ignore file or directory patterns that can 61 occur at any level in the directory tree, and have no relation to any 62 Gen2 filename template. 66 pattern : `re.Pattern` 67 A regular expression pattern. 69 Whether this handler should be applied to files (`True`) or 70 directories (`False`). 72 def __init__(self, pattern: re.Pattern, isForFiles: bool):
77 __slots__ = (
"_pattern",
"_isForFiles")
88 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
89 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
98 """An intermediate base class for `PathElementHandler` classes that utilize 99 a `PathElementParser` to match a Gen2 filename template. 103 parser : `PathElementParser` 104 An object that matches the path element this handler is responsible for 105 and extracts a (partial) Gen2 data ID from it. 111 __slots__ = (
"_parser",)
113 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
114 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
117 if nextDataId2
is None:
119 self.
handle(path, nextDataId2, datasets, log=log, predicate=predicate)
128 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
129 log: Log, predicate: Callable[[DataCoordinate], bool]):
130 """Customization hook for ``__call__``. 132 Subclasses must override this method, while external callers (i.e. 133 `DirectoryScanner` should instead invoke `__call__`. 138 Full path of the file or directory. 140 Gen2 data ID (usually partial) extracted from the path so far. 141 datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ] 142 Dictionary that found datasets should be added to. 143 log : `Log`, optional 144 Log to use to report warnings and debug information. 145 predicate : `~collections.abc.Callable` 146 A callable taking a single `DataCoordinate` argument and returning 147 `bool`, indicating whether that (Gen3) data ID represents one 148 that should be included in the scan. 149 formatterMap : `dict`, optional 150 Map dataset type to specialist formatter. 152 raise NotImplementedError()
156 """A `ParsedPathElementHandler` that does nothing with an entry other 157 optionally logging a warning message. 159 A `SkipHandler` is used for Gen2 datasets that we can recognize but do not 160 want to (or cannot) extract Gen3 datasets from, or other files/directories 161 that alway appears at a fixed level in the diectory tree. 165 parser : `PathElementParser` 166 An object that matches the path element this handler is responsible for 167 and extracts a (partial) Gen2 data ID from it. 169 Whether this handler should be applied to files (`True`) or 170 directories (`False`). 171 message : `str`, optional 172 A message to log at warning level when this handler matches a path 173 entry. If `None`, matched entrie will be silently skipped. 175 def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
180 __slots__ = (
"_message",
"_isForFiles")
186 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
187 log: Log, predicate: Callable[[DataCoordinate], bool]):
190 log.warn(
"Skipping %s: %s", path, self.
_message)
194 """A `PathElementHandler` that uses a `DirectoryScanner` to recurse. 198 parser : `PathElementParser` 199 An object that matches the path element this handler is responsible for 200 and extracts a (partial) Gen2 data ID from it. 204 The nested `DirectoryScanner` is default-constructed and should be 205 populated with child handlers after the `SubdirectoryHandler` is created. 212 __slots__ = (
"scanner",)
218 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
219 log: Log, predicate: Callable[[DataCoordinate], bool]):
227 dataId3 = self.
translate(nextDataId2, partial=
True, log=log)
228 if dataId3
is not None:
229 scan = predicate(dataId3)
234 handler.lastDataId2 = nextDataId2
235 self.
scanner.scan(path, datasets, log=log, predicate=predicate)
237 def translate(self, dataId2: dict, *, partial: bool =
False, log: Log) -> Optional[DataCoordinate]:
243 result = handler.translate(dataId2, partial=
True, log=log)
244 if result
is not None:
248 scanner: DirectoryScanner
249 """Scanner object that holds handlers for the entries of the subdirectory 250 matched by this handler (`DirectoryScanner`). 255 """A `PathElementHandler` that matches files that correspond to target 256 datasets and outputs `FileDataset` instances for them. 260 parser : `PathElementParser` 261 An object that matches the path element this handler is responsible for 262 and extracts a (partial) Gen2 data ID from it. 263 translator : `Translator` 264 Object that translates data IDs from Gen2 to Gen3. 265 datasetType : `lsst.daf.butler.DatasetType` 266 Gen3 dataset type for the datasets this handler matches. 267 formatter : `lsst.daf.butler.Formatter` or `str`, optional 268 A Gen 3 formatter class or fully-qualified name. 270 def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
271 formatter: FormatterParameter =
None):
277 __slots__ = (
"_translator",
"_datasetType",
"_formatter")
283 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
284 log: Log, predicate: Callable[[DataCoordinate], bool]):
286 dataId3 = self.
translate(nextDataId2, partial=
False, log=log)
287 if predicate(dataId3):
def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
    """Translate a Gen2 data ID for this dataset type and standardize it
    into a Gen3 `DataCoordinate`.

    Parameters
    ----------
    dataId2 : `dict`
        Gen2 data ID to translate.
    partial : `bool`, optional
        If `True`, ``dataId2`` may not identify all dimensions of this
        handler's dataset type.
    log : `Log`
        Log to use to report warnings and debug information.

    Returns
    -------
    dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
        Standardized Gen3 data ID.
    """
    rawDataId3 = self._translator(dataId2, partial=partial, log=log)
    if partial:
        # A partial data ID cannot be standardized against the dataset
        # type's full dimension graph, so standardize against the bare
        # dimension universe instead.
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
    return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
301 """Handler for FITS files that store image and metadata in multiple HDUs 302 per file, for example DECam raw and Community Pipeline calibrations. 306 For now, this is only used by DECam, and may need to be made more generic 307 (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used 308 with other obs packages. 310 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
311 log: Log, predicate: Callable[[DataCoordinate], bool]):
312 dataId3 = self.
translate(nextDataId2, partial=
True, log=log)
314 def get_detectors(filename):
315 fitsData = lsst.afw.fits.Fits(filename,
'r') 318 for i
in range(1, fitsData.countHdus()):
320 metadata = fitsData.readMetadata()
321 detectors.append(metadata[
'CCDNUM'])
324 if predicate(dataId3):
325 detectors = get_detectors(path)
327 for detector
in detectors:
329 ccd=detector, filter=nextDataId2.get(
"filter"))
330 newDataId3 = DataCoordinate.standardize(dataId3,
333 calibration_label=label)
def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
    """Translate a Gen2 data ID into a partial, standardized Gen3
    `DataCoordinate`.

    Translation here is always partial: the per-HDU detector is
    intentionally left out of the result (it is handled elsewhere —
    presumably per HDU by ``handle``; confirm against the caller).
    """
    assert partial is True, "We always require partial, to ignore 'ccdnum'"
    rawGen3Id = self._translator(dataId2, partial=partial, log=log)
    universe = self._datasetType.dimensions.universe
    return DataCoordinate.standardize(rawGen3Id, universe=universe)