21 """Concrete implementations of `PathElementHandler`. 23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to 24 avoid a circular dependency between modules. 26 from __future__
import annotations

__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]

from abc import abstractmethod
import re
from typing import Callable, List, Mapping, Optional

import lsst.afw.fits
from lsst.log import Log
from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
)

from ..translators import Translator, makeCalibrationLabel
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner
53 """A `PathElementHandler` that matches via a regular expression, and does 56 An `IgnoreHandler` is used to ignore file or directory patterns that can 57 occur at any level in the directory tree, and have no relation to any 58 Gen2 filename template. 62 pattern : `re.Pattern` 63 A regular expression pattern. 65 Whether this handler should be applied to files (`True`) or 66 directories (`False`). 68 def __init__(self, pattern: re.Pattern, isForFiles: bool):
73 __slots__ = (
"_pattern",
"_isForFiles")

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._pattern.fullmatch(name) is not None
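
# Example (an illustrative sketch, not part of the module's tested API): an
# `IgnoreHandler` that claims and discards hidden files at any depth.  The
# pattern, path, and trivial predicate below are assumptions chosen purely
# for demonstration.
#
#     hidden = IgnoreHandler(re.compile(r"\..*"), isForFiles=True)
#     claimed = hidden("repo/raw/.fuse_hidden0001", ".fuse_hidden0001", {},
#                      log=Log.getLogger("example"),
#                      predicate=lambda dataId3: True)
#     # `claimed` is True: the entry is consumed (and ignored), so the
#     # `DirectoryScanner` asks no further handlers about it.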
94 """An intermediate base class for `PathElementHandler` classes that utilize 95 a `PathElementParser` to match a Gen2 filename template. 99 parser : `PathElementParser` 100 An object that matches the path element this handler is responsible for 101 and extracts a (partial) Gen2 data ID from it. 107 __slots__ = (
"_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
150 """A `ParsedPathElementHandler` that does nothing with an entry other 151 optionally logging a warning message. 153 A `SkipHandler` is used for Gen2 datasets that we can recognize but do not 154 want to (or cannot) extract Gen3 datasets from, or other files/directories 155 that alway appears at a fixed level in the diectory tree. 159 parser : `PathElementParser` 160 An object that matches the path element this handler is responsible for 161 and extracts a (partial) Gen2 data ID from it. 163 Whether this handler should be applied to files (`True`) or 164 directories (`False`). 165 message : `str`, optional 166 A message to log at warning level when this handler matches a path 167 entry. If `None`, matched entrie will be silently skipped. 169 def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
174 __slots__ = (
"_message",
"_isForFiles")

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)
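
# Example (an illustrative sketch): skipping entries matched by an
# already-constructed `PathElementParser` (how that parser is built is out
# of scope here) while warning about each match.
#
#     skipFocus = SkipHandler(parser, isForFiles=True,
#                             message="focus sweeps are not converted")
#     # Matching entries are consumed with a warning; passing message=None
#     # instead would make them disappear silently.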
188 """A `PathElementHandler` that uses a `DirectoryScanner` to recurse. 192 parser : `PathElementParser` 193 An object that matches the path element this handler is responsible for 194 and extracts a (partial) Gen2 data ID from it. 198 The nested `DirectoryScanner` is default-constructed and should be 199 populated with child handlers after the `SubdirectoryHandler` is created. 206 __slots__ = (
"scanner",)

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # We matched, but there is no data ID at all yet; the full path
            # so far is just a fixed string, so always descend into it.
            scan = True
        else:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
            else:
                scan = True
        if scan:
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        for handler in self.scanner:
            # Since we're recursing, we always ask for a partial match: the
            # data ID we have corresponds to a different level than the one
            # the child handler operates at.
            result = handler.translate(dataId2, partial=True, log=log)
            if result is not None:
                return result
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """


class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType

    __slots__ = ("_translator", "_datasetType")

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            datasets[self._datasetType].append(
                FileDataset(refs=[DatasetRef(self._datasetType, dataId3)], path=path)
            )

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        if partial:
            # A partial data ID may not identify all of the dataset type's
            # dimensions, so standardize against the full universe.
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        else:
            return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
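
# Example (an illustrative sketch; `rawParser`, `rawTranslator`, and the data
# ID values below are assumptions, not fixtures from this package): a handler
# that emits one `FileDataset` per matched file.
#
#     rawHandler = TargetFileHandler(rawParser, rawTranslator, rawDatasetType)
#     rawHandler.lastDataId2 = {"visit": 903334}   # normally set by a parent
#     rawHandler("root/raw/903334/S11.fits", "S11.fits", datasets,
#                log=log, predicate=lambda dataId3: True)
#     # On a match, datasets[rawDatasetType] gains a FileDataset whose single
#     # DatasetRef carries the fully translated Gen3 data ID.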
291 """Handler for FITS files that store image and metadata in multiple HDUs 292 per file, for example DECam raw and Community Pipeline calibrations. 296 For now, this is only used by DECam, and may need to be made more generic 297 (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used 298 with other obs packages. 300 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
301 log: Log, predicate: Callable[[DataCoordinate], bool]):
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # The primary HDU (index 0) does not contain detector data, so
            # start reading at extension 1.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))
            datasets[self._datasetType].append(FileDataset(refs=refs, path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
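
# Putting it together (an illustrative sketch; `visitParser`, `fileParser`,
# `translator`, `datasetType`, and the scanner's `add` method are assumed
# from the surrounding package rather than demonstrated here): a
# `SubdirectoryHandler` descends into per-visit directories, and a
# `TargetFileHandler` registered on its nested scanner claims the files
# inside them.
#
#     subdir = SubdirectoryHandler(visitParser)
#     subdir.scanner.add(TargetFileHandler(fileParser, translator, datasetType))
#     root = DirectoryScanner()
#     root.add(subdir)
#     root.scan(rootPath, datasets, log=log, predicate=lambda dataId3: True)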