21 """Concrete implementations of `PathElementHandler`.
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
26 from __future__
import annotations
28 __all__ = [
"IgnoreHandler",
"SkipHandler",
"SubdirectoryHandler",
"TargetFileHandler"]
30 from abc
import abstractmethod
39 from lsst.log
import Log
40 from lsst.daf.butler
import (
46 from ..translators
import Translator
47 from .parser
import PathElementParser
48 from .scanner
import PathElementHandler, DirectoryScanner
52 """A `PathElementHandler` that matches via a regular expression, and does
55 An `IgnoreHandler` is used to ignore file or directory patterns that can
56 occur at any level in the directory tree, and have no relation to any
57 Gen2 filename template.
61 pattern : `re.Pattern`
62 A regular expression pattern.
64 Whether this handler should be applied to files (`True`) or
65 directories (`False`).
67 def __init__(self, pattern: re.Pattern, isForFiles: bool):
72 __slots__ = (
"_pattern",
"_isForFiles")
# NOTE(review): extraction garbling — the stray original line numbers
# ("83", "84") are fused into the code, and the entire method body
# (original lines 85-92) is missing from this chunk.  Only the signature
# survives.  Presumably the method reports whether ``name`` matches
# ``self._pattern`` — TODO confirm against the original source before
# editing.  Code kept byte-identical.
83 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
84 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
93 """An intermediate base class for `PathElementHandler` classes that utilize
94 a `PathElementParser` to match a Gen2 filename template.
98 parser : `PathElementParser`
99 An object that matches the path element this handler is responsible for
100 and extracts a (partial) Gen2 data ID from it.
106 __slots__ = (
"_parser",)
108 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
109 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
112 if nextDataId2
is None:
114 self.
handle(path, nextDataId2, datasets, log=log, predicate=predicate)
def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
           log: Log, predicate: Callable[[DataCoordinate], bool]):
    """Customization hook for ``__call__``.

    Subclasses must override this method, while external callers (i.e.
    `DirectoryScanner`) should instead invoke `__call__`.

    Parameters
    ----------
    path : `str`
        Full path of the file or directory.
    nextDataId2 : `dict`
        Gen2 data ID (usually partial) extracted from the path so far.
    datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
        Dictionary that found datasets should be added to.
    log : `Log`, optional
        Log to use to report warnings and debug information.
    predicate : `~collections.abc.Callable`
        A callable taking a single `DataCoordinate` argument and returning
        `bool`, indicating whether that (Gen3) data ID represents one
        that should be included in the scan.
    """
    # Abstract hook: every concrete subclass must provide the behavior.
    raise NotImplementedError()
149 """A `ParsedPathElementHandler` that does nothing with an entry other
150 optionally logging a warning message.
152 A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
153 want to (or cannot) extract Gen3 datasets from, or other files/directories
154 that alway appears at a fixed level in the diectory tree.
158 parser : `PathElementParser`
159 An object that matches the path element this handler is responsible for
160 and extracts a (partial) Gen2 data ID from it.
162 Whether this handler should be applied to files (`True`) or
163 directories (`False`).
164 message : `str`, optional
165 A message to log at warning level when this handler matches a path
166 entry. If `None`, matched entrie will be silently skipped.
168 def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
173 __slots__ = (
"_message",
"_isForFiles")
179 def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
180 log: Log, predicate: Callable[[DataCoordinate], bool]):
183 log.warn(
"Skipping %s: %s", path, self.
_message)
187 """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.
191 parser : `PathElementParser`
192 An object that matches the path element this handler is responsible for
193 and extracts a (partial) Gen2 data ID from it.
197 The nested `DirectoryScanner` is default-constructed and should be
198 populated with child handlers after the `SubdirectoryHandler` is created.
205 __slots__ = (
"scanner",)
211 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
212 log: Log, predicate: Callable[[DataCoordinate], bool]):
220 dataId3 = self.
translate(nextDataId2, partial=
True, log=log)
221 if dataId3
is not None:
222 scan = predicate(dataId3)
227 handler.lastDataId2 = nextDataId2
228 self.
scanner.scan(path, datasets, log=log, predicate=predicate)
def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
    # A subdirectory has no Gen2 template of its own, so translation is
    # delegated to the child handlers.  We always ask the children for a
    # partial match, because the data ID accumulated at this level
    # corresponds to a shallower tree level than theirs.
    # NOTE(review): the loop over ``self.scanner`` is reconstructed from
    # context (``handler`` was otherwise unbound in the garbled source) —
    # TODO confirm that `DirectoryScanner` iterates over its handlers.
    for handler in self.scanner:
        result = handler.translate(dataId2, partial=True, log=log)
        if result is not None:
            return result
    return None

scanner: DirectoryScanner
"""Scanner object that holds handlers for the entries of the subdirectory
matched by this handler (`DirectoryScanner`).
"""
248 """A `PathElementHandler` that matches files that correspond to target
249 datasets and outputs `FileDataset` instances for them.
253 parser : `PathElementParser`
254 An object that matches the path element this handler is responsible for
255 and extracts a (partial) Gen2 data ID from it.
256 translator : `Translator`
257 Object that translates data IDs from Gen2 to Gen3.
258 datasetType : `lsst.daf.butler.DatasetType`
259 Gen3 dataset type for the datasets this handler matches.
261 def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
266 __slots__ = (
"_translator",
"_datasetType")
# NOTE(review): extraction garbling — stray original line numbers are
# fused into the code, statements are split across lines, and the body
# of the ``if predicate(dataId3):`` branch (original lines 277-279,
# presumably the code that appends a `FileDataset` for ``path`` to
# ``datasets``) is missing from this chunk.  Code kept byte-identical;
# recover the original source before editing.
272 def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
273 log: Log, predicate: Callable[[DataCoordinate], bool]):
# Files are leaves of the tree, so a full (non-partial) translation is
# requested here.
275 dataId3 = self.
translate(nextDataId2, partial=
False, log=log)
276 if predicate(dataId3):
def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
    # Docstring inherited from the base class.
    rawDataId3 = self._translator(dataId2, partial=partial, log=log)
    # The two alternative standardizations below are both present in the
    # garbled original; the ``if partial:`` dispatch between them is
    # reconstructed — TODO confirm against the original source.
    if partial:
        # A partial data ID need not span the dataset type's full
        # dimension graph, so standardize against the universe only.
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
    else:
        return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)