21 """Interfaces and common code for recursively scanning directories for Gen2 data repositories.
24 The `PathElementHandler` ABC is defined here instead of ``handlers.py`` for
25 dependency reasons: `DirectoryScanner` uses the ABC, while its concrete
26 implementations use `DirectoryScanner`.
28 from __future__
import annotations
30 __all__ = [
"PathElementHandler",
"DirectoryScanner"]
32 from abc
import ABC, abstractmethod
44 from lsst.log
import Log
45 from lsst.daf.butler
import (
53 """An interface for objects that handle a single path element (directory or
54 file) in a Gen2 data repository.
56 Handlers are added to a `DirectoryScanner` instance, which then calls them
57 until one succeeds when it processes each element in a directory.
62 __slots__ = (
"lastDataId2",
"log")
66 """Report what kind of path element this object handles.
70 Return `True` if this handler is for file entries, or `False` if it is for directory entries.
73 raise NotImplementedError()
77 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
78 predicate: Callable[[DataCoordinate], bool]) -> bool:
79 """Apply the handler to a file path.
84 Full path of the file or directory.
86 Local name of the file or directory within its parent directory.
87 datasets : `dict` [`DatasetType`, `dict` ]
88 Dictionary that found datasets should be added to. Nested dicts
89 are keyed by either `None` (for most datasets) or a `str`
90 "CALIBDATE" for calibration datasets.
91 predicate : `~collections.abc.Callable`
92 A callable taking a single `DataCoordinate` argument and returning
93 `bool`, indicating whether that (Gen3) data ID represents one
94 that should be included in the scan.
99 `True` if this handler was a match for the given path and no other
100 handlers need to be tried on it, `False` otherwise.
102 raise NotImplementedError()
107 """Return a rough indication of how flexible this handler is in terms
108 of the path element names it can match.
110 Handlers that match a constant path element should always return zero.
112 raise NotImplementedError()
def translate(self, dataId2: dict, *, partial: bool = False
              ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
    """Translate the given data ID from Gen2 to Gen3.

    The default implementation performs no translation and returns `None`
    for both elements of the result.  Subclasses that are able to translate
    data IDs should override this method.

    Parameters
    ----------
    dataId2 : `dict`
        Gen2 data ID.
    partial : `bool`, optional
        If `True` (`False` is default) this is a partial data ID for some
        dataset, and missing keys are expected.

    Returns
    -------
    dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
        A Gen3 data ID, or `None` if this handler cannot translate data
        IDs.
    calibDate : `str` or `None`
        A Gen2 calibration "CALIBDATE" value, or `None` if there was no
        such value in the template.
    """
    # NOTE(review): the statement body was not present in the damaged
    # listing; this is reconstructed from the docstring ("returns `None`")
    # and the two-element return annotation -- confirm against upstream.
    return None, None
def __lt__(self, other: PathElementHandler) -> bool:
    """Compare two handlers by their ``rank``.

    Handlers are sorted by rank to reduce the possibility that more
    flexible handlers will have a chance to match something they shouldn't.

    Parameters
    ----------
    other : `PathElementHandler`
        The handler to compare against.

    Returns
    -------
    less : `bool`
        `True` if this handler's ``rank`` is strictly less than
        ``other.rank``.
    """
    return self.rank < other.rank
147 """The Gen2 data ID obtained by processing parent levels in the directory tree.
150 This attribute should be reset by calling code whenever a new parent
151 directory is entered, before invoking `__call__`.
155 """A logger to use for all diagnostic messages (`lsst.log.Log`).
157 This attribute is set on a handler in `DirectoryScanner.add`; this avoids
158 needing to forward one through all subclass constructors.
163 """An object that uses `PathElementHandler` instances to process the files
164 and subdirectories in a directory tree.
168 log : `Log`, optional
169 Log to use to report warnings and debug information.
175 log = Log.getLogger(
"obs.base.gen2to3.walker")
178 __slots__ = (
"_files",
"_subdirectories",
"log")
def add(self, handler: PathElementHandler):
    """Add a new handler to the scanner.

    The handler is given this scanner's ``log`` and is then inserted, in
    sorted order, into the list of file handlers or the list of
    subdirectory handlers according to ``handler.isForFiles()``.

    Parameters
    ----------
    handler : `PathElementHandler`
        The handler to be added.
    """
    # Handlers do not receive a logger at construction; hand them ours.
    handler.log = self.log
    # insort keeps each list ordered using the handlers' own ``<``.
    if handler.isForFiles():
        bisect.insort(self._files, handler)
    else:
        # Without this branch directory handlers were silently dropped,
        # leaving the ``_subdirectories`` slot permanently empty.
        bisect.insort(self._subdirectories, handler)
def __iter__(self) -> Iterator[PathElementHandler]:
    """Iterate over all handlers.

    Yields
    ------
    handler : `PathElementHandler`
        Every file handler, in stored order, followed by every
        subdirectory handler, in stored order.
    """
    yield from self._files
    # "All handlers" includes the directory handlers as well.
    yield from self._subdirectories
def scan(self, path: str, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
         predicate: Callable[[DataCoordinate], bool]):
    """Process a directory.

    Each entry of ``path`` is offered to the file handlers (for regular
    files) or the subdirectory handlers (for directories), in stored
    order, until one of them returns a true value.  Names of entries that
    no handler accepted are collected and reported in a single warning.
    Entries that are neither files nor directories are ignored.

    Parameters
    ----------
    path : `str`
        Full path to the directory to be processed.
    datasets : `dict` [`DatasetType`, `dict` ]
        Dictionary that found datasets should be added to.  Nested dicts
        are keyed by either `None` (for most datasets) or a `str`
        "CALIBDATE" for calibration datasets.  It is passed through to
        the handlers unchanged.
    predicate : `~collections.abc.Callable`
        A callable taking a single `DataCoordinate` argument and returning
        `bool`, indicating whether that (Gen3) data ID represents one
        that should be included in the scan.  It is passed through to the
        handlers unchanged.
    """
    unrecognized = []
    # Use the iterator as a context manager so the directory handle is
    # released promptly even if a handler raises.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                handlers = self._files
            elif entry.is_dir():
                handlers = self._subdirectories
            else:
                continue
            for handler in handlers:
                if handler(entry.path, entry.name, datasets, predicate=predicate):
                    # First successful handler wins; stop trying others.
                    break
            else:
                # Loop finished without a break: nobody claimed the entry.
                unrecognized.append(entry.name)
    if unrecognized:
        self.log.warn("Skipped unrecognized entries in %s: %s", path, unrecognized)
def scan(self, str path, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
def add(self, PathElementHandler handler)
def __init__(self, Optional[Log] log=None)
Iterator[PathElementHandler] __iter__(self)
Tuple[Optional[DataCoordinate], Optional[str]] translate(self, dict dataId2, *bool partial=False)
bool __call__(self, str path, str name, Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]] datasets, *Callable[[DataCoordinate], bool] predicate)
def __lt__(self, PathElementHandler other)