21 """Interfaces and common code for recursively scanning directories for Gen2
24 The `PathElementHandler` ABC is defined here instead of ``handlers.py`` for
25 dependency reasons: `DirectoryScanner` uses the ABC, while its concrete
implementations use `DirectoryScanner`.
28 from __future__
import annotations
# Names exported by ``from <this module> import *``.
__all__ = [
"PathElementHandler",
"DirectoryScanner"]
32 from abc
import ABC, abstractmethod
43 from lsst.log
import Log
44 from lsst.daf.butler
import (
52 """An interface for objects that handle a single path element (directory or
53 file) in a Gen2 data repository.
Handlers are added to a `DirectoryScanner` instance, which then calls them
until one succeeds when it processes each element in a directory.
# Declares ``lastDataId2`` as a slot on handler instances.
__slots__ = (
"lastDataId2",)
"""Report what kind of path element this object handles.
69 Return `True` if this handler is for file entries, or `False` if it
72 raise NotImplementedError()
def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
             log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
    """Try to process a single path element with this handler.

    Parameters
    ----------
    path : `str`
        Full path of the file or directory.
    name : `str`
        Local name of the file or directory within its parent directory.
    datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
        Dictionary to which any datasets that are found should be added.
    log : `Log`
        Log used to report warnings and debug information.
    predicate : `~collections.abc.Callable`
        Callable that accepts a single `DataCoordinate` argument and
        returns a `bool` indicating whether that (Gen3) data ID is one
        that should be included in the scan.

    Returns
    -------
    matched : `bool`
        `True` if this handler was a match for the given path, so no other
        handlers need to be tried on it; `False` otherwise.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
105 """Return a rough indication of how flexible this handler is in terms
106 of the path element names it can match.
108 Handlers that match a constant path element should always return zero.
110 raise NotImplementedError()
def translate(self, dataId2: dict, *, partial: bool = False,
              log: Log) -> Optional[DataCoordinate]:
    """Convert a Gen2 data ID into a Gen3 data ID.

    This base implementation performs no translation and simply returns
    `None`.  Subclasses that are able to translate data IDs should
    override this method.

    Parameters
    ----------
    dataId2 : `dict`
        Gen2 data ID to translate.
    partial : `bool`, optional
        If `True` (`False` is default) this is a partial data ID for some
        dataset, and missing keys are expected.
    log : `Log`
        Log to use to report warnings and debug information.

    Returns
    -------
    dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
        A Gen3 data ID, or `None` if this handler cannot translate data
        IDs.
    """
    return None
def __lt__(self, other: PathElementHandler):
    """Order handlers by ascending ``rank``.

    Handlers are sorted by rank to reduce the possibility that more
    flexible handlers will have a chance to match something they
    shouldn't.

    Parameters
    ----------
    other : `PathElementHandler`
        The handler to compare against.

    Returns
    -------
    less : `bool`
        Result of ``self.rank < other.rank``.
    """
    ownRank = self.rank
    otherRank = other.rank
    return ownRank < otherRank
143 """The Gen2 data ID obtained by processing parent levels in the directory
146 This attribute should be reset by calling code whenever a new parent
147 directory is entered, before invoking `__call__`.
152 """An object that uses `PathElementHandler` instances to process the files
153 and subdirectories in a directory tree.
# Instance attributes declared as slots.  ``add`` inserts handlers whose
# ``isForFiles()`` is true into ``_files``; ``_subdirectories`` is
# presumably the matching list for directory handlers -- TODO confirm.
__slots__ = (
"_files",
"_subdirectories")
def add(self, handler: PathElementHandler):
    """Add a new handler to the scanner.

    Parameters
    ----------
    handler : `PathElementHandler`
        The handler to be added.  It is stored with the file handlers when
        ``handler.isForFiles()`` is true and with the subdirectory
        handlers otherwise.
    """
    # Handlers that are not for files must still be registered; without
    # this second destination they would be silently discarded.
    target = self._files if handler.isForFiles() else self._subdirectories
    # insort keeps each list ordered by the handlers' own ``<`` comparison.
    bisect.insort(target, handler)
def __iter__(self) -> Iterator[PathElementHandler]:
    """Iterate over all handlers.

    Yields
    ------
    handler : `PathElementHandler`
        Every handler held in ``_files``, followed by every handler held
        in ``_subdirectories``, each in its stored order.
    """
    yield from self._files
    yield from self._subdirectories
def scan(self, path: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
         log: Log, predicate: Callable[[DataCoordinate], bool]) -> None:
    """Process a directory.

    Each entry of the directory is offered to the handlers registered for
    its kind (file or subdirectory) until one of them reports a match.

    Parameters
    ----------
    path : `str`
        Full path to the directory to be processed.
    datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
        Dictionary that found datasets should be added to.
    log : `Log`
        Log to use to report warnings and debug information.
    predicate : `~collections.abc.Callable`
        A callable taking a single `DataCoordinate` argument and returning
        `bool`, indicating whether that (Gen3) data ID represents one
        that should be included in the scan.
    """
    unrecognized = []
    # Use scandir as a context manager so the underlying directory handle
    # is released promptly even if a handler raises.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                handlers = self._files
            elif entry.is_dir():
                handlers = self._subdirectories
            else:
                # Neither a file nor a directory: nothing can handle it.
                continue
            for handler in handlers:
                if handler(entry.path, entry.name, datasets, log=log, predicate=predicate):
                    # First match wins; do not try further handlers.
                    break
            else:
                # No handler claimed this entry.
                unrecognized.append(entry.name)
    if unrecognized:
        log.warn("Skipped unrecognized entries in %s: %s", path, unrecognized)