21 """Interfaces and common code for recursively scanning directories for Gen2 24 The `PathElementHandler` ABC is defined here instead of ``handlers.py`` for 25 dependency reasons: `DirectoryScanner` uses the ABC, while its concrete 26 implementations use `DirectoryScanner`. 28 from __future__
import annotations
30 __all__ = [
"PathElementHandler",
"DirectoryScanner"]
32 from abc
import ABC, abstractmethod
43 from lsst.log
import Log
44 from lsst.daf.butler
import (
52 """An interface for objects that handle a single path element (directory or 53 file) in a Gen2 data repository. 55 Handlers are added to a `DirectoryScanner` instance, which then calls them 56 until one succeeds when it processes each element in a directory. 61 __slots__ = (
"lastDataId2",)
65 """Report what kind of path element this object handles. 69 Return `True` if this handler is for file entries, or `False` if it is for directories. 72 raise NotImplementedError()
def __call__(
    self,
    path: str,
    name: str,
    datasets: Mapping[DatasetType, List[FileDataset]],
    *,
    log: Log,
    predicate: Callable[[DataCoordinate], bool],
) -> bool:
    """Try to handle one path element and report whether it matched.

    Parameters
    ----------
    path : `str`
        Full path of the file or directory.
    name : `str`
        Local name of the file or directory within its parent directory.
    datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
        Dictionary that found datasets should be added to.
    log : `Log`
        Log to use to report warnings and debug information.
    predicate : `~collections.abc.Callable`
        A callable taking a single `DataCoordinate` argument and returning
        `bool`, indicating whether that (Gen3) data ID represents one
        that should be included in the scan.

    Returns
    -------
    matched : `bool`
        `True` if this handler was a match for the given path and no other
        handlers need to be tried on it, `False` otherwise.

    Raises
    ------
    NotImplementedError
        Always raised by this interface-level definition; concrete
        handlers are expected to supply the real behavior.
    """
    raise NotImplementedError()
105 """Return a rough indication of how flexible this handler is in terms 106 of the path element names it can match. 108 Handlers that match a constant path element should always return zero. Handlers are ordered by this value (see `__lt__`), so lower values sort first. 110 raise NotImplementedError()
def translate(
    self, dataId2: dict, *, partial: bool = False, log: Log
) -> Optional[DataCoordinate]:
    """Convert a Gen2 data ID into its Gen3 equivalent, if possible.

    This default implementation performs no translation and always returns
    `None`.  Subclasses that are able to translate data IDs should override
    this method.

    Parameters
    ----------
    dataId2 : `dict`
        Gen2 data ID to translate.
    partial : `bool`, optional
        If `True` (`False` is default) this is a partial data ID for some
        dataset, and missing keys are expected.
    log : `Log`
        Log to use to report warnings and debug information.

    Returns
    -------
    dataId3 : `lsst.daf.butler.DataCoordinate` or `None`
        A Gen3 data ID, or `None` if this handler cannot translate data
        IDs.
    """
    return None


def __lt__(self, other: PathElementHandler):
    """Order handlers by ascending ``rank``.

    Handlers are sorted by rank to reduce the possibility that more
    flexible handlers will have a chance to match something they
    shouldn't.
    """
    ownRank = self.rank
    otherRank = other.rank
    return ownRank < otherRank
143 """The Gen2 data ID obtained by processing parent levels in the directory tree. 146 This attribute should be reset by calling code whenever a new parent 147 directory is entered, before invoking `__call__`. 152 """An object that uses `PathElementHandler` instances to process the files 153 and subdirectories in a directory tree. 159 __slots__ = (
"_files",
"_subdirectories")
161 def add(self, handler: PathElementHandler):
162 """Add a new handler to the scanner, inserting it in sorted (rank) order via `bisect.insort`. 166 handler : `PathElementHandler` 167 The handler to be added. 169 if handler.isForFiles():
170 bisect.insort(self.
_files, handler)
174 def __iter__(self) -> Iterator[PathElementHandler]:
175 """Iterate over all handlers. 180 def scan(self, path: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
181 log: Log, predicate: Callable[[DataCoordinate], bool]):
182 """Process a directory, passing each entry to handlers and logging a warning that lists unrecognized entries. 187 Full path to the directory to be processed. 188 datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ] 189 Dictionary that found datasets should be added to. 190 log : `Log` 191 Log to use to report warnings and debug information. 192 predicate : `~collections.abc.Callable` 193 A callable taking a single `DataCoordinate` argument and returning 194 `bool`, indicating whether that (Gen3) data ID represents one 195 that should be included in the scan. 198 for entry
in os.scandir(path):
205 for handler
in handlers:
206 if handler(entry.path, entry.name, datasets, log=log, predicate=predicate):
209 unrecognized.append(entry.name)
211 log.warn(
"Skipped unrecognized entries in %s: %s", path, unrecognized)