21 """Classes used in `RepoWalker` construction.
23 The objects here form a temporary tree that is pruned and then transformed
24 into a similar tree of `PathElementHandler` instances. See `BuilderNode`
25 method documentation for more information.
27 from __future__
import annotations
29 __all__ = [
"BuilderSkipInput",
"BuilderTargetInput",
"BuilderTree"]
31 from abc
import ABC, abstractmethod
42 from lsst.daf.butler
import DatasetType, DimensionUniverse, StorageClass
43 from ..translators
import Translator
44 from .parser
import PathElementParser
45 from .scanner
import PathElementHandler, DirectoryScanner
46 from .handlers
import IgnoreHandler, SubdirectoryHandler, SkipHandler, TargetFileHandler
50 """Abstract interface for nodes in the temporary tree that is used to
51 construct a `RepoWalker`.
def prune(self) -> Tuple[BuilderNode, List[str], bool]:
    """Try to prune this node, together with its children, from the tree.

    Returns
    -------
    replacement : `BuilderNode`
        Outcome of recursively pruning the child nodes; frequently this is
        simply ``self``.
    messages : `list` [`str`]
        Warnings that a parent node should log when a matching path
        element is encountered, in the event that this node is pruned.
    prune : `bool`
        `True` when this node is allowed to be pruned from the tree.  That
        does not guarantee it will be: the node may correspond to a path
        element that should be skipped while its siblings should not.
    """
    # Abstract hook: concrete node classes supply the real implementation.
    raise NotImplementedError
def build(
    self,
    parser: PathElementParser,
    allKeys: Dict[str, type],
    cumulativeKeys: Dict[str, type],
    *,
    fileIgnoreRegEx: Optional[re.Pattern],
    dirIgnoreRegEx: Optional[re.Pattern]
) -> PathElementHandler:
    """Convert this build-tree node into the matching `PathElementHandler`,
    recursing into any children.

    This may only be called once `prune` has been run.

    Parameters
    ----------
    parser : `PathElementParser`
        Object that matches the path element the new handler is in charge
        of and pulls a (partial) Gen2 data ID out of it.
    allKeys : `dict` [`str`, `type`]
        Maps each Gen2 data ID key to the type of its value.  Contains
        every key that the given parser may extract.
    cumulativeKeys : `dict` [`str`, `type`], optional
        Key strings and types for the Gen2 data ID keys already extracted
        from earlier path elements of this template, plus those extracted
        by ``parser``.
    fileIgnoreRegEx : `re.Pattern` or `None`
        Keyword-only.  Presumably a pattern for files that should be
        ignored -- confirm against the concrete implementations.
    dirIgnoreRegEx : `re.Pattern` or `None`
        Keyword-only.  Presumably a pattern for directories that should be
        ignored -- confirm against the concrete implementations.

    Returns
    -------
    handler : `PathElementHandler`
        The handler produced from this node.
    """
    # Abstract hook: concrete node classes supply the real implementation.
    raise NotImplementedError
104 """An intermediate base for `BuilderNode` classes that are provided as
105 direct inputs to a `RepoWalker`, and generally correspond to exactly one
111 The complete Gen2 template to be matched (not just the template for
113 keys : `dict` [`str`, `type`]
114 A mapping from Gen2 data ID key to the type of its value.
116 def __init__(self, template: str, keys: Dict[str, type]):
122 """The complete Gen2 template to be matched (`str`).
125 keys: Dict[str, type]
126 """A mapping from Gen2 data ID key to the type of its value
127 (`dict` [`str`, `type`]).
131 """The path elements (file or directory levels) of `template`
137 """An input to a `RepoWalker` that indicates that matched files should be
138 skipped, possibly with a warning message.
140 BuilderSkipInputs can be pruned. When they are not pruned, they build
141 `SkipHandler` instances.
146 The complete Gen2 template to be matched (not just the template for
148 keys : `dict` [`str`, `type`]
149 A mapping from Gen2 data ID key to the type of its value.
150 message : `str`, optional
151 If not `None`, a warning message that should be printed either when a
152 matching file is encountered or a directory that may contain such files
154 isForFiles : `bool`, optional
155 If `True` (default), this handler should be run on files. Otherwise it
156 should be run on directories.
158 def __init__(self, template: str, keys: Dict[str, type], message: Optional[str] =
None, *,
159 isForFiles: bool =
True):
160 super().
__init__(template=template, keys=keys)
164 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
165 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
166 ) -> PathElementHandler:
170 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
176 """An input to a `RepoWalker` that matches files that correspond to
177 datasets that we want to extract.
179 BuilderTargetInputs can never be pruned, and always build
180 `TargetFileHandler` instances.
184 datasetTypeName : `str`
185 Name of the dataset type.
187 Full Gen2 filename template.
188 keys : `dict` [`str`, `type`]
189 Dictionary that maps Gen2 data ID key to the type of its value.
190 storageClass : `StorageClass`
191 `StorageClass` for the Gen3 dataset type.
192 universe : `DimensionUniverse`
193 All candidate dimensions for the Gen3 dataset type.
195 Additional keyword arguments are passed to `Translator.makeMatching`,
196 along with ``datasetTypeName`` and ``keys``.
198 def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
199 storageClass: StorageClass, universe: DimensionUniverse, **kwargs: Any):
200 super().
__init__(template=template, keys=keys)
201 self.
_translator = Translator.makeMatching(datasetTypeName, keys, **kwargs)
203 storageClass=storageClass, universe=universe)
205 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
206 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
207 ) -> PathElementHandler:
def prune(self) -> Tuple[BuilderNode, List[str], bool]:
    """Report that this target node has to stay in the tree.

    Returns
    -------
    replacement : `BuilderNode`
        Always ``self``.
    messages : `list` [`str`]
        Always an empty list; there is nothing to warn about.
    prune : `bool`
        Always `False`, because target inputs can never be pruned.
    """
    replacement, messages, prunable = self, [], False
    return replacement, messages, prunable
215 datasetType: DatasetType
216 """The Gen3 dataset type extracted by the handler this object builds
217 (`lsst.daf.butler.DatasetType`).
222 """A `BuilderNode` that represents a subdirectory to be skipped,
223 created by pruning `BuilderTree` that contained only `BuilderSkipInput`
226 BuilderPrunedTrees can be pruned. When they are not pruned, they
227 build `SkipHandler` instances.
231 messages : `list` [`str`]
232 A list of warning messages to be printed when the handler produced by
233 this builder matches a subdirectory.
239 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
240 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
241 ) -> PathElementHandler:
244 return SkipHandler(parser=parser, isForFiles=
False, message=message)
246 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
252 """A `BuilderNode` that represents a collection of `BuilderInput` instances
253 that all have the same template.
255 def __init__(self, old: BuilderInput, new: BuilderInput):
257 if isinstance(old, BuilderDuplicateInputs):
264 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
265 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
266 ) -> PathElementHandler:
269 return SkipHandler(parser=parser, isForFiles=
False, message=message)
def prune(self) -> Tuple[BuilderNode, List[str], bool]:
    """Prune each same-template child and decide what this node becomes.

    Every child is pruned in turn and replaced by whatever its own
    ``prune`` returns.  Warning messages from prunable children are
    accumulated on this node.

    Returns
    -------
    replacement : `BuilderNode`
        ``self``, or the single unprunable child when that child is the
        only candidate and no warning messages were collected.
    messages : `list` [`str`]
        Warning messages a parent should log if this node is pruned.
    prune : `bool`
        `True` if this node may be pruned; `False` when the single
        unprunable child is returned in its place.
    """
    unprunable = []
    newChildren = []
    for child in self._children:
        newChild, childMessages, toPruneChild = child.prune()
        if toPruneChild:
            self._messages.extend(childMessages)
        else:
            unprunable.append(newChild)
        # Record the pruned replacement itself.  Appending the list to
        # itself would leave ``_children`` holding self-referential lists
        # instead of nodes.
        newChildren.append(newChild)
    self._children = newChildren
    if not unprunable:
        # Only skips remain, so this node can be pruned as long as the
        # collected messages are passed along.
        return self, self._messages, True
    if len(unprunable) == 1 and not self._messages:
        # Exactly one candidate and nothing to warn about: let the parent
        # use that child directly.
        return unprunable[0], [], False
    # Several candidates, or skips carrying messages: matching files are
    # ambiguous, so collapse everything into one combined message.
    nested = [f"{c.datasetType.name} (target)" for c in unprunable]
    nested.extend(self._messages)
    self._messages = [f"ambiguous match: [{', '.join(nested)}]"]
    return self, self._messages, True
305 """A `BuilderNode` that represents a directory.
307 This is the only `BuilderNode` class that is not a leaf node. If all
308 of its children can be pruned, it is replaced by a `BuilderPrunedTree`
309 (which can then be pruned itself). It builds `SubdirectoryHandler`
310 instances when not pruned.
315 def insert(self, level: int, leaf: BuilderInput):
316 """Insert an input leaf node into the tree, recursively constructing
317 intermediate parents in order to put it at the right level.
322 The level ``self`` is at in the larger tree, with zero the
323 repository root. The right level for the leaf is given by the
324 length of ``leaf.elements``.
325 leaf : `BuilderInput`
326 The leaf node to insert.
328 nextLevel = level + 1
329 element = leaf.elements[level]
330 if nextLevel == len(leaf.elements):
332 if conflict
is not None:
339 child.insert(nextLevel, leaf)
341 def fill(self, scanner: DirectoryScanner, allKeys: Dict[str, type], previousKeys: Dict[str, type], *,
342 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]):
343 """Fill a `DirectoryScanner` instance by recursively building all
348 scanner : `DirectoryScanner`
350 allKeys : `dict` [`str`, `type`]
351 Mapping from Gen2 data ID key to its value type, covering all keys
352 that could be used in any child template.
353 previousKeys : `dict` [`str`, `type`], optional
354 A dictionary containing key strings and types for Gen2 data ID keys
355 that have been extracted from previous path elements of the same
358 if fileIgnoreRegEx
is not None:
360 if dirIgnoreRegEx
is not None:
362 for template, child
in self.
_children.items():
364 cumulativeKeys = previousKeys.copy()
365 cumulativeKeys.update(parser.keys)
366 scanner.add(child.build(parser, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
367 dirIgnoreRegEx=dirIgnoreRegEx))
369 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
375 for template, child
in list(self.
_children.items()):
376 newChild, childMessages, toPruneChild = child.prune()
377 newChildren[template] = newChild
378 messages.extend(childMessages)
385 return self, [],
False
387 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
388 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
389 ) -> PathElementHandler:
392 self.
fill(built.scanner, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
393 dirIgnoreRegEx=dirIgnoreRegEx)