21 """Classes used in `RepoWalker` construction. 23 The objects here form a temporary tree that is pruned and then transformed 24 into a similar tree of `PathElementHandler` instances. See `BuilderNode` 25 method documentation for more information. 27 from __future__
import annotations
29 __all__ = [
"BuilderSkipInput",
"BuilderTargetInput",
"BuilderTree"]
31 from abc
import ABC, abstractmethod
42 from lsst.daf.butler
import DatasetType, DimensionUniverse, StorageClass
43 from ..translators
import Translator
44 from .parser
import PathElementParser
45 from .scanner
import PathElementHandler, DirectoryScanner
46 from .handlers
import (IgnoreHandler, SubdirectoryHandler, SkipHandler,
47 TargetFileHandler, MultiExtensionFileHandler)
51 """Abstract interface for nodes in the temporary tree that is used to 52 construct a `RepoWalker`. 56 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
57 """Attempt to prune this node and its children from the tree. 61 replacement : `BuilderNode` 62 The result of recursively pruning child nodes; often just ``self``. 63 messages : `list` [`str`] 64 Warning messages that should be logged by a parent node when a 65 matching path element is encountered, if this node is pruned. 67 If `True`, this node may be pruned from the tree (but will not 68 necessarily be - it may correspond to a path element that should 69 be skipped with siblings that should not be). 71 raise NotImplementedError()
74 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
75 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
76 ) -> PathElementHandler:
77 """Transform this node in the build tree into a corresponding 78 `PathElementHandler`, recursing to any children. 80 Must be called after `prune`. 84 parser : `PathElementParser` 85 An object that matches the path element the new handler is 86 responsible for and extracts a (partial) Gen2 data ID from it. 87 allKeys : `dict` [`str`, `type`] 88 A mapping from Gen2 data ID key to the type of its value. Will 89 contain all keys that may be extracted by the given parser, and 91 cumulativeKeys : `dict` [`str`, `type`], optional 92 A dictionary containing key strings and types for Gen2 data ID keys 93 that have been extracted from previous path elements for this 94 template, including those extracted by ``parser``. 98 handler : `PathElementHandler` 101 raise NotImplementedError()
105 """An intermediate base for `BuilderNode` classes that are provided as 106 direct inputs to a `RepoWalker`, and generally correspond to exactly one 112 The complete Gen2 template to be matched (not just the template for 114 keys : `dict` [`str`, `type`] 115 A mapping from Gen2 data ID key to the type of its value. 117 def __init__(self, template: str, keys: Dict[str, type]):
123 """The complete Gen2 template to be matched (`str`). 126 keys: Dict[str, type]
127 """A mapping from Gen2 data ID key to the type of its value 128 (`dict` [`str`, `type`]). 132 """The path elements (file or directory levels) of `template` 138 """An input to a `RepoWalker` that indicates that matched files should be 139 skipped, possibly with a warning message. 141 BuilderSkipInputs can be pruned. When they are not pruned, they build 142 `SkipHandler` instances. 147 The complete Gen2 template to be matched (not just the template for 149 keys : `dict` [`str`, `type`] 150 A mapping from Gen2 data ID key to the type of its value. 151 message : `str`, optional 152 If not `None`, a warning message that should be printed either when a 153 matching file is encountered or a directory that may contain such files 155 isForFiles : `bool`, optional 156 If `True` (default), this handler should be run on files. Otherwise it 157 should be run on directories. 159 def __init__(self, template: str, keys: Dict[str, type], message: Optional[str] =
None, *,
160 isForFiles: bool =
True):
161 super().
__init__(template=template, keys=keys)
165 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
166 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
167 ) -> PathElementHandler:
171 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
177 """An input to a `RepoWalker` that matches files that correspond to 178 datasets that we want to extract. 180 BuilderTargetInputs can never be pruned, and always build 181 `TargetFileHandler` instances. 185 datasetTypeName : `str` 186 Name of the dataset type. 188 Full Gen2 filename template. 189 keys : `dict` [`str`, `type`] 190 Dictionary that maps Gen2 data ID key to the type of its value. 191 storageClass : `StorageClass` 192 `StorageClass` for the Gen3 dataset type. 193 universe : `DimensionUniverse` 194 All candidate dimensions for the Gen3 dataset type. 196 Additional keyword arguments are passed to `Translator.makeMatching`, 197 along with ``datasetTypeName`` and ``keys``. 199 def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
200 storageClass: StorageClass, universe: DimensionUniverse, **kwargs: Any):
202 template = template.split(
'[%(')[0]
203 super().
__init__(template=template, keys=keys)
204 self.
_translator = Translator.makeMatching(datasetTypeName, keys, **kwargs)
206 storageClass=storageClass, universe=universe)
208 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
209 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
210 ) -> PathElementHandler:
221 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
223 return self, [],
False 225 datasetType: DatasetType
226 """The Gen3 dataset type extracted by the handler this object builds 227 (`lsst.daf.butler.DatasetType`). 232 """A `BuilderNode` that represents a subdirectory to be skipped, 233 created by pruning a `BuilderTree` that contained only `BuilderSkipInput` 236 BuilderPrunedTrees can be pruned. When they are not pruned, they 237 build `SkipHandler` instances. 241 messages : `list` [`str`] 242 A list of warning messages to be printed when the handler produced by 243 this builder matches a subdirectory. 249 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
250 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
251 ) -> PathElementHandler:
254 return SkipHandler(parser=parser, isForFiles=
False, message=message)
256 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
262 """A `BuilderNode` that represents a collection of `BuilderInput` instances 263 that all have the same template. 265 def __init__(self, old: BuilderInput, new: BuilderInput):
267 if isinstance(old, BuilderDuplicateInputs):
274 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
275 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
276 ) -> PathElementHandler:
279 return SkipHandler(parser=parser, isForFiles=
False, message=message)
def prune(self) -> Tuple[BuilderNode, List[str], bool]:
    """Prune the duplicate inputs that share this node's template.

    Every child is pruned in turn.  The pruned children replace the
    previous ones, and the outcome depends on how many of them cannot be
    pruned.

    Returns
    -------
    replacement : `BuilderNode`
        The single unprunable child when there is exactly one and no
        warning messages were collected; otherwise ``self``.
    messages : `list` [`str`]
        Warning messages a parent node should log if this node is pruned.
    prune : `bool`
        `True` if this node may be pruned from the tree.
    """
    # NOTE(review): the extraction this block was recovered from dropped
    # several statements (the ``toPruneChild`` branch, the reassignment of
    # ``self._children``, the all-prunable return, the extension of
    # ``nested`` with the collected messages, and the final return).  They
    # are reconstructed here from the surrounding fragments - confirm
    # against the upstream file before merging.
    unprunable = []
    newChildren = []
    for child in self._children:
        newChild, childMessages, toPruneChild = child.prune()
        if toPruneChild:
            self._messages.extend(childMessages)
        else:
            unprunable.append(newChild)
        # Bug fix: store the pruned child itself.  The original appended
        # ``newChildren`` to itself, leaving a list of self-references.
        newChildren.append(newChild)
    self._children = newChildren
    if not unprunable:
        # Every child can be skipped, so this node can be pruned as long as
        # the collected messages are passed up to the parent.
        return self, self._messages, True
    if len(unprunable) == 1 and not self._messages:
        # Exactly one child cannot be pruned and nothing needs to be
        # reported: let the parent use that child directly.
        return unprunable[0], [], False
    # Several unprunable children, or skips that carry messages: fold
    # everything into one "ambiguous match" message.
    nested = [f"{c.datasetType.name} (target)" for c in unprunable]
    nested.extend(self._messages)
    self._messages = [f"ambiguous match: [{', '.join(nested)}]"]
    return self, self._messages, True
315 """A `BuilderNode` that represents a directory. 317 This is the only `BuilderNode` class that is not a leaf node. If all 318 of its children can be pruned, it is replaced by a `BuilderPrunedTree` 319 (which can then be pruned itself). It builds `SubdirectoryHandler` 320 instances when not pruned. 325 def insert(self, level: int, leaf: BuilderInput):
326 """Insert an input leaf node into the tree, recursively constructing 327 intermediate parents in order to put it at the right level. 332 The level ``self`` is at in the larger tree, with zero the 333 repository root. The right level for the leaf is given by the 334 length of ``leaf.elements``. 335 leaf : `BuilderInput` 336 The leaf node to insert. 338 nextLevel = level + 1
339 element = leaf.elements[level]
340 if nextLevel == len(leaf.elements):
342 if conflict
is not None:
349 child.insert(nextLevel, leaf)
351 def fill(self, scanner: DirectoryScanner, allKeys: Dict[str, type], previousKeys: Dict[str, type], *,
352 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]):
353 """Fill a `DirectoryScanner` instance by recursively building all 358 scanner : `DirectoryScanner` 360 allKeys : `dict` [`str`, `type`] 361 Mapping from Gen2 data ID key to its value type, covering all keys 362 that could be used in any child template. 363 previousKeys : `dict` [`str`, `type`], optional 364 A dictionary containing key strings and types for Gen2 data ID keys 365 that have been extracted from previous path elements of the same 368 if fileIgnoreRegEx
is not None:
370 if dirIgnoreRegEx
is not None:
372 for template, child
in self.
_children.items():
374 cumulativeKeys = previousKeys.copy()
375 cumulativeKeys.update(parser.keys)
376 scanner.add(child.build(parser, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
377 dirIgnoreRegEx=dirIgnoreRegEx))
379 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
385 for template, child
in list(self.
_children.items()):
386 newChild, childMessages, toPruneChild = child.prune()
387 newChildren[template] = newChild
388 messages.extend(childMessages)
395 return self, [],
False 397 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
398 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
399 ) -> PathElementHandler:
402 self.
fill(built.scanner, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
403 dirIgnoreRegEx=dirIgnoreRegEx)