21 """Classes used in `RepoWalker` construction.
23 The objects here form a temporary tree that is pruned and then transformed
24 into a similar tree of `PathElementHandler` instances. See `BuilderNode`
25 method documentation for more information.
27 from __future__
import annotations
29 __all__ = [
"BuilderSkipInput",
"BuilderTargetInput",
"BuilderTree"]
31 from abc
import ABC, abstractmethod
42 from lsst.daf.butler
import DatasetType, DimensionUniverse, StorageClass, FormatterParameter
43 from ..translators
import TranslatorFactory
44 from .parser
import PathElementParser
45 from .scanner
import PathElementHandler, DirectoryScanner
46 from .handlers
import (IgnoreHandler, SubdirectoryHandler, SkipHandler,
51 """Abstract interface for nodes in the temporary tree that is used to
52 construct a `RepoWalker`.
56 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
57 """Attempt to prune this node and its children from the tree.
61 replacement : `BuilderNode`
62 The result of recursively pruning child nodes; often just ``self``.
63 messages : `list` [`str`]
64 Warning messages that should be logged by a parent node when a
65 matching path element is encountered, if this node is pruned.
67 If `True`, this node may be pruned from the tree (but will not
68 necessarily be - it may correspond to a path element that should
69 be skipped with siblings that should not be).
71 raise NotImplementedError()
74 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
75 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
76 ) -> PathElementHandler:
77 """Transform this node in the build tree into a corresponding
78 `PathElementHandler`, recursing to any children.
80 Must be called after `prune`.
84 parser : `PathElementParser`
85 An object that matches the path element the new handler is
86 responsible for and extracts a (partial) Gen2 data ID from it.
87 allKeys : `dict` [`str`, `type`]
88 A mapping from Gen2 data ID key to the type of its value. Will
89 contain all keys that may be extracted by the given parser, and
91 cumulativeKeys : `dict` [`str`, `type`]
92 A dictionary containing key strings and types for Gen2 data ID keys
93 that have been extracted from previous path elements for this
94 template, including those extracted by ``parser``.
98 handler : `PathElementHandler`
101 raise NotImplementedError()
105 """An intermediate base for `BuilderNode` classes that are provided as
106 direct inputs to a `RepoWalker`, and generally correspond to exactly one
112 The complete Gen2 template to be matched (not just the template for
114 keys : `dict` [`str`, `type`]
115 A mapping from Gen2 data ID key to the type of its value.
117 def __init__(self, template: str, keys: Dict[str, type]):
123 """The complete Gen2 template to be matched (`str`).
126 keys: Dict[str, type]
127 """A mapping from Gen2 data ID key to the type of its value
128 (`dict` [`str`, `type`]).
132 """The path elements (file or directory levels) of `template`
138 """An input to a `RepoWalker` that indicates that matched files should be
139 skipped, possibly with a warning message.
141 BuilderSkipInputs can be pruned. When they are not pruned, they build
142 `SkipHandler` instances.
147 The complete Gen2 template to be matched (not just the template for
149 keys : `dict` [`str`, `type`]
150 A mapping from Gen2 data ID key to the type of its value.
151 message : `str`, optional
152 If not `None`, a warning message that should be printed either when a
153 matching file is encountered or a directory that may contain such files
155 isForFiles : `bool`, optional
156 If `True` (default), this handler should be run on files. Otherwise it
157 should be run on directories.
159 def __init__(self, template: str, keys: Dict[str, type], message: Optional[str] =
None, *,
160 isForFiles: bool =
True):
161 super().
__init__(template=template, keys=keys)
165 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
166 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
167 ) -> PathElementHandler:
171 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
177 """An input to a `RepoWalker` that matches files that correspond to
178 datasets that we want to extract.
180 BuilderTargetInputs can never be pruned, and always build
181 `TargetFileHandler` instances.
185 datasetTypeName : `str`
186 Name of the dataset type.
188 Full Gen2 filename template.
189 keys : `dict` [`str`, `type`]
190 Dictionary that maps Gen2 data ID key to the type of its value.
191 storageClass : `StorageClass`
192 `StorageClass` for the Gen3 dataset type.
193 universe : `DimensionUniverse`
194 All candidate dimensions for the Gen3 dataset type.
195 formatter : `lsst.daf.butler.Formatter` or `str`, optional
196 A Gen 3 formatter class or fully-qualified name.
197 translatorFactory : `TranslatorFactory`
198 Object that can be used to construct data ID translators.
199 targetHandler : `PathElementHandler`, optional
200 Override target handler for this dataset type.
202 Additional keyword arguments are passed to `Translator.makeMatching`,
203 along with ``datasetTypeName`` and ``keys``.
205 def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
206 storageClass: StorageClass, universe: DimensionUniverse,
207 formatter: FormatterParameter, translatorFactory: TranslatorFactory,
208 targetHandler: Optional[PathElementHandler] =
None,
211 template = template.split(
'[%(')[0]
212 super().
__init__(template=template, keys=keys)
213 self.
_translator = translatorFactory.makeMatching(datasetTypeName, keys, **kwargs)
215 storageClass=storageClass, universe=universe,
216 isCalibration=(
"calibDate" in keys))
218 if targetHandler
is None:
219 targetHandler = TargetFileHandler
222 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
223 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
224 ) -> PathElementHandler:
229 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
231 return self, [],
False
233 datasetType: DatasetType
234 """The Gen3 dataset type extracted by the handler this object builds
235 (`lsst.daf.butler.DatasetType`).
240 """A `BuilderNode` that represents a subdirectory to be skipped,
241 created by pruning a `BuilderTree` that contained only `BuilderSkipInput`
244 BuilderPrunedTrees can be pruned. When they are not pruned, they
245 build `SkipHandler` instances.
249 messages : `list` [`str`]
250 A list of warning messages to be printed when the handler produced by
251 this builder matches a subdirectory.
257 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
258 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
259 ) -> PathElementHandler:
262 return SkipHandler(parser=parser, isForFiles=
False, message=message)
264 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
270 """A `BuilderNode` that represents a collection of `BuilderInput` instances
271 that all have the same template.
273 def __init__(self, old: BuilderInput, new: BuilderInput):
275 if isinstance(old, BuilderDuplicateInputs):
282 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
283 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
284 ) -> PathElementHandler:
287 return SkipHandler(parser=parser, isForFiles=
False, message=message)
289 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
294 newChild, childMessages, toPruneChild = child.prune()
298 unprunable.append(newChild)
299 newChildren.append(newChildren)
301 if len(unprunable) == 0:
305 elif len(unprunable) == 1
and not self.
_messages:
310 return unprunable[0], [],
False
316 nested = [f
"{c.datasetType.name} (target)" for c
in unprunable]
318 self.
_messages = [f
"ambiguous match: [{', '.join(nested)}]"]
323 """A `BuilderNode` that represents a directory.
325 This is the only `BuilderNode` class that is not a leaf node. If all
326 of its children can be pruned, it is replaced by a `BuilderPrunedTree`
327 (which can then be pruned itself). It builds `SubdirectoryHandler`
328 instances when not pruned.
333 def insert(self, level: int, leaf: BuilderInput):
334 """Insert an input leaf node into the tree, recursively constructing
335 intermediate parents in order to put it at the right level.
340 The level ``self`` is at in the larger tree, with zero the
341 repository root. The right level for the leaf is given by the
342 length of ``leaf.elements``.
343 leaf : `BuilderInput`
344 The leaf node to insert.
346 nextLevel = level + 1
347 element = leaf.elements[level]
348 if nextLevel == len(leaf.elements):
350 if conflict
is not None:
357 child.insert(nextLevel, leaf)
359 def fill(self, scanner: DirectoryScanner, allKeys: Dict[str, type], previousKeys: Dict[str, type], *,
360 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]):
361 """Fill a `DirectoryScanner` instance by recursively building all
366 scanner : `DirectoryScanner`
368 allKeys : `dict` [`str`, `type`]
369 Mapping from Gen2 data ID key to its value type, covering all keys
370 that could be used in any child template.
371 previousKeys : `dict` [`str`, `type`]
372 A dictionary containing key strings and types for Gen2 data ID keys
373 that have been extracted from previous path elements of the same
375 fileIgnoreRegEx : `re.Pattern`, optional
376 A regular expression pattern that identifies non-dataset files that
377 can be ignored, to be applied at all levels of the directory tree.
378 dirIgnoreRegEx : `re.Pattern`, optional
379 A regular expression pattern that identifies non-dataset
380 subdirectories that can be ignored, to be applied at all levels of
383 if fileIgnoreRegEx
is not None:
385 if dirIgnoreRegEx
is not None:
387 for template, child
in self.
_children.items():
389 cumulativeKeys = previousKeys.copy()
390 cumulativeKeys.update(parser.keys)
391 scanner.add(child.build(parser, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
392 dirIgnoreRegEx=dirIgnoreRegEx))
394 def prune(self) -> Tuple[BuilderNode, List[str], bool]:
400 for template, child
in list(self.
_children.items()):
401 newChild, childMessages, toPruneChild = child.prune()
402 newChildren[template] = newChild
403 messages.extend(childMessages)
410 return self, [],
False
412 def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
413 fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
414 ) -> PathElementHandler:
417 self.
fill(built.scanner, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
418 dirIgnoreRegEx=dirIgnoreRegEx)