Coverage for python/lsst/obs/base/gen2to3/repoWalker/builders.py : 30%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes used in `RepoWalker` construction.
23The objects here form a temporary tree that is pruned and then transformed
24into a similar tree of `PathElementHandler` instances. See `BuilderNode`
25method documentation for more information.
26"""
27from __future__ import annotations
29__all__ = ["BuilderSkipInput", "BuilderTargetInput", "BuilderTree"]
31from abc import ABC, abstractmethod
32import os
33import re
34from typing import (
35 Any,
36 Dict,
37 List,
38 Optional,
39 Tuple,
40)
42from lsst.daf.butler import DatasetType, DimensionUniverse, StorageClass
43from ..translators import Translator
44from .parser import PathElementParser
45from .scanner import PathElementHandler, DirectoryScanner
46from .handlers import IgnoreHandler, SubdirectoryHandler, SkipHandler, TargetFileHandler
class BuilderNode(ABC):
    """Common interface implemented by every node of the temporary tree from
    which a `RepoWalker` is constructed.

    The tree is first simplified via `prune` and then converted into
    `PathElementHandler` objects via `build`.
    """

    @abstractmethod
    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        """Try to prune this node (and, recursively, its children).

        Returns
        -------
        replacement : `BuilderNode`
            The node that should take this node's place after its children
            have been pruned recursively; frequently this is ``self``.
        messages : `list` [`str`]
            Warning messages a parent node should log when it encounters a
            matching path element, in the case where this node is pruned.
        prune : `bool`
            `True` if this node is allowed to be pruned from the tree.  That
            does not guarantee it will be: it may correspond to a path element
            that should be skipped while its siblings should not.
        """
        raise NotImplementedError()

    @abstractmethod
    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        """Convert this build-tree node into the matching
        `PathElementHandler`, recursing into any children.

        This must only be called after `prune`.

        Parameters
        ----------
        parser : `PathElementParser`
            Object that matches the path element the new handler is
            responsible for, and extracts a (partial) Gen2 data ID from it.
        allKeys : `dict` [`str`, `type`]
            Mapping from Gen2 data ID key to the type of its value.  Contains
            every key the given parser may extract, and possibly more.
        cumulativeKeys : `dict` [`str`, `type`]
            Key names and types for the Gen2 data ID keys extracted from the
            earlier path elements of this template, including the ones
            extracted by ``parser``.
        fileIgnoreRegEx : `re.Pattern` or `None`
            Pattern for files to ignore, handed down to nodes that populate
            nested directory scanners; `None` for no such pattern.
        dirIgnoreRegEx : `re.Pattern` or `None`
            Pattern for directories to ignore, handed down to nodes that
            populate nested directory scanners; `None` for no such pattern.

        Returns
        -------
        handler : `PathElementHandler`
            The newly constructed handler.
        """
        raise NotImplementedError()
class BuilderInput(BuilderNode):
    """Intermediate base class for the `BuilderNode` types that are handed
    directly to a `RepoWalker`; each generally corresponds to exactly one
    Gen2 dataset type.

    Parameters
    ----------
    template : `str`
        The full Gen2 template to match (not merely the template of a single
        path element).
    keys : `dict` [`str`, `type`]
        Mapping from Gen2 data ID key to the type of its value.
    """

    template: str
    """The full Gen2 template to match (`str`).
    """

    keys: Dict[str, type]
    """Mapping from Gen2 data ID key to the type of its value
    (`dict` [`str`, `type`]).
    """

    elements: List[str]
    """The individual path elements (file or directory levels) that make up
    `template` (`list` of `str`).
    """

    def __init__(self, template: str, keys: Dict[str, type]):
        # One entry per directory/file level of the template.
        pieces = template.split(os.path.sep)
        self.template = template
        self.keys = keys
        self.elements = pieces
class BuilderSkipInput(BuilderInput):
    """A `RepoWalker` input stating that matching files should be skipped,
    optionally with a warning message.

    A BuilderSkipInput may be pruned.  If it is not, it builds a
    `SkipHandler`.

    Parameters
    ----------
    template : `str`
        The full Gen2 template to match (not merely the template of a single
        path element).
    keys : `dict` [`str`, `type`]
        Mapping from Gen2 data ID key to the type of its value.
    message : `str`, optional
        When not `None`, a warning message to print either when a matching
        file is encountered or when a directory that may contain such files
        is skipped.
    isForFiles : `bool`, optional
        If `True` (the default) the handler is to be run on files; otherwise
        it is to be run on directories.
    """

    def __init__(
        self,
        template: str,
        keys: Dict[str, type],
        message: Optional[str] = None,
        *,
        isForFiles: bool = True
    ):
        super().__init__(template=template, keys=keys)
        self._isForFiles = isForFiles
        self._message = message

    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        handler = SkipHandler(parser=parser, isForFiles=self._isForFiles, message=self._message)
        return handler

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        # A skip is always prunable; it only contributes a message when one
        # was supplied.
        messages = [] if self._message is None else [self._message]
        return self, messages, True
class BuilderTargetInput(BuilderInput):
    """A `RepoWalker` input matching the files of a dataset that should be
    extracted.

    A BuilderTargetInput is never prunable, and it always builds a
    `TargetFileHandler`.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type.
    template : `str`
        Full Gen2 filename template.
    keys : `dict` [`str`, `type`]
        Dictionary mapping Gen2 data ID key to the type of its value.
    storageClass : `StorageClass`
        `StorageClass` for the Gen3 dataset type.
    universe : `DimensionUniverse`
        All candidate dimensions for the Gen3 dataset type.
    kwargs:
        Additional keyword arguments are passed to `Translator.makeMatching`,
        along with ``datasetTypeName`` and ``keys``.
    """

    datasetType: DatasetType
    """The Gen3 dataset type extracted by the handler this object builds
    (`lsst.daf.butler.DatasetType`).
    """

    def __init__(
        self,
        *,
        datasetTypeName: str,
        template: str,
        keys: Dict[str, type],
        storageClass: StorageClass,
        universe: DimensionUniverse,
        **kwargs: Any
    ):
        super().__init__(template=template, keys=keys)
        translator = Translator.makeMatching(datasetTypeName, keys, **kwargs)
        self._translator = translator
        # The dataset type's dimensions are taken from the translator.
        self.datasetType = DatasetType(
            datasetTypeName,
            dimensions=translator.dimensionNames,
            storageClass=storageClass,
            universe=universe,
        )

    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        handler = TargetFileHandler(
            parser=parser,
            translator=self._translator,
            datasetType=self.datasetType,
        )
        return handler

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        # Targets are never prunable and carry no warning messages.
        noMessages: List[str] = []
        return self, noMessages, False
class BuilderPrunedTree(BuilderNode):
    """A `BuilderNode` standing in for a subdirectory that is to be skipped.

    It is created by pruning a `BuilderTree` that held nothing but
    `BuilderSkipInput` instances.

    A BuilderPrunedTree may itself be pruned.  If it is not, it builds a
    `SkipHandler`.

    Parameters
    ----------
    messages : `list` [`str`]
        Warning messages to print when the handler produced by this builder
        matches a subdirectory.
    """

    def __init__(self, messages: List[str]):
        self._messages = messages

    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        # Collapse all warnings into one string; no warnings means no message.
        if self._messages:
            combined = "; ".join(self._messages)
        else:
            combined = None
        return SkipHandler(parser=parser, isForFiles=False, message=combined)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        prunable = True
        return self, self._messages, prunable
class BuilderDuplicateInputs(BuilderNode):
    """A `BuilderNode` that represents a collection of `BuilderInput` instances
    that all have the same template.

    Parameters
    ----------
    old : `BuilderNode`
        The node already registered for the template.  If it is itself a
        `BuilderDuplicateInputs`, its children are adopted directly rather
        than nesting it.
    new : `BuilderInput`
        The additional input that uses the same template.
    """

    def __init__(self, old: BuilderNode, new: BuilderInput):
        self._children = []
        if isinstance(old, BuilderDuplicateInputs):
            # Flatten: absorb the existing group's children instead of
            # nesting duplicate containers.
            self._children.extend(old._children)
        else:
            self._children.append(old)
        self._children.append(new)
        self._messages = []  # populated in prune()

    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        # Any messages collected by prune() are merged into one warning.
        message = "; ".join(self._messages) if self._messages else None
        return SkipHandler(parser=parser, isForFiles=False, message=message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        unprunable = []
        newChildren = []
        for child in self._children:
            newChild, childMessages, toPruneChild = child.prune()
            if toPruneChild:
                self._messages.extend(childMessages)
            else:
                unprunable.append(newChild)
            # Keep the (possibly replaced) child node.  The previous code
            # appended ``newChildren`` to itself here, which filled
            # ``self._children`` with self-referencing lists instead of nodes.
            newChildren.append(newChild)
        self._children = newChildren
        if len(unprunable) == 0:
            # All children are just skips, so we can prune this node if we
            # remember their messages.
            return self, self._messages, True
        elif len(unprunable) == 1 and not self._messages:
            # Exactly one child is a target, and the others were ignored with
            # no warning messages.  Tell parent node to just use that child,
            # so if we see any matching files, we just assume they're for that
            # target.
            return unprunable[0], [], False
        else:
            # Multiple targets or skips with messages, which means we won't
            # know how to handle any matching files.  Replace any messages we
            # have with a single message that combines them all as well as
            # any target dataset types that they are ambiguous with.
            # NOTE(review): this assumes every unprunable child exposes a
            # ``datasetType`` attribute (as `BuilderTargetInput` does).
            nested = [f"{c.datasetType.name} (target)" for c in unprunable]
            nested.extend(self._messages)
            self._messages = [f"ambiguous match: [{', '.join(nested)}]"]
            return self, self._messages, True
class BuilderTree(BuilderNode):
    """A `BuilderNode` for a directory level.

    No other `BuilderNode` class has children of its own.  When every child
    can be pruned, the tree is replaced by a `BuilderPrunedTree` (which may
    then be pruned in turn).  When not pruned, it builds a
    `SubdirectoryHandler`.
    """

    def __init__(self):
        # Keyed by template path element; values are BuilderNode instances.
        self._children = {}

    def insert(self, level: int, leaf: BuilderInput):
        """Add an input leaf node to the tree, creating intermediate parent
        trees recursively so that it ends up at the correct depth.

        Parameters
        ----------
        level : `int`
            Depth of ``self`` within the overall tree, where zero is the
            repository root.  The correct depth for the leaf follows from
            the length of ``leaf.elements``.
        leaf : `BuilderInput`
            The leaf node to add.
        """
        element = leaf.elements[level]
        nextLevel = level + 1
        if nextLevel != len(leaf.elements):
            # Not at the final path element yet: descend into (or create)
            # the subtree for this element.
            subtree = self._children.setdefault(element, BuilderTree())
            subtree.insert(nextLevel, leaf)
            return
        existing = self._children.get(element)
        if existing is not None:
            # Sadly, the Gen2 butler has some actual dataset types that
            # use the exact same template.
            leaf = BuilderDuplicateInputs(existing, leaf)
        self._children[element] = leaf

    def fill(
        self,
        scanner: DirectoryScanner,
        allKeys: Dict[str, type],
        previousKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ):
        """Populate a `DirectoryScanner` by building every child node,
        recursively.

        Parameters
        ----------
        scanner : `DirectoryScanner`
            The object to populate.
        allKeys : `dict` [`str`, `type`]
            Mapping from Gen2 data ID key to its value type, covering every
            key that any child template might use.
        previousKeys : `dict` [`str`, `type`]
            Key names and types for the Gen2 data ID keys extracted from the
            earlier path elements of the same template.
        fileIgnoreRegEx : `re.Pattern` or `None`
            If not `None`, an `IgnoreHandler` for files using this pattern is
            added to ``scanner``; also forwarded to every child's ``build``.
        dirIgnoreRegEx : `re.Pattern` or `None`
            If not `None`, an `IgnoreHandler` for directories using this
            pattern is added to ``scanner``; also forwarded to every child's
            ``build``.
        """
        # Ignore handlers are added first: files, then directories.
        for pattern, forFiles in ((fileIgnoreRegEx, True), (dirIgnoreRegEx, False)):
            if pattern is not None:
                scanner.add(IgnoreHandler(pattern, isForFiles=forFiles))
        for template, child in self._children.items():
            parser = PathElementParser(template, allKeys, previousKeys=previousKeys)
            # Work on a copy so sibling templates do not see each other's keys.
            cumulativeKeys = previousKeys.copy()
            cumulativeKeys.update(parser.keys)
            handler = child.build(
                parser,
                allKeys,
                cumulativeKeys,
                fileIgnoreRegEx=fileIgnoreRegEx,
                dirIgnoreRegEx=dirIgnoreRegEx,
            )
            scanner.add(handler)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        replacements = {}
        messages = []
        mustKeep = False
        # Prune every child recursively; one unprunable child is enough to
        # keep this whole tree.
        for template, child in tuple(self._children.items()):
            replacement, childMessages, childPrunable = child.prune()
            replacements[template] = replacement
            messages.extend(childMessages)
            if not childPrunable:
                mustKeep = True
        self._children = replacements
        if mustKeep:
            return self, [], False
        return BuilderPrunedTree(messages), messages, True

    def build(
        self,
        parser: PathElementParser,
        allKeys: Dict[str, type],
        cumulativeKeys: Dict[str, type],
        *,
        fileIgnoreRegEx: Optional[re.Pattern],
        dirIgnoreRegEx: Optional[re.Pattern]
    ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        handler = SubdirectoryHandler(parser)
        # The subdirectory's own scanner receives handlers for all children.
        self.fill(
            handler.scanner,
            allKeys,
            cumulativeKeys,
            fileIgnoreRegEx=fileIgnoreRegEx,
            dirIgnoreRegEx=dirIgnoreRegEx,
        )
        return handler