lsst.obs.base  19.0.0-51-gb87bce2
builders.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Classes used in `RepoWalker` construction.
22 
23 The objects here form a temporary tree that is pruned and then transformed
24 into a similar tree of `PathElementHandler` instances. See `BuilderNode`
25 method documentation for more information.
26 """
27 from __future__ import annotations
28 
29 __all__ = ["BuilderSkipInput", "BuilderTargetInput", "BuilderTree"]
30 
31 from abc import ABC, abstractmethod
32 import os
33 import re
34 from typing import (
35  Any,
36  Dict,
37  List,
38  Optional,
39  Tuple,
40 )
41 
42 from lsst.daf.butler import DatasetType, DimensionUniverse, StorageClass, FormatterParameter
43 from ..translators import Translator
44 from .parser import PathElementParser
45 from .scanner import PathElementHandler, DirectoryScanner
46 from .handlers import (IgnoreHandler, SubdirectoryHandler, SkipHandler,
47  TargetFileHandler)
48 
49 
class BuilderNode(ABC):
    """Abstract interface for nodes in the temporary tree that is used to
    construct a `RepoWalker`.

    Concrete nodes implement `prune`, which simplifies the tree, and `build`,
    which turns a node of the pruned tree into a `PathElementHandler`.
    """

    @abstractmethod
    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        """Attempt to prune this node and its children from the tree.

        Returns
        -------
        replacement : `BuilderNode`
            The result of recursively pruning child nodes; often just ``self``.
        messages : `list` [`str`]
            Warning messages that should be logged by a parent node when a
            matching path element is encountered, if this node is pruned.
        prune : `bool`
            If `True`, this node may be pruned from the tree (but will not
            necessarily be - it may correspond to a path element that should
            be skipped with siblings that should not be).
        """
        raise NotImplementedError()

    @abstractmethod
    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        """Transform this node in the build tree into a corresponding
        `PathElementHandler`, recursing to any children.

        Must be called after `prune`.

        Parameters
        ----------
        parser : `PathElementParser`
            An object that matches the path element the new handler is
            responsible for and extracts a (partial) Gen2 data ID from it.
        allKeys : `dict` [`str`, `type`]
            A mapping from Gen2 data ID key to the type of its value.  Will
            contain all keys that may be extracted by the given parser, and
            possibly others.
        cumulativeKeys : `dict` [`str`, `type`]
            A dictionary containing key strings and types for Gen2 data ID
            keys that have been extracted from previous path elements for
            this template, including those extracted by ``parser``.
        fileIgnoreRegEx : `re.Pattern` or `None`
            Keyword-only.  Pattern that directory nodes hand to an
            `IgnoreHandler` with ``isForFiles=True`` in every nested scanner;
            `None` disables it.
        dirIgnoreRegEx : `re.Pattern` or `None`
            Keyword-only.  Pattern that directory nodes hand to an
            `IgnoreHandler` with ``isForFiles=False`` in every nested
            scanner; `None` disables it.

        Returns
        -------
        handler : `PathElementHandler`
            A new handler object.
        """
        raise NotImplementedError()
102 
103 
class BuilderInput(BuilderNode):
    """An intermediate base for `BuilderNode` classes that are provided as
    direct inputs to a `RepoWalker`, and generally correspond to exactly one
    Gen2 dataset type.

    Parameters
    ----------
    template : `str`
        The complete Gen2 template to be matched (not just the template for
        one path element).
    keys : `dict` [`str`, `type`]
        A mapping from Gen2 data ID key to the type of its value.
    """
    def __init__(self, template: str, keys: Dict[str, type]):
        self.template = template
        self.keys = keys
        # One entry per file/directory level; `BuilderTree.insert` indexes
        # into this list to place the input at the right depth.
        self.elements = self.template.split(os.path.sep)

    template: str
    """The complete Gen2 template to be matched (`str`).
    """

    keys: Dict[str, type]
    """A mapping from Gen2 data ID key to the type of its value
    (`dict` [`str`, `type`]).
    """

    elements: List[str]
    """The path elements (file or directory levels) of `template`
    (`list` of `str`).
    """
135 
136 
class BuilderSkipInput(BuilderInput):
    """An input to a `RepoWalker` that indicates that matched files should be
    skipped, possibly with a warning message.

    BuilderSkipInputs can be pruned.  When they are not pruned, they build
    `SkipHandler` instances.

    Parameters
    ----------
    template : `str`
        The complete Gen2 template to be matched (not just the template for
        one path element).
    keys : `dict` [`str`, `type`]
        A mapping from Gen2 data ID key to the type of its value.
    message : `str`, optional
        If not `None`, a warning message that should be printed either when a
        matching file is encountered or a directory that may contain such
        files is skipped.
    isForFiles : `bool`, optional
        If `True` (default), this handler should be run on files.  Otherwise
        it should be run on directories.
    """
    def __init__(self, template: str, keys: Dict[str, type], message: Optional[str] = None, *,
                 isForFiles: bool = True):
        super().__init__(template=template, keys=keys)
        self._message = message
        self._isForFiles = isForFiles

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        return SkipHandler(parser=parser, isForFiles=self._isForFiles, message=self._message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        # A skip input is always prunable; it contributes its warning (if it
        # has one) to whichever ancestor ends up standing in for it.
        messages = [] if self._message is None else [self._message]
        return self, messages, True
174 
175 
class BuilderTargetInput(BuilderInput):
    """An input to a `RepoWalker` that matches files that correspond to
    datasets that we want to extract.

    BuilderTargetInputs can never be pruned.  They build `TargetFileHandler`
    instances unless ``targetHandler`` overrides the handler type.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type.
    template : `str`
        Full Gen2 filename template.  Anything from the first ``[%(`` onward
        is discarded before the template is split into path elements.
    keys : `dict` [`str`, `type`]
        Dictionary that maps Gen2 data ID key to the type of its value.
    storageClass : `StorageClass`
        `StorageClass` for the Gen3 dataset type.
    universe : `DimensionUniverse`
        All candidate dimensions for the Gen3 dataset type.
    formatter : `lsst.daf.butler.Formatter` or `str`
        A Gen 3 formatter class or fully-qualified name.  Must be passed
        explicitly; the signature provides no default.
    targetHandler : `PathElementHandler`, optional
        Override target handler for this dataset type.  `build` calls it with
        ``parser``, ``translator``, ``datasetType`` and ``formatter`` keyword
        arguments, so it is expected to be a handler class (or factory)
        rather than an instance.  Defaults to `TargetFileHandler`.
    kwargs:
        Additional keyword arguments are passed to `Translator.makeMatching`,
        along with ``datasetTypeName`` and ``keys``.
    """
    def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
                 storageClass: StorageClass, universe: DimensionUniverse,
                 formatter: FormatterParameter,
                 targetHandler: Optional[PathElementHandler] = None, **kwargs: Any):
        # strip off [%HDU] identifiers from e.g. DECAM Community Pipeline products
        template = template.split('[%(')[0]
        super().__init__(template=template, keys=keys)
        self._translator = Translator.makeMatching(datasetTypeName, keys, **kwargs)
        self.datasetType = DatasetType(datasetTypeName, dimensions=self._translator.dimensionNames,
                                       storageClass=storageClass, universe=universe)
        self._formatter = formatter
        if targetHandler is None:
            targetHandler = TargetFileHandler
        self._handler = targetHandler

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        return self._handler(parser=parser, translator=self._translator, datasetType=self.datasetType,
                             formatter=self._formatter)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        # Targets are what the walker is looking for, so they are never
        # prunable and carry no skip messages.
        return self, [], False

    datasetType: DatasetType
    """The Gen3 dataset type extracted by the handler this object builds
    (`lsst.daf.butler.DatasetType`).
    """
233 
234 
class BuilderPrunedTree(BuilderNode):
    """A `BuilderNode` that represents a subdirectory to be skipped,
    created by pruning `BuilderTree` that contained only `BuilderSkipInput`
    instances.

    BuilderPrunedTrees can be pruned.  When they are not pruned, they
    build `SkipHandler` instances.

    Parameters
    ----------
    messages : `list` [`str`]
        A list of warning messages to be printed when the handler produced by
        this builder matches a subdirectory.  The list is stored by
        reference, not copied.
    """

    def __init__(self, messages: List[str]):
        self._messages = messages

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        # Collapse all collected warnings into one string; no warnings at all
        # is passed on as `None` rather than as an empty string.
        message = "; ".join(self._messages) if self._messages else None
        return SkipHandler(parser=parser, isForFiles=False, message=message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        return self, self._messages, True
263 
264 
class BuilderDuplicateInputs(BuilderNode):
    """A `BuilderNode` that represents a collection of `BuilderInput` instances
    that all have the same template.

    Parameters
    ----------
    old : `BuilderInput` or `BuilderDuplicateInputs`
        The node already registered for the template.  If it is itself a
        `BuilderDuplicateInputs`, its children are adopted directly so the
        collection stays flat.
    new : `BuilderInput`
        The newly inserted input that shares the template.
    """
    def __init__(self, old: BuilderInput, new: BuilderInput):
        self._children = []
        if isinstance(old, BuilderDuplicateInputs):
            self._children.extend(old._children)
        else:
            self._children.append(old)
        self._children.append(new)
        self._messages = []  # populated in prune()

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        message = "; ".join(self._messages) if self._messages else None
        # NOTE(review): the children are leaf inputs, which may well be file
        # templates; confirm that ``isForFiles=False`` is really intended.
        return SkipHandler(parser=parser, isForFiles=False, message=message)

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        unprunable = []
        newChildren = []
        # Collected locally so that calling prune() more than once does not
        # pile up duplicate messages.
        skipMessages = []
        for child in self._children:
            newChild, childMessages, toPruneChild = child.prune()
            if toPruneChild:
                skipMessages.extend(childMessages)
            else:
                unprunable.append(newChild)
            # Keep the pruned replacement of *every* child.  (This used to
            # append ``newChildren`` to itself, leaving a list of
            # self-references instead of nodes.)
            newChildren.append(newChild)
        self._children = newChildren
        self._messages = skipMessages
        if not unprunable:
            # All children are just skips, so we can prune this node if we
            # remember their messages.
            return self, self._messages, True
        if len(unprunable) == 1 and not skipMessages:
            # Exactly one child is a target, and the others were ignored with
            # no warning messages.  Tell parent node to just use that child,
            # so if we see any matching files, we just assume they're for that
            # target.
            return unprunable[0], [], False
        # Multiple targets or skips with messages, which means we won't
        # know how to handle any matching files.  Replace any messages we
        # have with a single message that combines them all as well as
        # any target dataset types that they are ambiguous with.
        nested = [f"{c.datasetType.name} (target)" for c in unprunable]
        nested.extend(skipMessages)
        self._messages = [f"ambiguous match: [{', '.join(nested)}]"]
        return self, self._messages, True
316 
317 
class BuilderTree(BuilderNode):
    """A `BuilderNode` that represents a directory.

    This is the only `BuilderNode` class that is not a leaf node.  If all
    of its children can be pruned, it is replaced by a `BuilderPrunedTree`
    (which can then be pruned itself).  It builds `SubdirectoryHandler`
    instances when not pruned.
    """
    def __init__(self):
        self._children = {}  # Maps template path element to BuilderNode

    def insert(self, level: int, leaf: BuilderInput) -> None:
        """Insert an input leaf node into the tree, recursively constructing
        intermediate parents in order to put it at the right level.

        Parameters
        ----------
        level : `int`
            The level ``self`` is at in the larger tree, with zero the
            repository root.  The right level for the leaf is given by the
            length of ``leaf.elements``.
        leaf : `BuilderInput`
            The leaf node to insert.
        """
        nextLevel = level + 1
        element = leaf.elements[level]
        if nextLevel == len(leaf.elements):
            conflict = self._children.get(element)
            if conflict is not None:
                # Sadly, the Gen2 butler has some actual dataset types that
                # use the exact same template.
                leaf = BuilderDuplicateInputs(conflict, leaf)
            self._children[element] = leaf
        else:
            # Only create a new subtree when this directory level has not
            # been seen yet.
            child = self._children.get(element)
            if child is None:
                child = BuilderTree()
                self._children[element] = child
            child.insert(nextLevel, leaf)

    def fill(self, scanner: DirectoryScanner, allKeys: Dict[str, type], previousKeys: Dict[str, type], *,
             fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]) -> None:
        """Fill a `DirectoryScanner` instance by recursively building all
        child nodes.

        Parameters
        ----------
        scanner : `DirectoryScanner`
            Object to populate.
        allKeys : `dict` [`str`, `type`]
            Mapping from Gen2 data ID key to its value type, covering all keys
            that could be used in any child template.
        previousKeys : `dict` [`str`, `type`]
            A dictionary containing key strings and types for Gen2 data ID
            keys that have been extracted from previous path elements of the
            same template.  It is copied, not modified.
        fileIgnoreRegEx : `re.Pattern` or `None`
            Keyword-only.  If not `None`, an `IgnoreHandler` for this pattern
            with ``isForFiles=True`` is added to ``scanner`` before any child
            handler.  Also forwarded to every child's `build`.
        dirIgnoreRegEx : `re.Pattern` or `None`
            Keyword-only.  If not `None`, an `IgnoreHandler` for this pattern
            with ``isForFiles=False`` is added to ``scanner`` before any child
            handler.  Also forwarded to every child's `build`.
        """
        if fileIgnoreRegEx is not None:
            scanner.add(IgnoreHandler(fileIgnoreRegEx, isForFiles=True))
        if dirIgnoreRegEx is not None:
            scanner.add(IgnoreHandler(dirIgnoreRegEx, isForFiles=False))
        for template, child in self._children.items():
            parser = PathElementParser(template, allKeys, previousKeys=previousKeys)
            # Each child gets its own key set: the inherited keys plus those
            # its own path element contributes.
            cumulativeKeys = previousKeys.copy()
            cumulativeKeys.update(parser.keys)
            scanner.add(child.build(parser, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
                                    dirIgnoreRegEx=dirIgnoreRegEx))

    def prune(self) -> Tuple[BuilderNode, List[str], bool]:
        # Docstring inherited from BuilderNode.
        toPruneThis = True
        newChildren = {}
        messages = []
        # Recursively prune children.  The replacements go into a fresh dict,
        # so the existing one can be iterated directly.
        for template, child in self._children.items():
            newChild, childMessages, toPruneChild = child.prune()
            newChildren[template] = newChild
            messages.extend(childMessages)
            if not toPruneChild:
                toPruneThis = False
        self._children = newChildren
        if toPruneThis:
            return BuilderPrunedTree(messages), messages, True
        else:
            # At least one child must be kept, so this directory stays; the
            # prunable children remain in place and keep their own messages.
            return self, [], False

    def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
              fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
              ) -> PathElementHandler:
        # Docstring inherited from BuilderNode.
        built = SubdirectoryHandler(parser)
        # Keys accumulated up to and including this directory are the
        # "previous" keys for everything inside it.
        self.fill(built.scanner, allKeys, cumulativeKeys, fileIgnoreRegEx=fileIgnoreRegEx,
                  dirIgnoreRegEx=dirIgnoreRegEx)
        return built
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:218
lsst.obs.base.gen2to3.repoWalker.handlers.SkipHandler
Definition: handlers.py:161
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput._message
_message
Definition: builders.py:161
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput.__init__
def __init__(self, str template, Dict[str, type] keys, Optional[str] message=None, *bool isForFiles=True)
Definition: builders.py:159
lsst.obs.base.gen2to3.repoWalker.handlers.SubdirectoryHandler
Definition: handlers.py:199
lsst.obs.base.gen2to3.repoWalker.builders.BuilderInput.__init__
def __init__(self, str template, Dict[str, type] keys)
Definition: builders.py:117
lsst.obs.base.gen2to3.repoWalker.builders.BuilderInput.template
template
Definition: builders.py:118
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput._handler
_handler
Definition: builders.py:213
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree
Definition: builders.py:318
lsst.obs.base.gen2to3.repoWalker.builders.BuilderNode.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:56
lsst.obs.base.gen2to3.repoWalker.builders.BuilderInput.keys
keys
Definition: builders.py:119
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:401
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree.__init__
def __init__(self)
Definition: builders.py:326
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:225
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:171
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput._formatter
_formatter
Definition: builders.py:210
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs._messages
_messages
Definition: builders.py:276
lsst.obs.base.gen2to3.repoWalker.builders.BuilderPrunedTree.__init__
def __init__(self, List[str] messages)
Definition: builders.py:250
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput.__init__
def __init__(self, *str datasetTypeName, str template, Dict[str, type] keys, StorageClass storageClass, DimensionUniverse universe, FormatterParameter formatter, Optional[PathElementHandler] targetHandler=None, **Any kwargs)
Definition: builders.py:203
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree._children
_children
Definition: builders.py:327
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs
Definition: builders.py:265
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:278
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput
Definition: builders.py:176
lsst.obs.base.gen2to3.repoWalker.builders.BuilderPrunedTree
Definition: builders.py:235
lsst.obs.base.gen2to3.repoWalker.handlers.IgnoreHandler
Definition: handlers.py:56
lsst.obs.base.gen2to3.repoWalker.builders.BuilderInput.elements
elements
Definition: builders.py:120
lsst.obs.base.gen2to3.repoWalker.builders.BuilderNode
Definition: builders.py:50
lsst.obs.base.gen2to3.repoWalker.builders.BuilderInput
Definition: builders.py:104
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput._translator
_translator
Definition: builders.py:207
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:383
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput
Definition: builders.py:137
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs._children
_children
Definition: builders.py:270
lsst.obs.base.gen2to3.repoWalker.builders.BuilderNode.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:74
lsst.obs.base.gen2to3.repoWalker.builders.BuilderPrunedTree.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:260
lsst.obs.base.gen2to3.repoWalker.builders.BuilderPrunedTree.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:253
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree.insert
def insert(self, int level, BuilderInput leaf)
Definition: builders.py:329
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs.__init__
def __init__(self, BuilderInput old, BuilderInput new)
Definition: builders.py:269
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput.build
PathElementHandler build(self, PathElementParser parser, Dict[str, type] allKeys, Dict[str, type] cumulativeKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:165
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTree.fill
def fill(self, DirectoryScanner scanner, Dict[str, type] allKeys, Dict[str, type] previousKeys, *Optional[re.Pattern] fileIgnoreRegEx, Optional[re.Pattern] dirIgnoreRegEx)
Definition: builders.py:355
lsst.obs.base.gen2to3.repoWalker.builders.BuilderDuplicateInputs.prune
Tuple[BuilderNode, List[str], bool] prune(self)
Definition: builders.py:285
lsst.obs.base.gen2to3.repoWalker.builders.BuilderSkipInput._isForFiles
_isForFiles
Definition: builders.py:162
lsst.obs.base.gen2to3.repoWalker.builders.BuilderTargetInput.datasetType
datasetType
Definition: builders.py:208
lsst.obs.base.gen2to3.repoWalker.builders.BuilderPrunedTree._messages
_messages
Definition: builders.py:251
lsst.obs.base.gen2to3.repoWalker.parser.PathElementParser
Definition: parser.py:116