lsst.obs.base  19.0.0-20-g6de566f+6
handlers.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Concrete implementations of `PathElementHandler`.
22 
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
25 """
26 from __future__ import annotations
27 
28 __all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
29 
30 from abc import abstractmethod
31 import re
32 from typing import (
33  Callable,
34  List,
35  Mapping,
36  Optional,
37 )
38 
39 from lsst.log import Log
40 from lsst.daf.butler import (
41  DataCoordinate,
42  DatasetRef,
43  DatasetType,
44  FileDataset,
45 )
46 from ..translators import Translator
47 from .parser import PathElementParser
48 from .scanner import PathElementHandler, DirectoryScanner
49 
50 
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Only ``name`` is inspected; the whole name must match the pattern.
        # A match is reported as handled (`True`) without touching
        # ``datasets``, which is what makes the entry "ignored".
        return self._pattern.fullmatch(name) is not None
90 
91 
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # The parser is given the data ID accumulated so far
        # (``self.lastDataId2``) along with the new path element.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            # `None` from the parser is treated as "no match": report the
            # entry as unhandled and do not invoke the subclass hook.
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank is the number of keys the parser reports.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
146 
147 
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Nothing is added to ``datasets``; the only possible effect is the
        # warning below.
        if self._message is None:
            return
        log.warn("Skipping %s: %s", path, self._message)
184 
185 
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        # Start with an empty scanner; callers add child handlers afterwards.
        # Without this assignment the slotted ``scanner`` attribute would be
        # unset and `handle`/`translate` would raise `AttributeError`.
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending.  That covers two cases:
        #  - there is no data ID at all yet, meaning the full path so far is
        #    just a fixed string, so we should descend and the match is
        #    exclusive;
        #  - no child handler could translate the partial data ID, so the
        #    predicate cannot be evaluated at this level.
        scan = True
        if nextDataId2:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
        if scan:
            # Give every child handler the data ID accumulated so far before
            # scanning the subdirectory's entries.
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        for handler in self.scanner:
            # Since we're recursing, we're always asking for a partial match,
            # because the data ID we have corresponds to a different level
            # than the one child handlers operate at.  The ``partial``
            # argument is therefore not forwarded.
            result = handler.translate(dataId2, partial=True, log=log)
            if result is not None:
                # First child that yields a translation wins.
                return result
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
245 
246 
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType

    __slots__ = ("_translator", "_datasetType")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # A full (non-partial) translation is requested here, unlike the
        # partial translations requested by `SubdirectoryHandler`.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if not predicate(dataId3):
            return
        # Record the file under this handler's dataset type.
        # NOTE(review): this indexes ``datasets`` directly, so the mapping
        # must already contain (or auto-create) an entry for the dataset type
        # - confirm against the caller.
        ref = DatasetRef(self._datasetType, dataId3)
        datasets[self._datasetType].append(FileDataset(refs=[ref], path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        dimensions = self._datasetType.dimensions
        if partial:
            # Partial data IDs are standardized against the dimension
            # universe only, not this dataset type's own dimensions.
            return DataCoordinate.standardize(rawDataId3, universe=dimensions.universe)
        # Full data IDs are standardized against the dataset type's
        # dimensions.
        return DataCoordinate.standardize(rawDataId3, graph=dimensions)