lsst.obs.base  19.0.0-56-g64d9981
handlers.py
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Concrete implementations of `PathElementHandler`.

The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
avoid a circular dependency between modules.
"""
from __future__ import annotations

__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]

from abc import abstractmethod
import re
from typing import (
    Callable,
    List,
    Mapping,
    Optional,
    TYPE_CHECKING
)

import lsst.afw.fits
from lsst.log import Log
from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
)
from ..translators import Translator, makeCalibrationLabel
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner

if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter


class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        if self._pattern.fullmatch(name):
            return True
        else:
            return False
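

# --- Editor's note: illustrative example, not part of the original module ---
# A minimal sketch of using an ``IgnoreHandler`` on its own.  The pattern,
# the logger name, and the always-true predicate are arbitrary choices made
# for this example; in normal use the handler is invoked by a
# ``DirectoryScanner`` rather than called directly.
def _exampleIgnoreHandlerUsage() -> bool:
    handler = IgnoreHandler(re.compile(r".*\.log"), isForFiles=True)
    # The handler reports whether the name matches its pattern; it never
    # records anything in the datasets mapping passed to it.
    return handler(
        "repo/registry/ingest.log", "ingest.log", {},
        log=Log.getLogger("example"),
        predicate=lambda dataId: True,
    )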


class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
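

# --- Editor's note: illustrative example, not part of the original module ---
# Sketch of the subclassing contract described above: a concrete handler
# overrides ``isForFiles``, ``handle``, and ``translate``, while ``__call__``
# runs the parser and only invokes ``handle`` when the path element matches.
# ``_LoggingHandler`` is hypothetical and exists only for illustration.
class _LoggingHandler(ParsedPathElementHandler):
    """Toy handler that just logs every matched directory."""

    __slots__ = ()

    def isForFiles(self) -> bool:
        return False

    def handle(self, path, nextDataId2, datasets, *, log, predicate):
        log.debug("Matched %s with partial Gen2 data ID %s.", path, nextDataId2)

    def translate(self, dataId2, *, partial=False, log):
        # This toy handler never produces a Gen3 data ID.
        return None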


class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry. If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)
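

# --- Editor's note: illustrative example, not part of the original module ---
# A ``SkipHandler`` is typically registered for Gen2 entries that are
# recognized but deliberately not converted.  Construction of the
# ``PathElementParser`` is defined in ``parser.py`` and is assumed to have
# happened elsewhere; only the ``SkipHandler`` arguments shown here come
# from this module.
def _exampleSkipHandler(parser: PathElementParser) -> SkipHandler:
    return SkipHandler(parser, isForFiles=True,
                       message="no Gen3 equivalent for this dataset type")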


class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # We matched, and there's no data ID at all yet.  That means the
            # full path so far is just a fixed string, so we should descend,
            # and the match is exclusive.
            scan = True
        else:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
            else:
                scan = True
        if scan:
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        for handler in self.scanner:
            # Since we're recursing, we're always asking for a partial match,
            # because the data ID we have corresponds to a different level than
            # the one the child handlers operate at.
            result = handler.translate(dataId2, partial=True, log=log)
            if result is not None:
                return result
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
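

# --- Editor's note: illustrative example, not part of the original module ---
# As the Notes above say, a SubdirectoryHandler's nested scanner starts out
# empty and must be populated with child handlers afterwards.  The sketch
# below assumes ``DirectoryScanner`` (scanner.py) provides an ``add`` method
# for registering handlers; check scanner.py for the actual registration API.
def _examplePopulateSubdirectoryHandler(parser, childHandlers):
    subdir = SubdirectoryHandler(parser)
    for child in childHandlers:
        # Assumed registration call; see DirectoryScanner in scanner.py.
        subdir.scanner.add(child)
    return subdir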


class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            datasets[self._datasetType].append(FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
                                                           path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        if partial:
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        else:
            return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
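

# --- Editor's note: illustrative example, not part of the original module ---
# Sketch of driving a TargetFileHandler directly.  ``parser``, ``translator``,
# and ``datasetType`` are assumed to have been built elsewhere (see parser.py
# and ../translators.py); the file path and logger name are invented for
# illustration only.
def _exampleTargetFileHandler(parser, translator, datasetType):
    handler = TargetFileHandler(parser, translator, datasetType)
    handler.lastDataId2 = {}  # normally set by the parent SubdirectoryHandler
    datasets = {datasetType: []}
    matched = handler(
        "repo/some/path/dataset.fits", "dataset.fits", datasets,
        log=Log.getLogger("example"),
        predicate=lambda dataId: True,
    )
    # If the parser matched the file name, ``datasets[datasetType]`` now holds
    # a FileDataset whose single DatasetRef carries the translated Gen3 data
    # ID; otherwise ``matched`` is False and nothing was added.
    return matched, datasets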


class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
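

# --- Editor's note: illustrative example, not part of the original module ---
# The Notes on MultiExtensionFileHandler suggest making the ``CCDNUM`` header
# key configurable.  This standalone sketch mirrors the nested
# ``get_detectors`` helper above but takes the header key as a parameter; it
# is an illustration of that idea, not code from obs_base.
def _exampleDetectorIds(filename: str, key: str = "CCDNUM") -> list:
    fitsData = lsst.afw.fits.Fits(filename, 'r')
    ids = []
    # Skip the primary header (HDU=0), which does not describe a detector.
    for i in range(1, fitsData.countHdus()):
        fitsData.setHdu(i)
        ids.append(fitsData.readMetadata()[key])
    return ids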