lsst.obs.base  19.0.0-28-g99824a6
handlers.py
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Concrete implementations of `PathElementHandler`.

The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
avoid a circular dependency between modules.
"""
from __future__ import annotations

__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]

from abc import abstractmethod
import re
from typing import (
    Callable,
    List,
    Mapping,
    Optional,
    TYPE_CHECKING,
)

import lsst.afw.fits
from lsst.log import Log
from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
)
from ..translators import Translator, makeCalibrationLabel
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner

if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter


class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._pattern.fullmatch(name) is not None
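
    # A minimal usage sketch (the pattern here is hypothetical, not from the
    # source): a handler that silently ignores editor backup files anywhere
    # in the tree could be built as
    #
    #     ignoreBackups = IgnoreHandler(re.compile(r".*~"), isForFiles=True)
    #
    # Its ``__call__`` only reports whether ``name`` matched; it never adds
    # anything to ``datasets``, so matching entries are simply dropped.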


class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that
    utilize a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return len(self._parser.keys)

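    # For example (the template element here is hypothetical): a parser built
    # from a Gen2 path element such as ``"%(visit)d"`` has one data ID key,
    # giving rank 1, while a fixed-string element yields rank 0; the rank
    # lets `DirectoryScanner` order the handlers it tries against each entry.
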
    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`]]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()


class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do
    not want to (or cannot) extract Gen3 datasets from, or other
    files/directories that always appear at a fixed level in the directory
    tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry. If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)

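    # A minimal construction sketch (the parser instance is hypothetical): a
    # handler that warns whenever it skips a matched directory could be built
    # as
    #
    #     skip = SkipHandler(parser, isForFiles=False,
    #                        message="no Gen3 equivalent for this dataset")
    #
    # Passing ``message=None`` instead makes the skip silent.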

class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # We matched, and there's no data ID at all yet. That means the
            # full path so far is just a fixed string, so we should descend,
            # and the match is exclusive.
            scan = True
        else:
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                scan = predicate(dataId3)
            else:
                scan = True
        if scan:
            for handler in self.scanner:
                handler.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        for handler in self.scanner:
            # Since we're recursing, we're always asking for a partial match,
            # because the data ID we have corresponds to a different level
            # than the one child handlers operate at.
            result = handler.translate(dataId2, partial=True, log=log)
            if result is not None:
                return result
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
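
    # A minimal population sketch (child handler names are hypothetical, and
    # this assumes `DirectoryScanner` exposes an ``add`` method for
    # registering handlers, per the Notes above):
    #
    #     subdir = SubdirectoryHandler(parser)
    #     subdir.scanner.add(childFileHandler)
    #     subdir.scanner.add(childSubdirHandler)
    #
    # During ``handle``, every registered child is seeded with the data ID
    # accumulated so far before the nested scan runs.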


class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if predicate(dataId3):
            datasets[self._datasetType].append(FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
                                                           path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
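        # With ``partial=True`` the data ID may still be missing dimensions,
        # so it can only be standardized against the full dimension universe;
        # a complete data ID is standardized against the dataset type's own
        # dimension graph instead, which also checks that all required
        # dimensions are present.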
        if partial:
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        else:
            return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)


class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
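    # One possible generalization sketched from the Notes above (hypothetical,
    # not part of the source): the header key could become a constructor
    # argument,
    #
    #     def __init__(self, *args, detectorKey: str = "CCDNUM", **kwargs):
    #         super().__init__(*args, **kwargs)
    #         self._detectorKey = detectorKey
    #
    # with ``get_detectors`` below reading ``metadata[self._detectorKey]``.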
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)