lsst.obs.base  19.0.0-24-g940be9f
handlers.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 """Concrete implementations of `PathElementHandler`.
22 
23 The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24 avoid a circular dependency between modules.
25 """
26 from __future__ import annotations
27 
28 __all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
29 
30 from abc import abstractmethod
31 import re
32 from typing import (
33  Callable,
34  List,
35  Mapping,
36  Optional,
37 )
38 
39 import lsst.afw.fits
40 from lsst.log import Log
41 from lsst.daf.butler import (
42  DataCoordinate,
43  DatasetRef,
44  DatasetType,
45  FileDataset,
46 )
47 from ..translators import Translator, makeCalibrationLabel
48 from .parser import PathElementParser
49 from .scanner import PathElementHandler, DirectoryScanner
50 
51 
53  """A `PathElementHandler` that matches via a regular expression, and does
54  nothing.
55 
56  An `IgnoreHandler` is used to ignore file or directory patterns that can
57  occur at any level in the directory tree, and have no relation to any
58  Gen2 filename template.
59 
60  Parameters
61  ----------
62  pattern : `re.Pattern`
63  A regular expression pattern.
64  isForFiles : `bool`
65  Whether this handler should be applied to files (`True`) or
66  directories (`False`).
67  """
68  def __init__(self, pattern: re.Pattern, isForFiles: bool):
69  super().__init__()
70  self._pattern = pattern
71  self._isForFiles = isForFiles
72 
73  __slots__ = ("_pattern", "_isForFiles")
74 
75  def isForFiles(self) -> bool:
76  # Docstring inherited from PathElementHandler.
77  return self._isForFiles
78 
79  @property
80  def rank(self) -> int:
81  # Docstring inherited from PathElementHandler.
82  return 0
83 
84  def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
85  log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
86  # Docstring inherited from PathElementHandler.
87  if self._pattern.fullmatch(name):
88  return True
89  else:
90  return False
91 
92 
94  """An intermediate base class for `PathElementHandler` classes that utilize
95  a `PathElementParser` to match a Gen2 filename template.
96 
97  Parameters
98  ----------
99  parser : `PathElementParser`
100  An object that matches the path element this handler is responsible for
101  and extracts a (partial) Gen2 data ID from it.
102  """
103  def __init__(self, parser: PathElementParser):
104  super().__init__()
105  self._parser = parser
106 
107  __slots__ = ("_parser",)
108 
109  def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
110  log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
111  # Docstring inherited from PathElementParser.
112  nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
113  if nextDataId2 is None:
114  return False
115  self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
116  return True
117 
118  @property
119  def rank(self) -> int:
120  # Docstring inherited from PathElementParser.
121  return len(self._parser.keys)
122 
    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
147 
148 
150  """A `ParsedPathElementHandler` that does nothing with an entry other
151  optionally logging a warning message.
152 
153  A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
154  want to (or cannot) extract Gen3 datasets from, or other files/directories
155  that always appears at a fixed level in the directory tree.
156 
157  Parameters
158  ----------
159  parser : `PathElementParser`
160  An object that matches the path element this handler is responsible for
161  and extracts a (partial) Gen2 data ID from it.
162  isForFiles : `bool`
163  Whether this handler should be applied to files (`True`) or
164  directories (`False`).
165  message : `str`, optional
166  A message to log at warning level when this handler matches a path
167  entry. If `None`, matched entries will be silently skipped.
168  """
169  def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
170  super().__init__(parser=parser)
171  self._isForFiles = isForFiles
172  self._message = message
173 
174  __slots__ = ("_message", "_isForFiles")
175 
176  def isForFiles(self) -> bool:
177  # Docstring inherited from PathElementHandler.
178  return self._isForFiles
179 
180  def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
181  log: Log, predicate: Callable[[DataCoordinate], bool]):
182  # Docstring inherited from ParsedPathElementHandler.
183  if self._message is not None:
184  log.warn("Skipping %s: %s", path, self._message)
185 
186 
188  """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.
189 
190  Parameters
191  ----------
192  parser : `PathElementParser`
193  An object that matches the path element this handler is responsible for
194  and extracts a (partial) Gen2 data ID from it.
195 
196  Notes
197  -----
198  The nested `DirectoryScanner` is default-constructed and should be
199  populated with child handlers after the `SubdirectoryHandler` is created.
200  """
201 
202  def __init__(self, parser: PathElementParser):
203  super().__init__(parser=parser)
205 
206  __slots__ = ("scanner",)
207 
208  def isForFiles(self) -> bool:
209  # Docstring inherited from PathElementHandler.
210  return False
211 
212  def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
213  log: Log, predicate: Callable[[DataCoordinate], bool]):
214  # Docstring inherited from ParsedPathElementHandler.
215  if not nextDataId2:
216  # We matched, and there's no data ID at all yet. That means the
217  # full path so far is just a fixed string so we should descend
218  # and the match is exclusive.
219  scan = True
220  else:
221  dataId3 = self.translate(nextDataId2, partial=True, log=log)
222  if dataId3 is not None:
223  scan = predicate(dataId3)
224  else:
225  scan = True
226  if scan:
227  for handler in self.scanner:
228  handler.lastDataId2 = nextDataId2
229  self.scanner.scan(path, datasets, log=log, predicate=predicate)
230 
231  def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
232  # Docstring inherited from PathElementHandler.
233  for handler in self.scanner:
234  # Since we're recursing, we're always asking for a partial match,
235  # because the data ID we have corresponds to different level than
236  # the one child handlers operate at.
237  result = handler.translate(dataId2, partial=True, log=log)
238  if result is not None:
239  return result
240  return None
241 
242  scanner: DirectoryScanner
243  """Scanner object that holds handlers for the entries of the subdirectory
244  matched by this handler (`DirectoryScanner`).
245  """
246 
247 
249  """A `PathElementHandler` that matches files that correspond to target
250  datasets and outputs `FileDataset` instances for them.
251 
252  Parameters
253  ----------
254  parser : `PathElementParser`
255  An object that matches the path element this handler is responsible for
256  and extracts a (partial) Gen2 data ID from it.
257  translator : `Translator`
258  Object that translates data IDs from Gen2 to Gen3.
259  datasetType : `lsst.daf.butler.DatasetType`
260  Gen3 dataset type for the datasets this handler matches.
261  """
262  def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
263  super().__init__(parser=parser)
264  self._translator = translator
265  self._datasetType = datasetType
266 
267  __slots__ = ("_translator", "_datasetType")
268 
269  def isForFiles(self) -> bool:
270  # Docstring inherited from PathElementHandler.
271  return True
272 
273  def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
274  log: Log, predicate: Callable[[DataCoordinate], bool]):
275  # Docstring inherited from ParsedPathElementHandler.
276  dataId3 = self.translate(nextDataId2, partial=False, log=log)
277  if predicate(dataId3):
278  datasets[self._datasetType].append(FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
279  path=path))
280 
281  def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
282  # Docstring inherited from PathElementHandler.
283  rawDataId3 = self._translator(dataId2, partial=partial, log=log)
284  if partial:
285  return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
286  else:
287  return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
288 
289 
291  """Handler for FITS files that store image and metadata in multiple HDUs
292  per file, for example DECam raw and Community Pipeline calibrations.
293 
294  Notes
295  -----
296  For now, this is only used by DECam, and may need to be made more generic
297  (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
298  with other obs packages.
299  """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Translate the (partial) Gen2 data ID first; the per-detector data
        # IDs are derived from it below.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)

        def get_detectors(filename):
            # Read the detector number from every extension HDU of the file.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            detectors = []
            for i in range(1, fitsData.countHdus()):
                fitsData.setHdu(i)
                metadata = fitsData.readMetadata()
                # 'CCDNUM' is the DECam convention for the detector id —
                # presumably needs to be configurable for other instruments
                # (see class notes).
                detectors.append(metadata['CCDNUM'])
            return detectors

        if predicate(dataId3):
            detectors = get_detectors(path)
            refs = []
            # One DatasetRef per detector HDU; all of them share this file.
            for detector in detectors:
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                # Expand the partial data ID with the detector and its
                # calibration label to satisfy the full dimension graph.
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))

            datasets[self._datasetType].append(FileDataset(refs=refs, path=path))
327 
328  def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
329  assert partial is True, "We always require partial, to ignore 'ccdnum'"
330  rawDataId3 = self._translator(dataId2, partial=partial, log=log)
331  return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)