Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30from abc import abstractmethod
31import re
32from typing import (
33 Callable,
34 List,
35 Mapping,
36 Optional,
37 TYPE_CHECKING
38)
40import lsst.afw.fits
41from lsst.log import Log
42from lsst.daf.butler import (
43 DataCoordinate,
44 DatasetRef,
45 DatasetType,
46 FileDataset,
47)
48from ..translators import Translator, makeCalibrationLabel
49from .parser import PathElementParser
50from .scanner import PathElementHandler, DirectoryScanner
if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter
56class IgnoreHandler(PathElementHandler):
57 """A `PathElementHandler` that matches via a regular expression, and does
58 nothing.
60 An `IgnoreHandler` is used to ignore file or directory patterns that can
61 occur at any level in the directory tree, and have no relation to any
62 Gen2 filename template.
64 Parameters
65 ----------
66 pattern : `re.Pattern`
67 A regular expression pattern.
68 isForFiles : `bool`
69 Whether this handler should be applied to files (`True`) or
70 directories (`False`).
71 """
72 def __init__(self, pattern: re.Pattern, isForFiles: bool):
73 super().__init__()
74 self._pattern = pattern
75 self._isForFiles = isForFiles
77 __slots__ = ("_pattern", "_isForFiles")
79 def __str__(self):
80 return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"
82 def isForFiles(self) -> bool:
83 # Docstring inherited from PathElementHandler.
84 return self._isForFiles
86 @property
87 def rank(self) -> int:
88 # Docstring inherited from PathElementHandler.
89 return 0
91 def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
92 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
93 # Docstring inherited from PathElementHandler.
94 if self._pattern.fullmatch(name):
95 return True
96 else:
97 return False
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2, log=log)
        if nextDataId2 is None:
            # Name does not match this handler's template; let the scanner
            # try other handlers.
            return False
        self.handle(path, nextDataId2, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            log.warn("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending: with no data ID at all, the path so far is
        # just a fixed string and the match is exclusive.
        descend = True
        if nextDataId2:
            # We have a (partial) data ID; give the caller's predicate a
            # chance to veto the descent if translation succeeds.
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            if dataId3 is not None:
                descend = predicate(dataId3)
        if descend:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we always ask the children for a partial
        # match: the data ID we have corresponds to a different level than
        # the one the child handlers operate at.  Return the first success.
        candidates = (child.translate(dataId2, partial=True, log=log) for child in self.scanner)
        return next((dataId3 for dataId3 in candidates if dataId3 is not None), None)

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        datasets[self._datasetType].append(
            FileDataset(refs=[ref], path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        if partial:
            # A partial data ID may be missing required dimensions, so
            # standardize against the whole universe rather than the
            # dataset type's dimension graph.
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)
        if not predicate(dataId3):
            return
        # Read the detector number from every extension HDU.
        # NOTE: The primary header (HDU=0) does not contain detector data.
        fitsData = lsst.afw.fits.Fits(path, 'r')
        detectors = []
        for hdu in range(1, fitsData.countHdus()):
            fitsData.setHdu(hdu)
            detectors.append(fitsData.readMetadata()['CCDNUM'])
        # Produce one DatasetRef per detector, all sharing this file.
        refs = []
        for detector in detectors:
            label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                         ccd=detector, filter=nextDataId2.get("filter"))
            newDataId3 = DataCoordinate.standardize(dataId3,
                                                    graph=self._datasetType.dimensions,
                                                    detector=detector,
                                                    calibration_label=label)
            refs.append(DatasetRef(self._datasetType, newDataId3))
        datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)