Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30from abc import abstractmethod
31import re
32from typing import (
33 Callable,
34 List,
35 Mapping,
36 Optional,
37)
39import lsst.afw.fits
40from lsst.log import Log
41from lsst.daf.butler import (
42 DataCoordinate,
43 DatasetRef,
44 DatasetType,
45 FileDataset,
46)
47from ..translators import Translator, makeCalibrationLabel
48from .parser import PathElementParser
49from .scanner import PathElementHandler, DirectoryScanner
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern; the path element name must fully match
        it for this handler to apply.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: this handler extracts no data ID keys.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match consumes the path element but intentionally yields no
        # datasets; return the match result directly instead of branching
        # to literal True/False.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 log: Log, predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        parsed = self._parser.parse(name, self.lastDataId2, log=log)
        if parsed is None:
            # Name does not match this handler's template; let the scanner
            # try another handler.
            return False
        self.handle(path, parsed, datasets, log=log, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Handlers that extract more data ID keys are considered more
        # specific, hence higher-ranked.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        log : `Log`, optional
            Log to use to report warnings and debug information.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is None:
            # Silent skip was requested.
            return
        log.warn("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if nextDataId2:
            # We have at least a partial data ID; translate it and let the
            # predicate decide whether this subtree is worth descending into.
            # If translation fails, descend anyway rather than drop content.
            dataId3 = self.translate(nextDataId2, partial=True, log=log)
            descend = True if dataId3 is None else predicate(dataId3)
        else:
            # Matched with no data ID at all: the full path so far is just a
            # fixed string, so we always descend and the match is exclusive.
            descend = True
        if descend:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, log=log, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we're always asking for a partial match,
        # because the data ID we have corresponds to a different level than
        # the one child handlers operate at.  Return the first translation
        # any child can produce.
        for child in self.scanner:
            dataId3 = child.translate(dataId2, partial=True, log=log)
            if dataId3 is not None:
                return dataId3
        return None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType

    __slots__ = ("_translator", "_datasetType")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False, log=log)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        datasets[self._datasetType].append(FileDataset(refs=[ref], path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        if partial:
            # A partial data ID may be missing required dimensions, so
            # standardize it against the full dimension universe instead of
            # this dataset type's dimension graph.
            universe = self._datasetType.dimensions.universe
            return DataCoordinate.standardize(rawDataId3, universe=universe)
        return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """

    @staticmethod
    def _readDetectors(filename):
        # Return the CCDNUM metadata value of each extension HDU in order.
        fitsData = lsst.afw.fits.Fits(filename, 'r')
        detectors = []
        # NOTE: The primary header (HDU=0) does not contain detector data.
        for hdu in range(1, fitsData.countHdus()):
            fitsData.setHdu(hdu)
            detectors.append(fitsData.readMetadata()['CCDNUM'])
        return detectors

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               log: Log, predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=True, log=log)
        if not predicate(dataId3):
            return
        # Emit one DatasetRef per detector found in the file, all sharing a
        # single FileDataset for this path.
        refs = []
        for detector in self._readDetectors(path):
            label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                         ccd=detector, filter=nextDataId2.get("filter"))
            detectorDataId3 = DataCoordinate.standardize(dataId3,
                                                         graph=self._datasetType.dimensions,
                                                         detector=detector,
                                                         calibration_label=label)
            refs.append(DatasetRef(self._datasetType, detectorDataId3))
        datasets[self._datasetType].append(FileDataset(refs=refs, path=path))

    def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial, log=log)
        return DataCoordinate.standardize(rawDataId3,
                                          universe=self._datasetType.dimensions.universe)