Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30from abc import abstractmethod
31import re
32from typing import (
33 Callable,
34 List,
35 Mapping,
36 Optional,
37 TYPE_CHECKING
38)
40import lsst.afw.fits
41from lsst.daf.butler import (
42 DataCoordinate,
43 DatasetRef,
44 DatasetType,
45 FileDataset,
46)
47from ..translators import Translator, makeCalibrationLabel
48from .parser import PathElementParser
49from .scanner import PathElementHandler, DirectoryScanner
if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match consumes the path element but deliberately produces no
        # datasets; fullmatch returns None when the name does not match.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str, datasets: Mapping[DatasetType, List[FileDataset]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            # This handler's template does not match this path element.
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            # Logger.warn is a deprecated alias for Logger.warning.
            self.log.warning("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Descend by default: an empty data ID means the path so far is just
        # a fixed string (exclusive match), and an untranslatable data ID
        # means we cannot rule the subtree out yet.
        descend = True
        if nextDataId2:
            dataId3 = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                # We have a (partial) Gen3 data ID; let the caller's filter
                # decide whether this subtree is of interest.
                descend = predicate(dataId3)
        if descend:
            for child in self.scanner:
                child.lastDataId2 = nextDataId2
            self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        # Since we are recursing, always request a partial match: our data ID
        # corresponds to a different level than the child handlers operate at.
        # Return the first child translation that succeeds, if any.
        candidates = (child.translate(dataId2, partial=True) for child in self.scanner)
        return next((dataId3 for dataId3 in candidates if dataId3 is not None), None)

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """

    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3 = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        found = FileDataset(refs=[ref], path=path, formatter=self._formatter)
        datasets[self._datasetType].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        rawDataId3 = self._translator(dataId2, partial=partial)
        if partial:
            # A partial data ID cannot be validated against the full dimension
            # graph, so standardize against the universe only.
            return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)
        return DataCoordinate.standardize(rawDataId3, graph=self._datasetType.dimensions)
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # Translation is always partial here: the Gen2 data ID cannot carry
        # the per-HDU detector (see translate below).
        dataId3 = self.translate(nextDataId2, partial=True)

        def get_detectors(filename):
            """Return the CCDNUM of every extension HDU in ``filename``."""
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            try:
                # NOTE: The primary header (HDU=0) does not contain detector
                # data, so start at extension 1.
                detectors = []
                for i in range(1, fitsData.countHdus()):
                    fitsData.setHdu(i)
                    metadata = fitsData.readMetadata()
                    detectors.append(metadata['CCDNUM'])
                return detectors
            finally:
                # Always release the file handle; the original code leaked it.
                fitsData.closeFile()

        if predicate(dataId3):
            # Produce one DatasetRef per detector HDU, all sharing this file.
            refs = []
            for detector in get_detectors(path):
                label = makeCalibrationLabel(self._datasetType.name, nextDataId2["calibDate"],
                                             ccd=detector, filter=nextDataId2.get("filter"))
                newDataId3 = DataCoordinate.standardize(dataId3,
                                                        graph=self._datasetType.dimensions,
                                                        detector=detector,
                                                        calibration_label=label)
                refs.append(DatasetRef(self._datasetType, newDataId3))
            datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

    def translate(self, dataId2: dict, *, partial: bool = False) -> Optional[DataCoordinate]:
        # Docstring inherited from PathElementHandler.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3 = self._translator(dataId2, partial=partial)
        return DataCoordinate.standardize(rawDataId3, universe=self._datasetType.dimensions.universe)