# NOTE(review): coverage-report extraction artifact (was page header:
# "Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 34%"
# plus the report's hot-key navigation help). Not part of the source file.
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Concrete implementations of `PathElementHandler`.

The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
avoid a circular dependency between modules.
"""
from __future__ import annotations

__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]

from abc import abstractmethod
import re
from typing import (
    Callable,
    List,
    Mapping,
    Optional,
    Tuple,
    TYPE_CHECKING,
)

import lsst.afw.fits
from lsst.daf.butler import (
    DataCoordinate,
    DatasetRef,
    DatasetType,
    FileDataset,
    Progress,
)
from ..translators import Translator
from .parser import PathElementParser
from .scanner import PathElementHandler, DirectoryScanner

if TYPE_CHECKING:
    # Needed only for annotations; guarded to avoid importing it at runtime.
    from lsst.daf.butler import FormatterParameter
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression and
    takes no further action.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Lowest rank: an ignore pattern extracts no data ID keys.
        return 0

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # A match consumes the entry (returns True) but deliberately yields
        # no datasets.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            # This handler is not responsible for this path element.
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More data ID keys matched means a more specific (higher-rank)
        # handler.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            # `warning` instead of the deprecated `warn` alias; assumes
            # self.log follows the standard `logging.Logger` API — confirm
            # against PathElementHandler.
            self.log.warning("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    progress : `Progress`, optional
        Object to use to report incremental progress.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser, progress: Optional[Progress] = None):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner(progress=progress)

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        shouldDescend = True
        if nextDataId2:
            # We have a (partial) Gen2 data ID; descend only when the
            # corresponding Gen3 data ID (when one can be formed at all)
            # passes the predicate.
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                shouldDescend = predicate(dataId3)
        # (an empty nextDataId2 means the path so far is just a fixed
        # string, so the match is exclusive and we always descend.)
        if not shouldDescend:
            return
        for child in self.scanner:
            child.lastDataId2 = nextDataId2
        self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Because we are recursing, we always ask the children for a partial
        # match: the data ID we hold corresponds to a different directory
        # level than the one the child handlers operate at.
        for child in self.scanner:
            dataId3, calibDate = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files corresponding to target
    datasets, emitting a `FileDataset` instance for each.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """
    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        # At a file we have the complete data ID, so the translation is no
        # longer partial.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        found = FileDataset(
            refs=[DatasetRef(self._datasetType, dataId3)],
            path=path, formatter=self._formatter
        )
        datasets[self._datasetType][calibDate].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # A partial data ID can only be standardized against the full
            # dimension universe, not this dataset type's specific graph.
            dataId3 = DataCoordinate.standardize(
                rawDataId3, universe=self._datasetType.dimensions.universe
            )
        else:
            dataId3 = DataCoordinate.standardize(
                rawDataId3, graph=self._datasetType.dimensions
            )
        return dataId3, calibDate
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """
    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def readDetectorIds(filename):
            # NOTE: The primary header (HDU=0) does not contain detector
            # data, so only the extension HDUs are examined.
            # NOTE(review): the Fits handle is not explicitly closed here —
            # presumably released on garbage collection; confirm against
            # lsst.afw.fits.
            fitsFile = lsst.afw.fits.Fits(filename, 'r')
            found = []
            for hdu in range(1, fitsFile.countHdus()):
                fitsFile.setHdu(hdu)
                found.append(fitsFile.readMetadata()['CCDNUM'])
            return found

        if not predicate(dataId3):
            return
        refs = []
        for detector in readDetectorIds(path):
            fullDataId3 = DataCoordinate.standardize(dataId3,
                                                     graph=self._datasetType.dimensions,
                                                     detector=detector)
            refs.append(DatasetRef(self._datasetType, fullDataId3))
        datasets[self._datasetType][calibDate].append(
            FileDataset(refs=refs, path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # The Gen2 'ccdnum' key is deliberately ignored here (one file holds
        # many detectors), so translation must always be partial.
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dataId3 = DataCoordinate.standardize(
            rawDataId3, universe=self._datasetType.dimensions.universe
        )
        return dataId3, calibDate