Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py : 34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30from abc import abstractmethod
31import re
32from typing import (
33 Callable,
34 List,
35 Mapping,
36 Optional,
37 Tuple,
38 TYPE_CHECKING
39)
41import lsst.afw.fits
42from lsst.daf.butler import (
43 DataCoordinate,
44 DatasetRef,
45 DatasetType,
46 FileDataset,
47)
48from ..translators import Translator
49from .parser import PathElementParser
50from .scanner import PathElementHandler, DirectoryScanner
# Import only needed for type annotations; guarded to avoid a runtime
# dependency (and potential import cycle) on the formatter machinery.
if TYPE_CHECKING:
    from lsst.daf.butler import FormatterParameter
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """
    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # Rank 0: an ignore pattern carries no Gen2 data ID keys.
        return 0

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        # Report the entry as handled (i.e. matched) iff the full name
        # matches the pattern; no datasets are ever extracted from it.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """
    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(self, path: str, name: str,
                 datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
                 predicate: Callable[[DataCoordinate], bool]) -> bool:
        # Docstring inherited from PathElementHandler.
        nextDataId2 = self._parser.parse(name, self.lastDataId2)
        if nextDataId2 is None:
            # Path element does not match this handler's template.
            return False
        self.handle(path, nextDataId2, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        # More data ID keys means a more specific (higher-rank) match.
        return len(self._parser.keys)

    @abstractmethod
    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """
    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(self, path: str, nextDataId2: dict,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if self._message is not None:
            # Logger.warn is a deprecated alias; Logger.warning is the
            # supported spelling.
            self.log.warning("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that delegates to a nested `DirectoryScanner`
    in order to recurse into a matched subdirectory.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.

    Notes
    -----
    The nested `DirectoryScanner` starts out empty; child handlers must be
    added to it after the `SubdirectoryHandler` is constructed.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner()

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        if not nextDataId2:
            # No data ID extracted yet: the path so far is a fixed string,
            # so the match is exclusive and we always descend.
            shouldDescend = True
        else:
            dataId3, _ = self.translate(nextDataId2, partial=True)
            # When a (partial) Gen3 data ID is available, let the caller's
            # predicate decide whether this subtree is wanted; otherwise
            # descend unconditionally.
            shouldDescend = predicate(dataId3) if dataId3 is not None else True
        if not shouldDescend:
            return
        for child in self.scanner:
            child.lastDataId2 = nextDataId2
        self.scanner.scan(path, datasets, predicate=predicate)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Delegate to the child handlers, returning the first successful
        # translation.  We always request a partial match here because the
        # data ID we hold corresponds to a different (shallower) level than
        # the one the children operate at.
        for child in self.scanner:
            dataId3, calibDate = child.translate(dataId2, partial=True)
            if dataId3 is not None:
                return dataId3, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible
        for and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """

    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
                 formatter: FormatterParameter = None):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        ref = DatasetRef(self._datasetType, dataId3)
        found = FileDataset(refs=[ref], path=path, formatter=self._formatter)
        datasets[self._datasetType][calibDate].append(found)

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        if partial:
            # Partial data IDs cannot be validated against the full
            # dimension graph, so standardize against the universe only.
            dataId3 = DataCoordinate.standardize(
                rawDataId3, universe=self._datasetType.dimensions.universe
            )
        else:
            dataId3 = DataCoordinate.standardize(
                rawDataId3, graph=self._datasetType.dimensions
            )
        return dataId3, calibDate
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """

    def handle(self, path: str, nextDataId2,
               datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
               predicate: Callable[[DataCoordinate], bool]):
        dataId3, calibDate = self.translate(nextDataId2, partial=True)

        def _readDetectors(filename):
            # Walk every extension HDU, collecting one detector number each.
            fitsData = lsst.afw.fits.Fits(filename, 'r')
            # NOTE: The primary header (HDU=0) does not contain detector data.
            found = []
            for hdu in range(1, fitsData.countHdus()):
                fitsData.setHdu(hdu)
                header = fitsData.readMetadata()
                found.append(header['CCDNUM'])
            return found

        if not predicate(dataId3):
            return
        # One DatasetRef per detector, all sharing a single on-disk file.
        refs = [
            DatasetRef(
                self._datasetType,
                DataCoordinate.standardize(dataId3,
                                           graph=self._datasetType.dimensions,
                                           detector=det),
            )
            for det in _readDetectors(path)
        ]
        datasets[self._datasetType][calibDate].append(
            FileDataset(refs=refs, path=path, formatter=self._formatter)
        )

    def translate(self, dataId2: dict, *, partial: bool = False
                  ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dataId3 = DataCoordinate.standardize(
            rawDataId3, universe=self._datasetType.dimensions.universe
        )
        return dataId3, calibDate