Coverage for python/lsst/obs/base/gen2to3/repoWalker/handlers.py: 38%
128 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-23 10:48 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Concrete implementations of `PathElementHandler`.
23The `PathElementHandler` ABC is defined in ``scanner.py`` instead of here to
24avoid a circular dependency between modules.
25"""
26from __future__ import annotations
28__all__ = ["IgnoreHandler", "SkipHandler", "SubdirectoryHandler", "TargetFileHandler"]
30import re
31from abc import abstractmethod
32from typing import TYPE_CHECKING, Callable, List, Mapping, Optional, Tuple
34import lsst.afw.fits
35from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, FileDataset, Progress
37from ..translators import Translator
38from .parser import PathElementParser
39from .scanner import DirectoryScanner, PathElementHandler
41if TYPE_CHECKING:
42 from lsst.daf.butler import FormatterParameter
class IgnoreHandler(PathElementHandler):
    """A `PathElementHandler` that matches via a regular expression, and does
    nothing.

    An `IgnoreHandler` is used to ignore file or directory patterns that can
    occur at any level in the directory tree, and have no relation to any
    Gen2 filename template.

    Parameters
    ----------
    pattern : `re.Pattern`
        A regular expression pattern.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    """

    def __init__(self, pattern: re.Pattern, isForFiles: bool):
        super().__init__()
        self._pattern = pattern
        self._isForFiles = isForFiles

    __slots__ = ("_pattern", "_isForFiles")

    def __str__(self):
        return f"{type(self).__name__}({self._pattern}, isForFiles={self._isForFiles})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementHandler.
        return 0

    def __call__(
        self,
        path: str,
        name: str,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ) -> bool:
        # Docstring inherited from PathElementHandler.
        # A full-pattern match consumes the path element; no datasets are
        # ever produced.  `fullmatch` returns None on failure, so convert
        # to a bool explicitly instead of branching on truthiness.
        return self._pattern.fullmatch(name) is not None
class ParsedPathElementHandler(PathElementHandler):
    """An intermediate base class for `PathElementHandler` classes that utilize
    a `PathElementParser` to match a Gen2 filename template.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    """

    def __init__(self, parser: PathElementParser):
        super().__init__()
        self._parser = parser

    __slots__ = ("_parser",)

    def __str__(self):
        return f"{type(self).__name__}(parser={self._parser})"

    def __call__(
        self,
        path: str,
        name: str,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ) -> bool:
        # Docstring inherited from PathElementParser.
        # Delegate the actual matching to the parser; a None result means
        # this path element does not belong to this handler.
        parsed = self._parser.parse(name, self.lastDataId2)
        if parsed is None:
            return False
        self.handle(path, parsed, datasets, predicate=predicate)
        return True

    @property
    def rank(self) -> int:
        # Docstring inherited from PathElementParser.
        # More data ID keys means a more specific (higher-priority) match.
        return len(self._parser.keys)

    @abstractmethod
    def handle(
        self,
        path: str,
        nextDataId2: dict,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        """Customization hook for ``__call__``.

        Subclasses must override this method, while external callers (i.e.
        `DirectoryScanner`) should instead invoke `__call__`.

        Parameters
        ----------
        path : `str`
            Full path of the file or directory.
        nextDataId2 : `dict`
            Gen2 data ID (usually partial) extracted from the path so far.
        datasets : `dict` [`DatasetType`, `list` [`FileDataset`] ]
            Dictionary that found datasets should be added to.
        predicate : `~collections.abc.Callable`
            A callable taking a single `DataCoordinate` argument and returning
            `bool`, indicating whether that (Gen3) data ID represents one
            that should be included in the scan.
        """
        raise NotImplementedError()
class SkipHandler(ParsedPathElementHandler):
    """A `ParsedPathElementHandler` that does nothing with an entry other
    than optionally logging a warning message.

    A `SkipHandler` is used for Gen2 datasets that we can recognize but do not
    want to (or cannot) extract Gen3 datasets from, or other files/directories
    that always appear at a fixed level in the directory tree.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    isForFiles : `bool`
        Whether this handler should be applied to files (`True`) or
        directories (`False`).
    message : `str`, optional
        A message to log at warning level when this handler matches a path
        entry.  If `None`, matched entries will be silently skipped.
    """

    def __init__(self, parser: PathElementParser, isForFiles: bool, message: Optional[str]):
        super().__init__(parser=parser)
        self._isForFiles = isForFiles
        self._message = message

    __slots__ = ("_message", "_isForFiles")

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return self._isForFiles

    def handle(
        self,
        path: str,
        nextDataId2: dict,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # Silent skip when no message was configured.
        if self._message is None:
            return
        self.log.warning("Skipping %s: %s", path, self._message)
class SubdirectoryHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that uses a `DirectoryScanner` to recurse.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    progress : `Progress`, optional
        Object to use to report incremental progress.

    Notes
    -----
    The nested `DirectoryScanner` is default-constructed and should be
    populated with child handlers after the `SubdirectoryHandler` is created.
    """

    def __init__(self, parser: PathElementParser, progress: Optional[Progress] = None):
        super().__init__(parser=parser)
        self.scanner = DirectoryScanner(progress=progress)

    __slots__ = ("scanner",)

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return False

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        # Default to descending: an empty data ID means the path so far is
        # just a fixed string, and a failed partial translation gives us no
        # Gen3 data ID to test against the predicate.
        shouldDescend = True
        if nextDataId2:
            dataId3, _ = self.translate(nextDataId2, partial=True)
            if dataId3 is not None:
                shouldDescend = predicate(dataId3)
        if not shouldDescend:
            return
        for childHandler in self.scanner:
            childHandler.lastDataId2 = nextDataId2
        self.scanner.scan(path, datasets, predicate=predicate)

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        # Since we're recursing, we always ask for a partial match: the data
        # ID we have corresponds to a different level than the one the child
        # handlers operate at.  Return the first successful translation.
        for childHandler in self.scanner:
            translated, calibDate = childHandler.translate(dataId2, partial=True)
            if translated is not None:
                return translated, calibDate
        return None, None

    scanner: DirectoryScanner
    """Scanner object that holds handlers for the entries of the subdirectory
    matched by this handler (`DirectoryScanner`).
    """
class TargetFileHandler(ParsedPathElementHandler):
    """A `PathElementHandler` that matches files that correspond to target
    datasets and outputs `FileDataset` instances for them.

    Parameters
    ----------
    parser : `PathElementParser`
        An object that matches the path element this handler is responsible for
        and extracts a (partial) Gen2 data ID from it.
    translator : `Translator`
        Object that translates data IDs from Gen2 to Gen3.
    datasetType : `lsst.daf.butler.DatasetType`
        Gen3 dataset type for the datasets this handler matches.
    formatter : `lsst.daf.butler.Formatter` or `str`, optional
        A Gen 3 formatter class or fully-qualified name.
    """

    def __init__(
        self,
        parser: PathElementParser,
        translator: Translator,
        datasetType: DatasetType,
        formatter: FormatterParameter = None,
    ):
        super().__init__(parser=parser)
        self._translator = translator
        self._datasetType = datasetType
        self._formatter = formatter

    __slots__ = ("_translator", "_datasetType", "_formatter")

    def __str__(self):
        return f"{type(self).__name__}({self._translator}, {self._datasetType})"

    def isForFiles(self) -> bool:
        # Docstring inherited from PathElementHandler.
        return True

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        # Docstring inherited from ParsedPathElementHandler.
        dataId3, calibDate = self.translate(nextDataId2, partial=False)
        if not predicate(dataId3):
            return
        found = FileDataset(
            refs=[DatasetRef(self._datasetType, dataId3)], path=path, formatter=self._formatter
        )
        datasets[self._datasetType][calibDate].append(found)

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        # Docstring inherited from PathElementHandler.
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dimensions = self._datasetType.dimensions
        # A partial data ID cannot be checked against the full dimension
        # graph, so standardize against the universe instead.
        if partial:
            dataId3 = DataCoordinate.standardize(rawDataId3, universe=dimensions.universe)
        else:
            dataId3 = DataCoordinate.standardize(rawDataId3, graph=dimensions)
        return dataId3, calibDate
class MultiExtensionFileHandler(TargetFileHandler):
    """Handler for FITS files that store image and metadata in multiple HDUs
    per file, for example DECam raw and Community Pipeline calibrations.

    Notes
    -----
    For now, this is only used by DECam, and may need to be made more generic
    (e.g. making ``metadata['CCDNUM']`` use a configurable field) to be used
    with other obs packages.
    """

    def handle(
        self,
        path: str,
        nextDataId2,
        datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]],
        *,
        predicate: Callable[[DataCoordinate], bool],
    ):
        dataId3, calibDate = self.translate(nextDataId2, partial=True)
        if not predicate(dataId3):
            return
        # Collect the detector of each extension HDU.  The primary header
        # (HDU 0) does not contain detector data, so start at HDU 1.
        fitsData = lsst.afw.fits.Fits(path, "r")
        detectors = []
        for hduIndex in range(1, fitsData.countHdus()):
            fitsData.setHdu(hduIndex)
            detectors.append(fitsData.readMetadata()["CCDNUM"])
        # One ref per detector, all pointing at the same file.
        refs = [
            DatasetRef(
                self._datasetType,
                DataCoordinate.standardize(
                    dataId3, graph=self._datasetType.dimensions, detector=detector
                ),
            )
            for detector in detectors
        ]
        datasets[self._datasetType][calibDate].append(
            FileDataset(refs=refs, path=path, formatter=self._formatter)
        )

    def translate(
        self, dataId2: dict, *, partial: bool = False
    ) -> Tuple[Optional[DataCoordinate], Optional[str]]:
        assert partial is True, "We always require partial, to ignore 'ccdnum'"
        rawDataId3, calibDate = self._translator(dataId2, partial=partial)
        dataId3 = DataCoordinate.standardize(
            rawDataId3, universe=self._datasetType.dimensions.universe
        )
        return dataId3, calibDate