Coverage for python/lsst/obs/base/gen2to3/translators.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Names exported by ``from <this module> import *``.
__all__ = (
    "Translator",
    "TranslatorFactory",
    "KeyHandler",
    "CopyKeyHandler",
    "ConstantKeyHandler",
    "CalibKeyHandler",
    "AbstractToPhysicalFilterKeyHandler",
    "PhysicalToAbstractFilterKeyHandler",
    "makeCalibrationLabel",
)
import itertools
from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List, Union
from abc import ABCMeta, abstractmethod

from lsst.log import Log
from lsst.skymap import BaseSkyMap
def makeCalibrationLabel(datasetTypeName: str, calibDate: str, ccd: Optional[int] = None,
                         filter: Optional[str] = None) -> str:
    """Build the Gen3 calibration_label string for a Gen2 calibration data ID.

    The label is ``gen2/`` followed by the dataset type name, the calibration
    date and, when given, the zero-padded detector ID and the filter, all
    joined with underscores.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type this calibration label identifies.
    calibDate : `str`
        Date string used in the Gen2 template.
    ccd : `int`, optional
        Detector ID used in the Gen2 template.  Formatted with at least
        three digits (``5`` becomes ``"005"``).  Omitted when `None`.
    filter : `str`, optional
        Filter used in the Gen2 template.  Omitted when `None`.

    Returns
    -------
    label : `str`
        Calibration label string.
    """
    # TODO: this function is probably HSC-specific, but I don't know how other
    # obs calib registries behave so I don't know (yet) how to generalize it.
    parts = [datasetTypeName, calibDate]
    if ccd is not None:
        parts += [f"{ccd:03d}"]
    if filter is not None:
        parts += [filter]
    joined = "_".join(parts)
    return f"gen2/{joined}"
class KeyHandler(metaclass=ABCMeta):
    """Abstract base for helpers that each produce the value of exactly one
    Gen3 data ID key.

    Subclasses implement `extract`; `translate` stores the extracted value in
    the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __str__(self):
        return "{}({})".format(type(self).__name__, self.dimension)

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Set this handler's key in a Gen3 data ID from a Gen2 data ID.

        This method is implemented by the base class and is not expected to
        be re-implemented by subclasses.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID to draw key-value pairs from.
        gen3id: `dict`
            Gen3 data ID to update in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        # Extract first so that gen3id is left untouched if extraction raises.
        value = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                             datasetTypeName=datasetTypeName)
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Compute the Gen3 data ID value for this handler's key.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID to draw key-value pairs from.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        raise NotImplementedError()
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose Gen3 value is fixed at construction and does not
    depend on the Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    value : `object`
        Data ID value.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # All arguments are ignored; the stored constant is always returned.
        return self.value
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that simply copies a value from a Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Name of the Gen2 data ID key to copy the value from.  If `None`
        (default), ``dimension`` is used as the Gen2 key as well.
    dtype : `type`, optional
        If not `None`, the Gen2 value is passed through ``dtype(value)``
        before being returned, so values are converted to (not merely
        checked against) this type.
    """
    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    __slots__ = ("gen2key", "dtype")

    def __str__(self):
        # Reports the Gen2 key being copied (not the Gen3 dimension).
        return f"{type(self).__name__}({self.gen2key}, {self.dtype})"

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Return the Gen2 value stored under ``gen2key``, converted with
        ``dtype`` when one was given.

        Parameters are as for `KeyHandler.extract`; only ``gen2id`` is used.

        Raises
        ------
        KeyError
            Raised if ``gen2key`` is not present in ``gen2id``.
        TypeError
            Raised if ``dtype`` cannot convert the Gen2 value.
        """
        value = gen2id[self.gen2key]
        if self.dtype is None:
            return value
        try:
            return self.dtype(value)
        except (TypeError, ValueError) as err:
            # Conversions can fail with either exception type (e.g.
            # int("x") raises ValueError, int(None) raises TypeError);
            # report both uniformly with the offending value and dimension.
            raise TypeError(
                f"'{value}' is not a valid value for {self.dimension}; "
                f"expected {self.dtype.__name__}, got {type(value).__name__}."
            ) from err
class PatchKeyHandler(KeyHandler):
    """A KeyHandler that turns a Gen2 ``"x,y"`` patch string into the value
    returned by the tract's ``getSequentialPatchIndex``.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The tract is looked up first, so a missing "tract" key is reported
        # before a missing "patch" key.
        tractInfo = skyMap[gen2id["tract"]]
        xText, yText = gen2id["patch"].split(",")
        patchIndex = (int(xText), int(yText))
        return tractInfo.getSequentialPatchIndex(tractInfo[patchIndex])
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler that fills the Gen3 ``skymap`` key with the skymap name
    passed to the translator.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID is not consulted at all.
        return skyMapName
class CalibKeyHandler(KeyHandler):
    """A KeyHandler for master calibration datasets.

    Produces the Gen3 ``calibration_label`` by passing the dataset type name
    and the Gen2 ``calibDate``, detector and ``filter`` values to
    `makeCalibrationLabel`.

    Parameters
    ----------
    ccdKey : `str`, optional
        Gen2 data ID key that holds the detector ID.
    """

    __slots__ = ("ccdKey",)

    def __init__(self, ccdKey="ccd"):
        super().__init__("calibration_label")
        self.ccdKey = ccdKey

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # "calibDate" is required; the detector and filter are optional and
        # are passed as None when absent.
        calibDate = gen2id["calibDate"]
        detector = gen2id.get(self.ccdKey)
        filterName = gen2id.get("filter")
        return makeCalibrationLabel(datasetTypeName, calibDate, ccd=detector, filter=filterName)
class PhysicalToAbstractFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``abstract_filter``.

    Note that multiple physical filters can potentially map to one abstract
    filter, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``abstract_filter`` attributes.
        Entries whose ``physical_filter`` is `None` are ignored.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("abstract_filter")
        physicalToAbstract = {}
        for definition in filterDefinitions:
            if definition.physical_filter is not None:
                physicalToAbstract[definition.physical_filter] = definition.abstract_filter
        self._map = physicalToAbstract

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Filters with no known mapping are passed through unchanged.
        filterName = gen2id["filter"]
        return self._map[filterName] if filterName in self._map else filterName
class AbstractToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``abstract_filter``
    keys in gen3 but should be mapped to ``physical_filter``.

    Note that one abstract filter can potentially map to multiple physical
    filters, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``abstract_filter`` attributes.
        Entries whose ``abstract_filter`` is `None` are ignored.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        abstractToPhysical = {}
        for definition in filterDefinitions:
            if definition.abstract_filter is not None:
                abstractToPhysical[definition.abstract_filter] = definition.physical_filter
        self._map = abstractToPhysical

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Filters with no known mapping are passed through unchanged.
        filterName = gen2id["filter"]
        return self._map[filterName] if filterName in self._map else filterName
class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Notes
    -----
    Rules are grouped by instrument name and then by dataset type name, with
    `None` in either position meaning "match any".  Within a group, rules are
    tried in the order in which they were added, so the order of `addRule`
    calls is significant.
    """
    def __init__(self):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # gen2keys, handler, keys consumed when the rule matches
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
            ]
        ] = {
            None: {
                None: []
            }
        }
        self._addDefaultRules()

    def __str__(self):
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    # ``consume`` is a frozenset; it is empty (falsy) when the
                    # rule consumes no keys.
                    consumed = " (consumed)" if consume else ""
                    lines.append(f" {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    def addRule(self, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: Union[bool, Iterable[str]] = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`
            Gen2 data ID keys that must all be present for this rule to
            match.  Iterated exactly once, so one-shot iterators are safe.
        consume : `bool` or iterable of `str`
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.
            May also be a `tuple` listing only the keys to consume.
            Any false value means no keys are consumed.
        """
        # Materialize gen2keys once.  Building the frozenset separately for
        # the rule keys and for the consumed keys would leave the rule with an
        # empty key set (matching everything) when given a one-shot iterator.
        ruleKeys = frozenset(gen2keys)
        # Ensure the consumed keys are always a frozenset, so they can be
        # processed uniformly from here on.
        if consume is True:
            consumedKeys = ruleKeys
        elif consume:
            consumedKeys = frozenset(consume)
        else:
            consumedKeys = frozenset()
        # Find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((ruleKeys, handler, consumedKeys))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Add an "unbounded" calibration_label to instrument-level
        # transmission datasets; these are considered calibration products in
        # Gen3.
        for datasetTypeName in ("transmission_sensor", "transmission_optics", "transmission_filter"):
            self.addRule(ConstantKeyHandler("calibration_label", "unbounded"),
                         datasetTypeName=datasetTypeName)

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(self, instrumentName: str,
                                  calibFilterType: str = "physical_filter",
                                  detectorKey: str = "ccd",
                                  exposureKey: str = "visit"):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``abstract_filter``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey or calibDate.  (Several of these rules may match, in
        # which case instrument is simply set in the same dict more than
        # once.)
        for gen2key in (exposureKey, detectorKey, "calibDate"):
            self.addRule(ConstantKeyHandler("instrument", instrumentName),
                         instrument=instrumentName, gen2keys=(gen2key,), consume=False)

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(CopyKeyHandler("exposure", exposureKey),
                     instrument=instrumentName, datasetTypeName="raw", gen2keys=(exposureKey,),
                     consume=(exposureKey, "filter"))

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(CopyKeyHandler("visit"), instrument=instrumentName, gen2keys=("visit",),
                     consume=("visit", "filter"))

        # Copy Gen2 detectorKey (e.g. 'ccd') to Gen3 'detector'.
        self.addRule(CopyKeyHandler("detector", detectorKey),
                     instrument=instrumentName,
                     gen2keys=(detectorKey,))

        # Add instrument for transmission curve datasets (transmission_sensor is
        # already handled by the above rules).
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_optics")
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_atmosphere")
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_filter")
        self.addRule(CopyKeyHandler("physical_filter", "filter"),
                     instrument=instrumentName, datasetTypeName="transmission_filter")

        # Add calibration mapping for filter dependent types
        for calibType in ('flat', 'sky', 'fringe'):
            self.addRule(CopyKeyHandler(calibFilterType, "filter"),
                         instrument=instrumentName, datasetTypeName=calibType)

        # Translate Gen2 calibDate and datasetType to Gen3 calibration_label.
        # NOTE(review): unlike the rules above, this one is registered for
        # *any* instrument (no ``instrument=`` argument) even though its
        # handler is built from this instrument's detectorKey.  Confirm that
        # is intended before calling this method for more than one instrument
        # on the same factory.
        self.addRule(CalibKeyHandler(detectorKey), gen2keys=("calibDate",))

    def makeMatching(self, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.  Only the keys are used.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.

        Notes
        -----
        Rules are considered from most to least specific: this instrument and
        dataset type, this instrument and any dataset type, any instrument and
        this dataset type, then any instrument and any dataset type.  A rule
        matches when all of its Gen2 keys are still in the set of unconsumed
        keys; a matching rule then removes its consumed keys from that set.
        """
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        for ruleKeys, handler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(handler)
                targetKeys -= consume
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName)
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.  They are applied
        in list order.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName")

    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName

    def __str__(self):
        handlerList = ",".join(map(str, self.handlers))
        return f"{type(self).__name__}(dtype={self.datasetTypeName}, handlers=[{handlerList}])"

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False, log: Optional[Log] = None):
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, a handler that raises `KeyError` is skipped and the
            remaining handlers are still applied.  If `False` (default) the
            `KeyError` propagates.
        log : `lsst.log.Log`, optional
            If given, receives a debug message for each handler skipped
            because ``partial`` is `True`.

        Returns
        -------
        gen3id : `dict`
            New dictionary holding the keys set by the handlers.
        """
        translated = {}
        for keyHandler in self.handlers:
            try:
                keyHandler.translate(gen2id, translated, skyMap=self.skyMap,
                                     skyMapName=self.skyMapName,
                                     datasetTypeName=self.datasetTypeName)
            except KeyError:
                if not partial:
                    raise
                if log is not None:
                    log.debug("Failed to translate %s from %s.", keyHandler.dimension, gen2id)
        return translated

    @property
    def dimensionNames(self):
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset({keyHandler.dimension for keyHandler in self.handlers})