Coverage for python/lsst/obs/base/gen2to3/translators.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Translator", "TranslatorFactory", "KeyHandler", "CopyKeyHandler", "ConstantKeyHandler",
25 "CalibKeyHandler", "AbstractToPhysicalFilterKeyHandler", "PhysicalToAbstractFilterKeyHandler",
26 "makeCalibrationLabel")
28import itertools
29from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List
30from abc import ABCMeta, abstractmethod
32from lsst.log import Log
33from lsst.skymap import BaseSkyMap
def makeCalibrationLabel(datasetTypeName: str, calibDate: str, ccd: Optional[int] = None,
                         filter: Optional[str] = None) -> str:
    """Build the Gen3 ``calibration_label`` string for a Gen2 calibration
    data ID.

    The label is ``gen2/`` followed by the dataset type name, the calibDate,
    and (when given) the zero-padded detector ID and the filter, all joined
    with underscores.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type this calibration label identifies.
    calibDate : `str`
        Date string used in the Gen2 template.
    ccd : `int`, optional
        Detector ID used in the Gen2 template; formatted with at least three
        digits.  Omitted from the label when `None`.
    filter : `str`, optional
        Filter used in the Gen2 template.  Omitted from the label when
        `None`.

    Returns
    -------
    label : `str`
        Calibration label string.
    """
    # TODO: this function is probably HSC-specific, but I don't know how other
    # obs calib registries behave so I don't know (yet) how to generalize it.
    parts = [datasetTypeName, calibDate]
    if ccd is not None:
        parts.append(format(ccd, "03d"))
    if filter is not None:
        parts.append(filter)
    return "gen2/" + "_".join(parts)
class KeyHandler(metaclass=ABCMeta):
    """Abstract base for the helpers a Translator uses, each of which is
    responsible for exactly one Gen3 data ID key.

    Subclasses implement `extract`; `translate` stores the extracted value
    under ``dimension`` in the Gen3 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __repr__(self):
        return "{}({}, ...)".format(type(self).__name__, self.dimension)

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Write this handler's key-value pair into a Gen3 data ID dict.

        The value comes from `extract`; subclasses are expected to override
        that method rather than this one.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        gen3id: `dict`
            Gen3 data ID to update in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        value = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                             datasetTypeName=datasetTypeName)
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Compute the Gen3 data ID value for this handler's dimension.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.

        Returns
        -------
        value : `object`
            Value to store under ``self.dimension`` in the Gen3 data ID.
        """
        raise NotImplementedError()
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose Gen3 value is fixed at construction and does not
    depend on the Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    value : `object`
        Data ID value.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # All arguments are ignored; the stored constant is always returned.
        return self.value
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that copies a single value out of the Gen2 data ID,
    optionally converting it with ``dtype``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Key to read from the Gen2 data ID.  Defaults to ``dimension`` when
        `None`.
    dtype : `type`, optional
        If not `None`, a callable applied to the Gen2 value to convert it
        (e.g. `int`).  A `ValueError` raised by the conversion is re-raised
        as `TypeError`.
    """

    __slots__ = ("gen2key", "dtype")

    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = dimension if gen2key is None else gen2key
        self.dtype = dtype

    def __str__(self):
        return "{}({}, {})".format(type(self).__name__, self.gen2key, self.dtype)

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        value = gen2id[self.gen2key]
        if self.dtype is None:
            # No conversion requested; pass the Gen2 value through untouched.
            return value
        try:
            return self.dtype(value)
        except ValueError as err:
            raise TypeError(
                f"'{value}' is not a valid value for {self.dimension}; "
                f"expected {self.dtype.__name__}, got {type(value).__name__}."
            ) from err
class PatchKeyHandler(KeyHandler):
    """A KeyHandler that converts a Gen2 ``"x,y"`` patch string into the
    sequential patch index reported by the skymap's tract.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # Look up the tract first; the patch index is only meaningful
        # relative to it.
        tractInfo = skyMap[gen2id["tract"]]
        xStr, yStr = gen2id["patch"].split(",")
        patchIndex = (int(xStr), int(yStr))
        return tractInfo.getSequentialPatchIndex(tractInfo[patchIndex])
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler that fills the Gen3 ``skymap`` key with the skymap name
    supplied to the translator.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID is not consulted; the name is passed straight
        # through.
        return skyMapName
class CalibKeyHandler(KeyHandler):
    """A KeyHandler that produces the Gen3 ``calibration_label`` for master
    calibration datasets.

    Parameters
    ----------
    ccdKey : `str`, optional
        Gen2 data ID key that holds the detector ID (default ``"ccd"``).
    """

    __slots__ = ("ccdKey",)

    def __init__(self, ccdKey="ccd"):
        super().__init__("calibration_label")
        self.ccdKey = ccdKey

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # calibDate is required; the detector and filter are optional and are
        # passed as None when absent from the Gen2 data ID.
        calibDate = gen2id["calibDate"]
        detector = gen2id.get(self.ccdKey)
        gen2filter = gen2id.get("filter")
        return makeCalibrationLabel(datasetTypeName, calibDate, ccd=detector, filter=gen2filter)
class PhysicalToAbstractFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``abstract_filter``.

    Note that multiple physical filters can potentially map to one abstract
    filter, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``abstract_filter`` attributes;
        entries whose ``physical_filter`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("abstract_filter")
        physicalToAbstract = {}
        for definition in filterDefinitions:
            if definition.physical_filter is not None:
                physicalToAbstract[definition.physical_filter] = definition.abstract_filter
        self._map = physicalToAbstract

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        gen2filter = gen2id["filter"]
        try:
            return self._map[gen2filter]
        except KeyError:
            # Unknown filters are passed through unchanged.
            return gen2filter
class AbstractToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``abstract_filter``
    keys in gen3 but should be mapped to ``physical_filter``.

    Note that one abstract filter can potentially map to multiple physical
    filters, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``abstract_filter`` and ``physical_filter`` attributes;
        entries whose ``abstract_filter`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        abstractToPhysical = {}
        for definition in filterDefinitions:
            if definition.abstract_filter is not None:
                abstractToPhysical[definition.abstract_filter] = definition.physical_filter
        self._map = abstractToPhysical

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        gen2filter = gen2id["filter"]
        try:
            return self._map[gen2filter]
        except KeyError:
            # Unknown filters are passed through unchanged.
            return gen2filter
class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Rules are stored per instrument and per dataset type; `makeMatching`
    tries them from most to least specific.

    Parameters
    ----------
    log : `lsst.log.Log`, optional
        A logger for diagnostic messages.
    """
    def __init__(self, log: Optional[Log] = None):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # (gen2keys required to match, handler, gen2keys consumed)
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
            ]
        ] = {
            None: {
                None: []
            }
        }
        self._addDefaultRules()
        if log is None:
            log = Log.getLogger("obs.base.gen2to3.TranslatorFactory")
        self.log = log

    def __str__(self):
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    # ``consume`` is a frozenset; it is truthy when the rule
                    # consumes at least one key.
                    consumed = " (consumed)" if consume else ""
                    lines.append(f" {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    def addRule(self, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: bool = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`
            Gen2 data ID keys that must all be present for this rule to
            match.  A single `str` is treated as one key, not as a sequence
            of characters.
        consume : `bool`, `str`, or iterable of `str`
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.
            May also be an iterable (e.g. a `tuple`) listing only the keys to
            consume, or a single key as a `str`.  Any false value consumes
            nothing.
        """
        # Materialize gen2keys exactly once: it may be a one-shot iterator,
        # and it is needed both for matching and (when consume is True) for
        # consumption.  A bare string is a single key.
        if isinstance(gen2keys, str):
            gen2keys = (gen2keys,)
        ruleKeys = frozenset(gen2keys)
        # Ensure the consumed keys are always a frozenset, so we can process
        # them uniformly from here on.
        if consume is True:
            consumed = ruleKeys
        elif not consume:
            consumed = frozenset()
        elif isinstance(consume, str):
            consumed = frozenset((consume,))
        else:
            consumed = frozenset(consume)
        # Find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((ruleKeys, handler, consumed))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Translate any "filter" values that appear alongside "tract" to
        # "abstract_filter".  This is _not_ the right choice for instruments
        # that use "physical_filter" values for coadds in Gen2 (like HSC);
        # those will need to add a rule that invokes
        # PhysicalToAbstractFilterKeyHandler instead for just that instrument,
        # but the same criteria otherwise.  That will override this one,
        # because instrument-specific rules match first, and that rule will
        # consume the Gen2 "filter" key before this rule has a chance to fire.
        self.addRule(CopyKeyHandler("abstract_filter", "filter"),
                     gen2keys=("filter", "tract"),
                     consume=("filter",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Give instrument-level transmission datasets the constant
        # calibration_label "unbounded"; these are considered calibration
        # products in Gen3.
        for datasetTypeName in ("transmission_sensor", "transmission_optics", "transmission_filter"):
            self.addRule(ConstantKeyHandler("calibration_label", "unbounded"),
                         datasetTypeName=datasetTypeName)

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(self, instrumentName: str,
                                  calibFilterType: str = "physical_filter",
                                  detectorKey: str = "ccd",
                                  exposureKey: str = "visit"):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``abstract_filter``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey, or calibDate.  (Several of these rules may match, so
        # we may set instrument in the same dict more than once.)
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, gen2keys=(exposureKey,), consume=False)
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, gen2keys=(detectorKey,), consume=False)
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, gen2keys=("calibDate",), consume=False)

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(CopyKeyHandler("exposure", exposureKey),
                     instrument=instrumentName, datasetTypeName="raw", gen2keys=(exposureKey,),
                     consume=(exposureKey, "filter"))

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(CopyKeyHandler("visit"), instrument=instrumentName, gen2keys=("visit",),
                     consume=("visit", "filter"))

        # Copy Gen2 detectorKey to Gen3 'detector'.
        self.addRule(CopyKeyHandler("detector", detectorKey),
                     instrument=instrumentName,
                     gen2keys=(detectorKey,))

        # Add instrument for transmission curve datasets (transmission_sensor is
        # already handled by the above rules).
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_optics")
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_atmosphere")
        self.addRule(ConstantKeyHandler("instrument", instrumentName),
                     instrument=instrumentName, datasetTypeName="transmission_filter")
        self.addRule(CopyKeyHandler("physical_filter", "filter"),
                     instrument=instrumentName, datasetTypeName="transmission_filter")

        # Add calibration mapping for filter dependent types
        for calibType in ('flat', 'sky', 'fringe'):
            self.addRule(CopyKeyHandler(calibFilterType, "filter"),
                         instrument=instrumentName, datasetTypeName=calibType)

        # Translate Gen2 calibDate and datasetType to Gen3 calibration_label.
        # NOTE(review): unlike every other rule in this method, this one is
        # registered without ``instrument=instrumentName``, so it lands in the
        # any-instrument rules even though it captures this instrument's
        # detectorKey.  Left as-is because callers may rely on it; confirm
        # whether that is intended.
        self.addRule(CalibKeyHandler(detectorKey), gen2keys=("calibDate",))

    def makeMatching(self, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.  Only the keys are used
            for rule matching.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        # Most specific rules come first so they can consume keys before
        # more generic rules get a chance to match them.
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        self.log.debug("Constructing data ID translator for %s with Gen2 keys %s...",
                       datasetTypeName, gen2keys)
        for ruleKeys, ruleHandlers, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(ruleHandlers)
                targetKeys -= consume
        self.log.debug("...matched %d handlers: %s, with %s unmatched.",
                       len(matchedHandlers), matchedHandlers, targetKeys)
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName, log=self.log)
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    log : `lsst.log.Log`
        Logger used for diagnostic messages during translation.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName", "log")

    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str, log: Log):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName
        self.log = log

    def __str__(self):
        handlerList = ",".join(map(str, self.handlers))
        return "{}(dtype={}, handlers=[{}])".format(type(self).__name__, self.datasetTypeName, handlerList)

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False):
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, a handler that raises `KeyError` is skipped (with a
            debug log message) and its dimension is left out of the result.
            If `False` (default), the `KeyError` propagates.

        Returns
        -------
        gen3id : `dict`
            Newly created Gen3 data ID populated by the handlers in order.
        """
        translated = {}
        for keyHandler in self.handlers:
            try:
                keyHandler.translate(gen2id, translated, skyMap=self.skyMap,
                                     skyMapName=self.skyMapName,
                                     datasetTypeName=self.datasetTypeName)
            except KeyError:
                if not partial:
                    raise
                self.log.debug("Failed to translate %s from %s (this may not be an error).",
                               keyHandler.dimension, gen2id)
        return translated

    @property
    def dimensionNames(self):
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset({keyHandler.dimension for keyHandler in self.handlers})