Coverage for python/lsst/obs/base/gen2to3/translators.py: 32%
165 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-09 03:03 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-09 03:03 -0700
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "Translator",
26 "TranslatorFactory",
27 "KeyHandler",
28 "CopyKeyHandler",
29 "ConstantKeyHandler",
30 "BandToPhysicalFilterKeyHandler",
31 "PhysicalFilterToBandKeyHandler",
32)
import itertools
import logging
from abc import ABCMeta, abstractmethod
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, Union

from lsst.skymap import BaseSkyMap
class KeyHandler(metaclass=ABCMeta):
    """Abstract helper that is responsible for exactly one key of a Gen3
    data ID.

    Subclasses implement `extract` to compute the value; `translate` stores
    that value in the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __repr__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.dimension}, ...)"

    def translate(
        self,
        gen2id: dict,
        gen3id: dict,
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
    ):
        """Write this handler's single key-value pair into a Gen3 data ID.

        Implemented here in terms of `extract`; subclasses are expected to
        override `extract` rather than this method.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID from which to draw key-value pairs.
        gen3id : `dict`
            Gen3 data ID to update in-place.
        skyMap : `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName : `str`, optional
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName : `str`
            Name of the dataset type.
        """
        # Compute the value first so that a failing extract() leaves gen3id
        # untouched.
        value = self.extract(
            gen2id,
            skyMap=skyMap,
            skyMapName=skyMapName,
            datasetTypeName=datasetTypeName,
        )
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(
        self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str], datasetTypeName: str
    ) -> Any:
        """Compute the Gen3 data ID value for ``dimension`` from a Gen2
        data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID from which to draw key-value pairs.
        skyMap : `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName : `str`, optional
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName : `str`
            Name of the dataset type.

        Returns
        -------
        value : `object`
            Value to store under ``dimension`` in the Gen3 data ID.
        """
        raise NotImplementedError()
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose value is fixed when the handler is constructed.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    value : `object`
        Data ID value returned for every Gen2 data ID.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(
        self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str], datasetTypeName: str
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # None of the arguments are consulted; the stored constant is
        # returned as-is.
        return self.value
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that copies a value from a Gen2 data ID, optionally
    converting it to a given type.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Name of the Gen2 data ID key to copy from.  If `None` (default),
        ``dimension`` is used as the Gen2 key as well.
    dtype : `type`, optional
        If not `None`, the Gen2 value is converted by calling
        ``dtype(value)`` before it is returned.
    """

    def __init__(self, dimension: str, gen2key: Optional[str] = None, dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    __slots__ = ("gen2key", "dtype")

    def __str__(self):
        return f"{type(self).__name__}({self.gen2key}, {self.dtype})"

    def extract(
        self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str], datasetTypeName: str
    ) -> Any:
        """Return the Gen2 value stored under ``gen2key``, converted to
        ``dtype`` if one was given.

        Parameters are as for `KeyHandler.extract`; only ``gen2id`` is used.

        Raises
        ------
        KeyError
            Raised if ``gen2key`` is not present in ``gen2id``.
        TypeError
            Raised if the value cannot be converted to ``dtype``.
        """
        r = gen2id[self.gen2key]
        if self.dtype is not None:
            try:
                r = self.dtype(r)
            except (TypeError, ValueError) as err:
                # Conversions can fail with either exception (e.g.
                # int("abc") raises ValueError, int(None) raises TypeError);
                # report both uniformly.  ``r`` still holds the unconverted
                # value here because the assignment above did not complete.
                raise TypeError(
                    f"'{r}' is not a valid value for {self.dimension}; "
                    f"expected {self.dtype.__name__}, got {type(r).__name__}."
                ) from err
        return r
class PatchKeyHandler(KeyHandler):
    """A KeyHandler that turns a Gen2 ``"x,y"`` patch string into the
    sequential patch index reported by the skymap's tract object.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(
        self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str], datasetTypeName: str
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # Look up the tract first; the patch is resolved relative to it.
        tractInfo = skyMap[gen2id["tract"]]
        # The Gen2 patch value is a comma-separated pair of integers.
        xText, yText = gen2id["patch"].split(",")
        patchIndex = (int(xText), int(yText))
        return tractInfo.getSequentialPatchIndex(tractInfo[patchIndex])
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler that populates the ``skymap`` dimension with the skymap
    name it is given.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(
        self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str], datasetTypeName: str
    ) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The value does not depend on the Gen2 data ID at all; it is
        # whatever skymap name the caller supplied.
        return skyMapName
class PhysicalFilterToBandKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``band``.

    Several physical filters may share one band, so only use this handler
    for obs packages where the mapping is one-to-one.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``band`` attributes; entries
        whose ``physical_filter`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("band")
        lookup = {}
        for definition in filterDefinitions:
            if definition.physical_filter is None:
                continue
            lookup[definition.physical_filter] = definition.band
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Filters that have no entry in the lookup are passed through
        # unchanged.
        gen2filter = gen2id["filter"]
        return self._map.get(gen2filter, gen2filter)
class BandToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``band``
    keys in gen3 but should be mapped to ``physical_filter``.

    One band may correspond to several physical filters, so only use this
    handler for obs packages where the mapping is one-to-one.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``band`` and ``physical_filter`` attributes; entries
        whose ``band`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        lookup = {}
        for definition in filterDefinitions:
            if definition.band is None:
                continue
            lookup[definition.band] = definition.physical_filter
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Filters that have no entry in the lookup are passed through
        # unchanged.
        gen2filter = gen2id["filter"]
        return self._map.get(gen2filter, gen2filter)
class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Rules are stored per instrument name and, within that, per dataset type
    name; `None` at either level means "match any".  Each rule is a
    ``(gen2keys, handler, consume)`` triple: the Gen2 keys that must all be
    present for the rule to match, the `KeyHandler` to use when it does, and
    the Gen2 keys removed from further matching once it has matched.

    Parameters
    ----------
    log : `logging.Logger`, optional
        A logger for diagnostic messages.  If `None`, a logger named after
        this module is used.
    """

    def __init__(self, log: Optional[logging.Logger] = None):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # gen2keys, handler, consumed gen2keys
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]],
            ],
        ] = {None: {None: []}}
        self._addDefaultRules()
        if log is None:
            log = logging.getLogger(__name__)
        self.log = log

    def __str__(self):
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    # ``consume`` is a frozenset; it is flagged whenever the
                    # rule consumes at least one key.
                    consumed = " (consumed)" if consume else ""
                    lines.append(f" {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    def addRule(
        self,
        handler: KeyHandler,
        instrument: Optional[str] = None,
        datasetTypeName: Optional[str] = None,
        gen2keys: Iterable[str] = (),
        consume: Union[bool, Iterable[str]] = True,
    ):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`, optional
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`, optional
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`, optional
            Gen2 data ID keys that must all be present for this rule to
            match.  May be any iterable, including a one-shot iterator.
        consume : `bool` or iterable of `str`, optional
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.  If False (or empty), consume nothing.
            May also be an iterable (e.g. a `tuple`) listing only the keys
            to consume.
        """
        # Materialize gen2keys exactly once: it may be a one-shot iterator,
        # and it is needed both as the match set and (when ``consume`` is
        # True) as the consumed set.  Reading it twice would leave the match
        # set empty, making the rule match every data ID.
        matchKeys = frozenset(gen2keys)
        # Normalize consume to a frozenset so it can be processed uniformly
        # from here on.
        if consume is True:
            consumed = matchKeys
        elif consume:
            consumed = frozenset(consume)
        else:
            consumed = frozenset()
        # Find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((matchKeys, handler, consumed))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Translate any "filter" values that appear alongside "tract" to
        # "band".  This is _not_ the right choice for instruments
        # that use "physical_filter" values for coadds in Gen2 (like HSC);
        # those will need to add a rule that invokes
        # PhysicalFilterToBandKeyHandler instead for just that instrument,
        # but the same criteria otherwise.  That will override this one,
        # because instrument-specific rules match first, and that rule will
        # consume the Gen2 "filter" key before this rule has a chance to fire.
        self.addRule(CopyKeyHandler("band", "filter"), gen2keys=("filter", "tract"), consume=("filter",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(
        self,
        instrumentName: str,
        calibFilterType: str = "physical_filter",
        detectorKey: str = "ccd",
        exposureKey: str = "visit",
    ):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``band``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey, "calibDate", or "visit".  (Multiple rules may match, so
        # we'll actually set instrument in the same dict more than once).
        # The registration order below is significant only in that it is the
        # order in which the rules are later tried.
        for triggerKey in (exposureKey, detectorKey, "calibDate", "visit"):
            self.addRule(
                ConstantKeyHandler("instrument", instrumentName),
                instrument=instrumentName,
                gen2keys=(triggerKey,),
                consume=False,
            )

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(
            CopyKeyHandler("exposure", exposureKey),
            instrument=instrumentName,
            datasetTypeName="raw",
            gen2keys=(exposureKey,),
            consume=(exposureKey, "filter"),
        )

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(
            CopyKeyHandler("visit"),
            instrument=instrumentName,
            gen2keys=("visit",),
            consume=("visit", "filter"),
        )

        # Copy Gen2 detectorKey (e.g. 'ccd') to Gen3 'detector'.
        self.addRule(
            CopyKeyHandler("detector", detectorKey), instrument=instrumentName, gen2keys=(detectorKey,)
        )

        # Add instrument for transmission curve datasets (transmission_sensor
        # is already handled by the above rules).
        for transmissionType in ("transmission_optics", "transmission_atmosphere", "transmission_filter"):
            self.addRule(
                ConstantKeyHandler("instrument", instrumentName),
                instrument=instrumentName,
                datasetTypeName=transmissionType,
            )
        self.addRule(
            CopyKeyHandler("physical_filter", "filter"),
            instrument=instrumentName,
            datasetTypeName="transmission_filter",
        )

        # Add calibration mapping for filter dependent types
        for calibType in ("flat", "sky", "fringe"):
            self.addRule(
                CopyKeyHandler(calibFilterType, "filter"),
                instrument=instrumentName,
                datasetTypeName=calibType,
            )

    def makeMatching(
        self,
        datasetTypeName: str,
        gen2keys: Dict[str, type],
        instrument: Optional[str] = None,
        skyMap: Optional[BaseSkyMap] = None,
        skyMapName: Optional[str] = None,
    ):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys : `dict`
            Mapping whose keys are the Gen2 data ID keys for this dataset;
            only the keys are used for matching.
        instrument : `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap : `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        # Most-specific rules come first so that they can consume Gen2 keys
        # before less-specific rules get a chance to match them.
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),  # this instrument, this DatasetType
            rulesForInstrument[None],  # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],  # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        self.log.debug(
            "Constructing data ID translator for %s with Gen2 keys %s...", datasetTypeName, gen2keys
        )
        for ruleKeys, handler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(handler)
                # Consumed keys are no longer visible to later rules.
                targetKeys -= consume
        self.log.debug(
            "...matched %d handlers: %s, with %s unmatched.",
            len(matchedHandlers),
            matchedHandlers,
            targetKeys,
        )
        return Translator(
            matchedHandlers,
            skyMap=skyMap,
            skyMapName=skyMapName,
            datasetTypeName=datasetTypeName,
            log=self.log,
        )
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`, optional
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    log : `logging.Logger`
        Logger that receives a debug message whenever a handler fails
        during a partial translation.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName", "log")

    def __init__(
        self,
        handlers: List[KeyHandler],
        skyMap: Optional[BaseSkyMap],
        skyMapName: Optional[str],
        datasetTypeName: str,
        log: logging.Logger,
    ):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName
        self.log = log

    def __str__(self):
        handlerText = ",".join(map(str, self.handlers))
        return f"{type(self).__name__}(dtype={self.datasetTypeName}, handlers=[{handlerText}])"

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False) -> Tuple[dict, Optional[str]]:
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, a handler that raises `KeyError` is logged at debug
            level and skipped; if `False` (default) the `KeyError`
            propagates.

        Returns
        -------
        gen3id : `dict`
            Data ID built by running every handler in order.
        calibDate : `object` or `None`
            The value of ``gen2id["calibDate"]`` if present, else `None`.
        """
        # Read calibDate up front, before any handler sees the Gen2 ID.
        calibDate = gen2id.get("calibDate")
        gen3id = {}
        for handler in self.handlers:
            try:
                handler.translate(
                    gen2id,
                    gen3id,
                    skyMap=self.skyMap,
                    skyMapName=self.skyMapName,
                    datasetTypeName=self.datasetTypeName,
                )
            except KeyError:
                if not partial:
                    raise
                self.log.debug(
                    "Failed to translate %s from %s (this may not be an error).",
                    handler.dimension,
                    gen2id,
                )
        return gen3id, calibDate

    @property
    def dimensionNames(self) -> FrozenSet[str]:
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset({handler.dimension for handler in self.handlers})