Coverage for python/lsst/obs/base/gen2to3/translators.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Names exported by ``from <module> import *``.
__all__ = (
    "Translator",
    "TranslatorFactory",
    "KeyHandler",
    "CopyKeyHandler",
    "ConstantKeyHandler",
    "BandToPhysicalFilterKeyHandler",
    "PhysicalFilterToBandKeyHandler",
)
import itertools
from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List, Union
from abc import ABCMeta, abstractmethod

from lsst.log import Log
from lsst.skymap import BaseSkyMap
class KeyHandler(metaclass=ABCMeta):
    """Abstract helper that fills in exactly one key of a Gen3 data ID.

    Concrete subclasses implement `extract`; `translate` stores the
    extracted value in the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __repr__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.dimension}, ...)"

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Set this handler's dimension in a Gen3 data ID from a Gen2 data
        ID.

        This method is implemented by the base class and is not expected to
        be re-implemented by subclasses; they customize `extract` instead.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs.
        gen3id: `dict`
            Gen3 data ID to update in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        # Extract first so that a failure leaves ``gen3id`` untouched.
        value = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                             datasetTypeName=datasetTypeName)
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Compute the Gen3 data ID value for this handler's dimension.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.

        Returns
        -------
        value
            The value to store under ``dimension`` in the Gen3 data ID.
        """
        raise NotImplementedError()
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose Gen3 value is fixed at construction and does not
    depend on the Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    value : `object`
        Data ID value returned for every Gen2 data ID.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # All arguments are ignored; the stored constant is the answer.
        return self.value
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that simply copies a value from a Gen2 data ID,
    optionally converting it to a given type.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Key to look up in the Gen2 data ID.  If `None` (default),
        ``dimension`` is used as the Gen2 key as well.
    dtype : `type`, optional
        If not `None`, a callable (typically a type such as `int`) that the
        Gen2 value is passed through before it is returned.
    """

    __slots__ = ("gen2key", "dtype")

    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    def __str__(self):
        return f"{type(self).__name__}({self.gen2key}, {self.dtype})"

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Return the Gen2 value stored under ``gen2key``, converted with
        ``dtype`` if one was given.

        Parameters are as documented on `KeyHandler.extract`; only
        ``gen2id`` is used.

        Raises
        ------
        KeyError
            Raised if ``gen2key`` is not present in ``gen2id``.
        TypeError
            Raised if ``dtype`` cannot convert the Gen2 value.
        """
        value = gen2id[self.gen2key]
        if self.dtype is None:
            return value
        try:
            return self.dtype(value)
        except (TypeError, ValueError) as err:
            # Conversions can fail with either exception (e.g. ``int("x")``
            # raises ValueError but ``int(None)`` raises TypeError); report
            # both uniformly, with the dimension name for context.
            raise TypeError(
                f"'{value}' is not a valid value for {self.dimension}; "
                f"expected {self.dtype.__name__}, got {type(value).__name__}."
            ) from err
class PatchKeyHandler(KeyHandler):
    """A KeyHandler that converts a Gen2 ``"x,y"`` patch string into the
    sequential patch index reported by the skymap's tract.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The tract is looked up first; the patch is then indexed within it.
        tractInfo = skyMap[gen2id["tract"]]
        # Gen2 patch IDs are comma-separated pairs of integers.
        xStr, yStr = gen2id["patch"].split(",")
        patchIndex = (int(xStr), int(yStr))
        patchInfo = tractInfo[patchIndex]
        return tractInfo.getSequentialPatchIndex(patchInfo)
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler that populates the Gen3 ``skymap`` key with the skymap
    name it is given.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID is not consulted; the name is passed straight
        # through.
        return skyMapName
class PhysicalFilterToBandKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``band``.

    Note that multiple physical filters can potentially map to one abstract
    filter, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``band`` attributes; entries
        whose ``physical_filter`` is `None` are ignored.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("band")
        lookup = {}
        for definition in filterDefinitions:
            physical = definition.physical_filter
            if physical is not None:
                lookup[physical] = definition.band
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Unknown filter names are passed through unchanged.
        filterName = gen2id["filter"]
        return self._map.get(filterName, filterName)
class BandToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``band``
    keys in gen3 but should be mapped to ``physical_filter``.

    Note that one abstract filter can potentially map to multiple physical
    filters, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``band`` and ``physical_filter`` attributes; entries
        whose ``band`` is `None` are ignored.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        lookup = {}
        for definition in filterDefinitions:
            band = definition.band
            if band is not None:
                lookup[band] = definition.physical_filter
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        # Unknown filter names are passed through unchanged.
        filterName = gen2id["filter"]
        return self._map.get(filterName, filterName)
class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Parameters
    ----------
    log : `lsst.log.Log`, optional
        A logger for diagnostic messages.  If `None`, the logger named
        ``obs.base.gen2to3.TranslatorFactory`` is used.
    """

    def __init__(self, log: Optional[Log] = None):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # (gen2 keys required to match, handler, gen2 keys consumed)
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
            ]
        ] = {
            None: {
                None: []
            }
        }
        self._addDefaultRules()
        if log is None:
            log = Log.getLogger("obs.base.gen2to3.TranslatorFactory")
        self.log = log

    def __str__(self):
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    consumed = " (consumed)" if consume else ""
                    lines.append(f"  {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    @staticmethod
    def _asKeySet(keys: Union[str, Iterable[str]]) -> FrozenSet[str]:
        """Return ``keys`` as a frozenset, treating a bare `str` as one key
        rather than as an iterable of characters.
        """
        if isinstance(keys, str):
            return frozenset((keys,))
        return frozenset(keys)

    def addRule(self, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: Union[bool, Iterable[str]] = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`, optional
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`, optional
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`, optional
            Gen2 data ID keys that must all be present for this rule to
            match.  A single `str` is treated as one key.
        consume : `bool` or iterable of `str`, optional
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.  If false or empty, no keys are consumed.
            May also be an iterable (e.g. a `tuple`) listing only the keys
            to consume; a single `str` is treated as one key.
        """
        # Materialize gen2keys exactly once, so that one-shot iterables
        # (e.g. generators) are not exhausted before the rule is stored.
        ruleKeys = self._asKeySet(gen2keys)
        # Ensure the consumed keys are always a frozenset, so they can be
        # processed uniformly from here on.
        if consume is True:
            consumedKeys = ruleKeys
        elif consume:
            consumedKeys = self._asKeySet(consume)
        else:
            consumedKeys = frozenset()
        # Find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((ruleKeys, handler, consumedKeys))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Translate any "filter" values that appear alongside "tract" to
        # "band".  This is _not_ the right choice for instruments
        # that use "physical_filter" values for coadds in Gen2 (like HSC);
        # those will need to add a rule that invokes
        # PhysicalFilterToBandKeyHandler instead for just that instrument,
        # but the same criteria otherwise.  That will override this one,
        # because instrument-specific rules match first, and that rule will
        # consume the Gen2 "filter" key before this rule has a chance to fire.
        self.addRule(CopyKeyHandler("band", "filter"),
                     gen2keys=("filter", "tract"),
                     consume=("filter",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(self, instrumentName: str,
                                  calibFilterType: str = "physical_filter",
                                  detectorKey: str = "ccd",
                                  exposureKey: str = "visit"):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``band``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey, "calibDate", or "visit".  (Multiple rules may match,
        # so we'll actually set instrument in the same dict more than once).
        for gen2key in (exposureKey, detectorKey, "calibDate", "visit"):
            self.addRule(ConstantKeyHandler("instrument", instrumentName),
                         instrument=instrumentName, gen2keys=(gen2key,), consume=False)

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(CopyKeyHandler("exposure", exposureKey),
                     instrument=instrumentName, datasetTypeName="raw", gen2keys=(exposureKey,),
                     consume=(exposureKey, "filter"))

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(CopyKeyHandler("visit"), instrument=instrumentName, gen2keys=("visit",),
                     consume=("visit", "filter"))

        # Copy Gen2 detectorKey (default 'ccd') to Gen3 'detector'.
        self.addRule(CopyKeyHandler("detector", detectorKey),
                     instrument=instrumentName,
                     gen2keys=(detectorKey,))

        # Add instrument for transmission curve datasets (transmission_sensor
        # is already handled by the above rules).
        for transmissionType in ("transmission_optics", "transmission_atmosphere",
                                 "transmission_filter"):
            self.addRule(ConstantKeyHandler("instrument", instrumentName),
                         instrument=instrumentName, datasetTypeName=transmissionType)
        self.addRule(CopyKeyHandler("physical_filter", "filter"),
                     instrument=instrumentName, datasetTypeName="transmission_filter")

        # Add calibration mapping for filter dependent types
        for calibType in ('flat', 'sky', 'fringe'):
            self.addRule(CopyKeyHandler(calibFilterType, "filter"),
                         instrument=instrumentName, datasetTypeName=calibType)

    def makeMatching(self, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        # Most-specific rules come first, so that the keys they consume are
        # no longer available to the more generic rules that follow.
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        self.log.debug("Constructing data ID translator for %s with Gen2 keys %s...",
                       datasetTypeName, gen2keys)
        for ruleKeys, ruleHandlers, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(ruleHandlers)
                targetKeys -= consume
        self.log.debug("...matched %d handlers: %s, with %s unmatched.",
                       len(matchedHandlers), matchedHandlers, targetKeys)
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName, log=self.log)
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    log : `lsst.log.Log`
        Logger used for debug messages when a partial translation skips a
        handler.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName", "log")

    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str, log: Log):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName
        self.log = log

    def __str__(self):
        handlerList = ",".join(map(str, self.handlers))
        clsName = type(self).__name__
        return f"{clsName}(dtype={self.datasetTypeName}, handlers=[{handlerList}])"

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False) -> Tuple[dict, Optional[str]]:
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, handlers that raise `KeyError` are skipped (with a
            debug log message) instead of aborting the translation.

        Returns
        -------
        gen3id : `dict`
            The translated Gen3 data ID.
        calibDate : `str` or `None`
            Value of the Gen2 ``calibDate`` key, or `None` if absent.

        Raises
        ------
        KeyError
            Raised if a handler raises `KeyError` and ``partial`` is `False`.
        """
        translated = {}
        for keyHandler in self.handlers:
            try:
                keyHandler.translate(gen2id, translated, skyMap=self.skyMap,
                                     skyMapName=self.skyMapName,
                                     datasetTypeName=self.datasetTypeName)
            except KeyError:
                if not partial:
                    raise
                self.log.debug("Failed to translate %s from %s (this may not be an error).",
                               keyHandler.dimension, gen2id)
        return translated, gen2id.get("calibDate", None)

    @property
    def dimensionNames(self) -> FrozenSet[str]:
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset({keyHandler.dimension for keyHandler in self.handlers})