Coverage for python/lsst/obs/base/gen2to3/translators.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Translator", "TranslatorFactory", "KeyHandler", "CopyKeyHandler", "ConstantKeyHandler",
25 "BandToPhysicalFilterKeyHandler", "PhysicalFilterToBandKeyHandler")
27import itertools
28from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List
29from abc import ABCMeta, abstractmethod
31from lsst.log import Log
32from lsst.skymap import BaseSkyMap
class KeyHandler(metaclass=ABCMeta):
    """Abstract helper used by a Translator to fill in exactly one Gen3 data
    ID key.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) that this handler
        populates (e.g. "visit" or "band").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def __repr__(self):
        clsName = type(self).__name__
        return f"{clsName}({self.dimension}, ...)"

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Insert this handler's key-value pair into a Gen3 data ID dict.

        The value is obtained from `extract` and stored under
        ``self.dimension``.  Subclasses customize `extract` and are not
        expected to override this method.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID to draw values from.
        gen3id: `dict`
            Gen3 data ID, updated in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        value = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                             datasetTypeName=datasetTypeName)
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Compute the Gen3 data ID value for this handler's dimension from a
        Gen2 data ID.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID to draw values from.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        raise NotImplementedError()
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler whose Gen3 value is fixed at construction, independent of
    the Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "band").
    value : `object`
        Data ID value to report for every Gen2 data ID.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # All arguments are ignored; the stored constant is always returned.
        constant = self.value
        return constant
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that simply copies a value from a Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Name of the Gen2 data ID key to read the value from.  If `None`
        (default), ``dimension`` is used as the Gen2 key as well.
    dtype : `type`, optional
        If not `None`, a type that is called on the Gen2 value to coerce it
        (e.g. `int`); values it cannot convert are rejected with
        `TypeError`.
    """
    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    __slots__ = ("gen2key", "dtype")

    def __str__(self):
        return f"{type(self).__name__}({self.gen2key}, {self.dtype})"

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The lookup is deliberately outside the try block: a missing Gen2
        # key must surface as KeyError, not as a conversion failure.
        r = gen2id[self.gen2key]
        if self.dtype is None:
            return r
        try:
            return self.dtype(r)
        except (ValueError, TypeError) as err:
            # Conversions can fail with either exception (e.g. int("x") vs.
            # int(None)); report both uniformly, naming the dimension.
            raise TypeError(
                f"'{r}' is not a valid value for {self.dimension}; "
                f"expected {self.dtype.__name__}, got {type(r).__name__}."
            ) from err
class PatchKeyHandler(KeyHandler):
    """A KeyHandler that converts Gen2 ``"x,y"`` patch strings into the
    sequential patch index reported by the skymap.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # Look up the tract first; the patch index is only meaningful
        # within its tract.
        tractInfo = skyMap[gen2id["tract"]]
        xText, yText = gen2id["patch"].split(",")
        patchIndex2d = (int(xText), int(yText))
        patchInfo = tractInfo[patchIndex2d]
        return tractInfo.getSequentialPatchIndex(patchInfo)
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler that populates the ``skymap`` dimension with the skymap
    name passed in by the caller.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID is not consulted; the name is passed straight
        # through.
        name = skyMapName
        return name
class PhysicalFilterToBandKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys whose values match gen3
    ``physical_filter`` values but should populate ``band``.

    Several physical filters can potentially map to the same band, so only
    use this translator for obs packages where the mapping is one-to-one.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``band`` attributes; entries
        whose ``physical_filter`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("band")
        lookup = {}
        for filterDef in filterDefinitions:
            physicalName = filterDef.physical_filter
            if physicalName is not None:
                lookup[physicalName] = filterDef.band
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Values with no entry in the map are passed through unchanged.
        gen2filter = gen2id["filter"]
        return self._map.get(gen2filter, gen2filter)
class BandToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys whose values match gen3 ``band``
    values but should populate ``physical_filter``.

    One band can potentially map to several physical filters, so only use
    this translator for obs packages where the mapping is one-to-one.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``band`` and ``physical_filter`` attributes; entries
        whose ``band`` is `None` are skipped.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        lookup = {}
        for filterDef in filterDefinitions:
            bandName = filterDef.band
            if bandName is not None:
                lookup[bandName] = filterDef.physical_filter
        self._map = lookup

    def extract(self, gen2id, *args, **kwargs):
        # Values with no entry in the map are passed through unchanged.
        gen2filter = gen2id["filter"]
        return self._map.get(gen2filter, gen2filter)
class TranslatorFactory:
    """A class that manages a system of rules for translating Gen2 data IDs
    to Gen3 data IDs, and uses these to construct translators for particular
    dataset types.

    Parameters
    ----------
    log : `lsst.log.Log`, optional
        A logger for diagnostic messages.  If `None`, the logger named
        ``obs.base.gen2to3.TranslatorFactory`` is used.
    """
    def __init__(self, log: Optional[Log] = None):
        # The rules used to match KeyHandlers when constructing a Translator.
        self._rules: Dict[
            Optional[str],  # instrument name (or None to match any)
            Dict[
                Optional[str],  # dataset type name (or None to match any)
                # gen2keys, handler, keys consumed on match
                List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
            ]
        ] = {
            None: {
                None: []
            }
        }
        self._addDefaultRules()
        if log is None:
            log = Log.getLogger("obs.base.gen2to3.TranslatorFactory")
        self.log = log

    def __str__(self):
        lines = []
        for instrumentName, nested in self._rules.items():
            if instrumentName is None:
                instrumentName = "[any instrument]"
            for datasetTypeName, rules in nested.items():
                if datasetTypeName is None:
                    datasetTypeName = "[any dataset type]"
                lines.append(f"{instrumentName} + {datasetTypeName}:")
                for gen2keys, handler, consume in rules:
                    # ``consume`` is a frozenset; empty means nothing is
                    # consumed by this rule.
                    consumed = " (consumed)" if consume else ""
                    lines.append(f" {gen2keys}{consumed}: {handler}")
        return "\n".join(lines)

    def addRule(self, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: bool = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`, optional
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`, optional
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`, optional
            Gen2 data ID keys that must all be present for this rule to
            match.  May be a one-shot iterator; it is read exactly once.
        consume : `bool` or `tuple`, optional
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.
            May also be a `tuple` listing only the keys to consume.
        """
        # Materialize the keys once: ``gen2keys`` may be an iterator, and
        # iterating it a second time would silently yield an empty set.
        ruleKeys = frozenset(gen2keys)
        # Ensure consume is always a frozenset, so we can process it uniformly
        # from here on.
        if consume is True:
            consume = ruleKeys
        elif consume:
            consume = frozenset(consume)
        else:
            consume = frozenset()
        # find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = self._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((ruleKeys, handler, consume))

    def _addDefaultRules(self):
        """Add translator rules that should always be present, and don't depend
        at all on the instrument whose datasets are being converted.

        This is called by `TranslatorFactory` construction.
        """
        # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
        self.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

        # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
        for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
            self.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

        # Translate Gen2 str patch IDs to Gen3 sequential integers.
        self.addRule(PatchKeyHandler(), gen2keys=("patch",))

        # Translate any "filter" values that appear alongside "tract" to
        # "band".  This is _not_ the right choice for instruments
        # that use "physical_filter" values for coadds in Gen2 (like HSC);
        # those will need to add a rule that invokes
        # PhysicalFilterToBandKeyHandler instead for just that instrument, but
        # the same criteria otherwise.  That will override this one, because
        # instrument-specific rules match first, and that rule will consume
        # the Gen2 "filter" key before this rule has a chance to fire.
        self.addRule(CopyKeyHandler("band", "filter"),
                     gen2keys=("filter", "tract"),
                     consume=("filter",))

        # Copy Gen2 "tract" to Gen3 "tract".
        self.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

        # Translate Gen2 pixel_id to Gen3 skypix.
        #
        # TODO: For now, we just assume that the refcat indexer uses htm7,
        # since that's what we have generated most of our refcats at.
        # Eventually that may have to change, but it's not clear enough how to
        # do that for us to have a ticket yet.  If you found this note because
        # you've run into this limitation, please let the middleware team know
        # that it's time to make this a priority.
        self.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

    def addGenericInstrumentRules(self, instrumentName: str,
                                  calibFilterType: str = "physical_filter",
                                  detectorKey: str = "ccd",
                                  exposureKey: str = "visit"):
        """Add translation rules that depend on some properties of the
        instrument but are otherwise generic.

        Parameters
        ----------
        instrumentName : `str`
            The short (dimension) name of the instrument that conversion is
            going to be run on.
        calibFilterType : `str`, optional
            One of ``physical_filter`` or ``band``, indicating which
            of those the gen2 calibRegistry uses as the ``filter`` key.
        detectorKey : `str`, optional
            The gen2 key used to identify what in gen3 is `detector`.
        exposureKey : `str`, optional
            The gen2 key used to identify what in gen3 is `exposure`.
        """
        # Add instrument to Gen3 data ID if Gen2 contains exposureKey,
        # detectorKey, or "calibDate".  (Several of these rules may match, so
        # we may actually set instrument in the same dict more than once.)
        # Rule order is preserved: exposureKey, detectorKey, calibDate.
        for gen2key in (exposureKey, detectorKey, "calibDate"):
            self.addRule(ConstantKeyHandler("instrument", instrumentName),
                         instrument=instrumentName, gen2keys=(gen2key,), consume=False)

        # Copy Gen2 exposureKey to Gen3 'exposure' for raw only.  Also consume
        # filter, since that's implied by 'exposure' in Gen3.
        self.addRule(CopyKeyHandler("exposure", exposureKey),
                     instrument=instrumentName, datasetTypeName="raw", gen2keys=(exposureKey,),
                     consume=(exposureKey, "filter"))

        # Copy Gen2 'visit' to Gen3 'visit' otherwise.  Also consume filter.
        self.addRule(CopyKeyHandler("visit"), instrument=instrumentName, gen2keys=("visit",),
                     consume=("visit", "filter"))

        # Copy Gen2 detectorKey ('ccd' by default) to Gen3 'detector'.
        self.addRule(CopyKeyHandler("detector", detectorKey),
                     instrument=instrumentName,
                     gen2keys=(detectorKey,))

        # Add instrument for transmission curve datasets (transmission_sensor is
        # already handled by the above rules).
        for curveType in ("transmission_optics", "transmission_atmosphere", "transmission_filter"):
            self.addRule(ConstantKeyHandler("instrument", instrumentName),
                         instrument=instrumentName, datasetTypeName=curveType)
        self.addRule(CopyKeyHandler("physical_filter", "filter"),
                     instrument=instrumentName, datasetTypeName="transmission_filter")

        # Add calibration mapping for filter dependent types
        for calibType in ('flat', 'sky', 'fringe'):
            self.addRule(CopyKeyHandler(calibFilterType, "filter"),
                         instrument=instrumentName, datasetTypeName=calibType)

    def makeMatching(self, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        # With no instrument, use an empty rule set here so the
        # any-instrument rules below are not chained in twice.
        if instrument is not None:
            rulesForInstrument = self._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = self._rules[None]
        # Most-specific rules come first so they can consume keys before
        # less-specific rules get a chance to match them.
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        self.log.debug("Constructing data ID translator for %s with Gen2 keys %s...",
                       datasetTypeName, gen2keys)
        for ruleKeys, handler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(handler)
                targetKeys -= consume
        self.log.debug("...matched %d handlers: %s, with %s unmatched.",
                       len(matchedHandlers), matchedHandlers, targetKeys)
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName, log=self.log)
class Translator:
    """Callable object that converts Gen2 Data IDs into Gen3 Data IDs for one
    particular DatasetType.

    Translators should usually be constructed via
    `TranslatorFactory.makeMatching`.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    log : `lsst.log.Log`
        Logger that receives a debug message when a handler is skipped
        during a partial translation.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName", "log")

    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str, log: Log):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName
        self.log = log

    def __str__(self):
        handlerText = ",".join(map(str, self.handlers))
        clsName = type(self).__name__
        return f"{clsName}(dtype={self.datasetTypeName}, handlers=[{handlerText}])"

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False) -> Tuple[dict, Optional[str]]:
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, a handler that raises `KeyError` is skipped (with a
            debug log message) instead of aborting the translation.

        Returns
        -------
        gen3id : `dict`
            Gen3 data ID assembled from the handlers' outputs.
        calibDate : `str` or `None`
            The ``calibDate`` entry of ``gen2id``, if present.

        Raises
        ------
        KeyError
            Raised if a handler raises `KeyError` and ``partial`` is `False`.
        """
        calibDate = gen2id.get("calibDate")
        translated = {}
        for keyHandler in self.handlers:
            try:
                keyHandler.translate(gen2id, translated, skyMap=self.skyMap,
                                     skyMapName=self.skyMapName,
                                     datasetTypeName=self.datasetTypeName)
            except KeyError:
                if not partial:
                    raise
                self.log.debug("Failed to translate %s from %s (this may not be an error).",
                               keyHandler.dimension, gen2id)
        return translated, calibDate

    @property
    def dimensionNames(self) -> FrozenSet[str]:
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        names = {keyHandler.dimension for keyHandler in self.handlers}
        return frozenset(names)