lsst.obs.base  19.0.0-26-g830ab5e+1
translators.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
# Public API of this module.
__all__ = ("Translator", "KeyHandler", "CopyKeyHandler", "ConstantKeyHandler",
           "CalibKeyHandler", "AbstractToPhysicalFilterKeyHandler", "PhysicalToAbstractFilterKeyHandler",
           "makeCalibrationLabel")
25 
26 import itertools
27 from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List
28 from abc import ABCMeta, abstractmethod
29 
30 from lsst.log import Log
31 from lsst.skymap import BaseSkyMap
32 
33 
def makeCalibrationLabel(datasetTypeName: str, calibDate: str, ccd: Optional[int] = None,
                         filter: Optional[str] = None) -> str:
    """Make a Gen3 calibration_label string corresponding to a Gen2 data ID.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type this calibration label identifies.
    calibDate : `str`
        Date string used in the Gen2 template.
    ccd : `int`, optional
        Detector ID used in the Gen2 template.
    filter : `str`, optional
        Filter used in the Gen2 template.

    Returns
    -------
    label : `str`
        Calibration label string: ``gen2/`` followed by the dataset type
        name and the calibration date joined with underscores, then the
        detector ID (zero-padded to at least three digits) and the filter
        when those are given.
    """
    # TODO: this function is probably HSC-specific, but I don't know how other
    # obs calib registries behave so I don't know (yet) how to generalize it.
    elements = [datasetTypeName, calibDate]
    # Compare against None explicitly so that detector 0 is still included.
    if ccd is not None:
        elements.append(f"{ccd:03d}")
    if filter is not None:
        elements.append(filter)
    return "gen2/{}".format("_".join(elements))
62 
63 
class KeyHandler(metaclass=ABCMeta):
    """Base class for Translator helpers that each handle just one Gen3 Data
    ID key.

    Subclasses implement `extract`; `translate` stores the extracted value
    in the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Update a Gen3 data ID dict with a single key-value pair from a Gen2
        data ID.

        This method is implemented by the base class and is not expected to
        be re-implemented by subclasses.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        gen3id: `dict`
            Gen3 data ID to update in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        # Everything but the Gen2 ID is forwarded by keyword, so subclasses
        # may accept the extra arguments via ``*args, **kwargs``.
        gen3id[self.dimension] = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                                              datasetTypeName=datasetTypeName)

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Extract a Gen3 data ID value from a Gen2 data ID.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.

        Returns
        -------
        value : `object`
            Value to store in the Gen3 data ID under ``self.dimension``.
        """
        raise NotImplementedError()
125 
126 
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler that adds a constant key-value pair to the Gen3 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    value : `object`
        Data ID value.
    """

    __slots__ = ("value",)

    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID is ignored; the same value is returned every time.
        return self.value
148 
149 
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that simply copies a value from a Gen2 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Name of the Gen2 data ID key to copy the value from.  Defaults to
        ``dimension``.
    dtype : `type`, optional
        If not `None`, a callable (typically a type such as `int`) that the
        Gen2 value is passed through before being returned.
    """

    __slots__ = ("gen2key", "dtype")

    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # A missing key raises KeyError, which callers may catch to do a
        # partial translation; do not wrap it.
        r = gen2id[self.gen2key]
        if self.dtype is not None:
            try:
                r = self.dtype(r)
            except (ValueError, TypeError) as err:
                # Conversions can fail with either exception (e.g.
                # int("abc") vs. int(None)); report both the same way.
                raise TypeError(
                    f"'{r}' is not a valid value for {self.dimension}; "
                    f"expected {self.dtype.__name__}, got {type(r).__name__}."
                ) from err
        return r
182 
183 
class PatchKeyHandler(KeyHandler):
    """A KeyHandler for skymap patches.

    Populates the Gen3 ``patch`` dimension from the Gen2 ``tract`` and
    ``patch`` keys, where the Gen2 patch is a string of two comma-separated
    integers.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("patch")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # NOTE(review): skyMap is subscripted unconditionally, so it must not
        # be None when this handler runs -- confirm callers guarantee that.
        tract = gen2id["tract"]
        tractInfo = skyMap[tract]
        # Gen2 patch IDs look like "x,y"; look the patch up by that index
        # pair and ask the tract for its sequential patch index.
        x, y = gen2id["patch"].split(",")
        patchInfo = tractInfo[int(x), int(y)]
        return tractInfo.getSequentialPatchIndex(patchInfo)
200 
201 
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler for skymaps.

    Populates the Gen3 ``skymap`` dimension with the skymap name passed to
    the translator.
    """

    __slots__ = ()

    def __init__(self):
        super().__init__("skymap")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The value does not depend on the Gen2 data ID at all.
        return skyMapName
213 
214 
class CalibKeyHandler(KeyHandler):
    """A KeyHandler for master calibration datasets.

    Populates the Gen3 ``calibration_label`` dimension using
    `makeCalibrationLabel`.

    Parameters
    ----------
    ccdKey : `str`, optional
        Name of the Gen2 data ID key that holds the detector ID.
    """

    __slots__ = ("ccdKey",)

    def __init__(self, ccdKey: str = "ccd"):
        self.ccdKey = ccdKey
        super().__init__("calibration_label")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # "calibDate" is required (KeyError if absent); the detector and
        # filter are optional and simply left out of the label when missing.
        return makeCalibrationLabel(datasetTypeName, gen2id["calibDate"],
                                    ccd=gen2id.get(self.ccdKey), filter=gen2id.get("filter"))
229 
230 
class PhysicalToAbstractFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``physical_filter``
    keys in gen3 but should be mapped to ``abstract_filter``.

    Note that multiple physical filter can potentially map to one abstract
    filter, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``abstract_filter`` attributes.
        Entries whose ``physical_filter`` is `None` are ignored.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("abstract_filter")
        self._map = {d.physical_filter: d.abstract_filter for d in filterDefinitions
                     if d.physical_filter is not None}

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        physical = gen2id["filter"]
        # Filters with no known definition are passed through unchanged.
        return self._map.get(physical, physical)
250 
251 
class AbstractToPhysicalFilterKeyHandler(KeyHandler):
    """KeyHandler for gen2 ``filter`` keys that match ``abstract_filter``
    keys in gen3 but should be mapped to ``physical_filter``.

    Note that one abstract filter can potentially map to multiple physical
    filters, so be careful to only use this translator on obs packages where
    there is a one-to-one mapping.

    Parameters
    ----------
    filterDefinitions : iterable
        Objects with ``physical_filter`` and ``abstract_filter`` attributes.
        Entries whose ``abstract_filter`` is `None` are ignored; if several
        entries share an abstract filter, the last one wins.
    """

    __slots__ = ("_map",)

    def __init__(self, filterDefinitions):
        super().__init__("physical_filter")
        self._map = {d.abstract_filter: d.physical_filter for d in filterDefinitions
                     if d.abstract_filter is not None}

    def extract(self, gen2id, *args, **kwargs):
        # Docstring inherited from KeyHandler.extract.
        abstract = gen2id["filter"]
        # Filters with no known definition are passed through unchanged.
        return self._map.get(abstract, abstract)
271 
272 
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via the `makeMatching` method.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    """

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName")

    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName

    # Rules used to match Handlers when constructing a Translator.
    # outer key is instrument name, or None for any
    # inner key is DatasetType name, or None for any
    # values are lists of 3-tuples of
    # (frozenset(gen2keys), handler, frozenset(keys to consume)),
    # kept in the order the rules were added.
    _rules: Dict[
        Optional[str],
        Dict[
            Optional[str],
            List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
        ]
    ] = {
        None: {
            None: []
        }
    }

    @classmethod
    def addRule(cls, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: bool = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : sequence
            Sequence of Gen2 data ID keys that must all be present for this
            rule to match.
        consume : `bool` or `tuple`
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.
            May also be a `tuple` listing only the keys to consume.
        """
        # Ensure consume is always a frozenset, so we can process it uniformly
        # from here on.
        if consume is True:
            consume = frozenset(gen2keys)
        elif consume:
            consume = frozenset(consume)
        else:
            consume = frozenset()
        # find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = cls._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((frozenset(gen2keys), handler, consume))

    @classmethod
    def makeMatching(cls, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Rules are tried from most to least specific: this instrument and
        DatasetType, this instrument and any DatasetType, any instrument and
        this DatasetType, then any instrument and any DatasetType.  Within
        each group rules are tried in the order they were added.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.  Only the keys are used
            for matching.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator whose translate() method can be used to transform Gen2
            data IDs to Gen3 dataIds.
        """
        # The any-instrument rules are always chained in below, so when no
        # instrument is given use an empty placeholder here rather than
        # looking up cls._rules[None] a second time.
        if instrument is not None:
            rulesForInstrument = cls._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = cls._rules[None]
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        targetKeys = set(gen2keys)
        for ruleKeys, handler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(handler)
                # Consumed keys are no longer visible to later (less
                # specific) rules.
                targetKeys -= consume
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName)

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False, log: Optional[Log] = None):
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, skip any handler that raises `KeyError` instead of
            propagating the exception, so the result may lack some
            dimensions.
        log : `lsst.log.Log`, optional
            If provided, skipped handlers are reported to it at debug level
            when ``partial`` is `True`.

        Returns
        -------
        gen3id : `dict`
            Gen3 data ID holding one entry per handler that succeeded.
        """
        gen3id = {}
        for handler in self.handlers:
            try:
                handler.translate(gen2id, gen3id, skyMap=self.skyMap, skyMapName=self.skyMapName,
                                  datasetTypeName=self.datasetTypeName)
            except KeyError:
                if not partial:
                    raise
                if log is not None:
                    log.debug("Failed to translate %s from %s.", handler.dimension, gen2id)
        return gen3id

    @property
    def dimensionNames(self):
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset(h.dimension for h in self.handlers)
427 
428 
429 # Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
430 Translator.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)
431 
432 # Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
433 for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
434  Translator.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")
435 
436 # Translate Gen2 str patch IDs to Gen3 sequential integers.
437 Translator.addRule(PatchKeyHandler(), gen2keys=("patch",))
438 
439 # Copy Gen2 "tract" to Gen3 "tract".
440 Translator.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))
441 
442 # Add valid_first, valid_last to instrument-level transmission/ datasets;
443 # these are considered calibration products in Gen3.
444 for datasetTypeName in ("transmission_sensor", "transmission_optics", "transmission_filter"):
445  Translator.addRule(ConstantKeyHandler("calibration_label", "unbounded"),
446  datasetTypeName=datasetTypeName)
447 
448 # Translate Gen2 pixel_id to Gen3 skypix.
449 # TODO: For now, we just assume that the refcat indexer uses htm7, since that's
450 # what we have generated most of our refcats at.
451 Translator.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))