lsst.obs.base  19.0.0-20-g6de566f+5
translators.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
22 __all__ = ("Translator", "KeyHandler", "CopyKeyHandler", "ConstantKeyHandler",
23  "makeCalibrationLabel")
24 
25 import itertools
26 from typing import Optional, Any, Dict, Tuple, FrozenSet, Iterable, List
27 from abc import ABCMeta, abstractmethod
28 
29 from lsst.log import Log
30 from lsst.skymap import BaseSkyMap
31 
32 
def makeCalibrationLabel(datasetTypeName: str, calibDate: str, ccd: Optional[int] = None,
                         filter: Optional[str] = None) -> str:
    """Build the Gen3 ``calibration_label`` string for a Gen2 data ID.

    The label is the prefix ``"gen2/"`` followed by the dataset type name,
    the calibration date and, when provided, the detector ID (zero-padded
    to at least three digits) and the filter, all joined by underscores.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type this calibration label identifies.
    calibDate : `str`
        Date string used in the Gen2 template.
    ccd : `int`, optional
        Detector ID used in the Gen2 template.  Omitted from the label when
        `None`.
    filter : `str`, optional
        Filter used in the Gen2 template.  Omitted from the label when
        `None`.

    Returns
    -------
    label : `str`
        Calibration label string.
    """
    # TODO: this function is probably HSC-specific, but I don't know how other
    # obs calib registries behave so I don't know (yet) how to generalize it.
    detectorPart = () if ccd is None else (f"{ccd:03d}",)
    filterPart = () if filter is None else (filter,)
    pieces = (datasetTypeName, calibDate, *detectorPart, *filterPart)
    return "gen2/" + "_".join(pieces)
61 
62 
class KeyHandler(metaclass=ABCMeta):
    """Abstract helper used by Translator; each instance is responsible for
    exactly one Gen3 data ID key.

    Subclasses implement `extract`; `translate` stores the extracted value
    in the Gen3 data ID under ``dimension``.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    """

    __slots__ = ("dimension",)

    def __init__(self, dimension: str):
        self.dimension = dimension

    def translate(self, gen2id: dict, gen3id: dict,
                  skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                  datasetTypeName: str):
        """Insert this handler's key-value pair into a Gen3 data ID dict.

        The value comes from `extract`; subclasses are expected to override
        that method rather than this one.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        gen3id: `dict`
            Gen3 data ID to update in-place.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.
        """
        value = self.extract(gen2id, skyMap=skyMap, skyMapName=skyMapName,
                             datasetTypeName=datasetTypeName)
        gen3id[self.dimension] = value

    @abstractmethod
    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        """Compute the Gen3 data ID value for this handler's dimension.

        Parameters
        ----------
        gen2id: `dict`
            Gen2 data ID from which to draw key-value pairs from.
        skyMap: `BaseSkyMap`, optional
            SkyMap that defines the tracts and patches used in the Gen2 data
            ID, if any.
        skyMapName: `str`
            Name of the Gen3 skymap dimension that defines the tracts and
            patches used in the Gen3 data ID.
        datasetTypeName: `str`
            Name of the dataset type.

        Returns
        -------
        value
            Value to store under ``self.dimension`` in the Gen3 data ID.
        """
        raise NotImplementedError()
124 
125 
class ConstantKeyHandler(KeyHandler):
    """A KeyHandler that adds a constant key-value pair to the Gen3 data ID.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension (data ID key) populated by
        this handler (e.g. "visit" or "abstract_filter").
    value : `object`
        Data ID value.
    """
    def __init__(self, dimension: str, value: Any):
        super().__init__(dimension)
        self.value = value

    __slots__ = ("value",)

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # The Gen2 data ID and skymap arguments are ignored; the value given
        # at construction is returned unchanged.
        return self.value
147 
148 
class CopyKeyHandler(KeyHandler):
    """A KeyHandler that copies a value from a Gen2 data ID, optionally
    converting it to another type.

    Parameters
    ----------
    dimension : `str`
        Name of the Gen3 dimension produced by this handler.
    gen2key : `str`, optional
        Name of the Gen2 data ID key to read.  Defaults to ``dimension``.
    dtype : `type`, optional
        If not `None`, a type (e.g. `int`) that is called on the Gen2 value
        to convert it to the Gen3 value.
    """
    def __init__(self, dimension: str, gen2key: Optional[str] = None,
                 dtype: Optional[type] = None):
        super().__init__(dimension)
        self.gen2key = gen2key if gen2key is not None else dimension
        self.dtype = dtype

    __slots__ = ("gen2key", "dtype")

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # A missing key raises KeyError here; Translator.__call__ relies on
        # that to support partial translation.
        r = gen2id[self.gen2key]
        if self.dtype is not None:
            try:
                r = self.dtype(r)
            except (ValueError, TypeError) as err:
                # Report every failed conversion as a TypeError with the same
                # descriptive message, whether the converter rejected the
                # value (ValueError) or its type (TypeError).
                raise TypeError(
                    f"'{r}' is not a valid value for {self.dimension}; "
                    f"expected {self.dtype.__name__}, got {type(r).__name__}."
                ) from err
        return r
181 
182 
class PatchKeyHandler(KeyHandler):
    """A KeyHandler for skymap patches.

    Converts the Gen2 ``"patch"`` value, a string of the form ``"x,y"``, into
    the sequential patch index reported by the tract that the Gen2
    ``"tract"`` value selects from the skymap.
    """
    def __init__(self):
        super().__init__("patch")

    __slots__ = ()

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # A skyMap instance is required here even though the signature allows
        # None: it is indexed by the Gen2 tract ID.
        tract = gen2id["tract"]
        tractInfo = skyMap[tract]
        # Gen2 patch IDs are "x,y" strings; the tract is indexed by the
        # corresponding pair of integers.
        x, y = gen2id["patch"].split(",")
        patchInfo = tractInfo[int(x), int(y)]
        return tractInfo.getSequentialPatchIndex(patchInfo)
199 
200 
class SkyMapKeyHandler(KeyHandler):
    """A KeyHandler for skymaps.

    Populates the Gen3 ``"skymap"`` key with the skymap name supplied by the
    caller; nothing is read from the Gen2 data ID.
    """
    def __init__(self):
        super().__init__("skymap")

    __slots__ = ()

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        return skyMapName
212 
213 
class CalibKeyHandler(KeyHandler):
    """A KeyHandler for master calibration datasets.

    Populates the Gen3 ``"calibration_label"`` key with the label produced by
    `makeCalibrationLabel` from the dataset type name and the Gen2
    ``"calibDate"`` value, plus the ``"ccd"`` and ``"filter"`` values when
    the Gen2 data ID has them.
    """

    def __init__(self):
        super().__init__("calibration_label")

    __slots__ = ()

    def extract(self, gen2id: dict, skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                datasetTypeName: str) -> Any:
        # Docstring inherited from KeyHandler.extract.
        # "calibDate" is mandatory (KeyError if absent); "ccd" and "filter"
        # are optional and passed as None when missing.
        return makeCalibrationLabel(datasetTypeName, gen2id["calibDate"],
                                    ccd=gen2id.get("ccd"), filter=gen2id.get("filter"))
228 
229 
class Translator:
    """Callable object that translates Gen2 Data IDs to Gen3 Data IDs for a
    particular DatasetType.

    Translators should usually be constructed via the `makeMatching` method.

    Parameters
    ----------
    handlers : `list`
        A list of KeyHandlers this Translator should use.
    skyMap : `BaseSkyMap`, optional
        SkyMap instance used to define any tract or patch Dimensions.
    skyMapName : `str`
        Gen3 SkyMap Dimension name to be associated with any tract or patch
        Dimensions.
    datasetTypeName : `str`
        Name of the dataset type whose data IDs this translator handles.
    """
    def __init__(self, handlers: List[KeyHandler], skyMap: Optional[BaseSkyMap], skyMapName: Optional[str],
                 datasetTypeName: str):
        self.handlers = handlers
        self.skyMap = skyMap
        self.skyMapName = skyMapName
        self.datasetTypeName = datasetTypeName

    __slots__ = ("handlers", "skyMap", "skyMapName", "datasetTypeName")

    # Rules used to match Handlers when constructing a Translator.
    # outer key is instrument name, or None for any
    # inner key is DatasetType name, or None for any
    # values are lists of 3-tuples of
    # (frozenset(gen2keys), handler, frozenset(keys to consume))
    _rules: Dict[
        Optional[str],
        Dict[
            Optional[str],
            List[Tuple[FrozenSet[str], KeyHandler, FrozenSet[str]]]
        ]
    ] = {
        None: {
            None: []
        }
    }

    @classmethod
    def addRule(cls, handler: KeyHandler, instrument: Optional[str] = None,
                datasetTypeName: Optional[str] = None, gen2keys: Iterable[str] = (),
                consume: bool = True):
        """Add a KeyHandler and an associated matching rule.

        Parameters
        ----------
        handler : `KeyHandler`
            A KeyHandler instance to add to a Translator when this rule
            matches.
        instrument : `str`
            Gen3 instrument name the Gen2 repository must be associated with
            for this rule to match, or None to match any instrument.
        datasetTypeName : `str`
            Name of the DatasetType this rule matches, or None to match any
            DatasetType.
        gen2keys : iterable of `str`
            Gen2 data ID keys that must all be present for this rule to
            match.  Any iterable is accepted, including one-shot iterators.
        consume : `bool` or `tuple`
            If True (default), remove all entries in gen2keys from the set of
            keys being matched to in order to prevent less-specific handlers
            from matching them.
            May also be a `tuple` listing only the keys to consume.
            If False (or empty), no keys are consumed.
        """
        # Materialize gen2keys exactly once: it may be a one-shot iterator,
        # in which case building a second frozenset from it would silently
        # yield an empty set and make the rule match every data ID.
        requiredKeys = frozenset(gen2keys)
        # Ensure consume is always a frozenset, so we can process it uniformly
        # from here on.
        if consume is True:
            consumedKeys = requiredKeys
        elif consume:
            consumedKeys = frozenset(consume)
        else:
            consumedKeys = frozenset()
        # find the rules for this instrument, or if we haven't seen it before,
        # add a nested dictionary that matches any DatasetType name and then
        # append this rule.
        rulesForInstrument = cls._rules.setdefault(instrument, {None: []})
        rulesForInstrumentAndDatasetType = rulesForInstrument.setdefault(datasetTypeName, [])
        rulesForInstrumentAndDatasetType.append((requiredKeys, handler, consumedKeys))

    @classmethod
    def makeMatching(cls, datasetTypeName: str, gen2keys: Dict[str, type], instrument: Optional[str] = None,
                     skyMap: Optional[BaseSkyMap] = None, skyMapName: Optional[str] = None):
        """Construct a Translator appropriate for instances of the given
        dataset.

        Rules are tried from most to least specific: this instrument and this
        DatasetType, this instrument and any DatasetType, any instrument and
        this DatasetType, then any instrument and any DatasetType.  Within
        each group rules are tried in the order they were added.  A rule
        matches when all of its Gen2 keys are still unconsumed; its consumed
        keys are then hidden from later rules.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        gen2keys: `dict`
            Keys of a Gen2 data ID for this dataset.  Only the keys are
            used for matching.
        instrument: `str`, optional
            Name of the Gen3 instrument dimension for translated data IDs.
        skyMap: `~lsst.skymap.BaseSkyMap`, optional
            The skymap instance that defines any tract/patch data IDs.
        skyMapName : `str`, optional
            Gen3 SkyMap Dimension name to be associated with any tract or patch
            Dimensions.

        Returns
        -------
        translator : `Translator`
            A translator that can be called to transform Gen2 data IDs to
            Gen3 data IDs.
        """
        if instrument is not None:
            rulesForInstrument = cls._rules.get(instrument, {None: []})
        else:
            rulesForInstrument = {None: []}
        rulesForAnyInstrument = cls._rules[None]
        candidateRules = itertools.chain(
            rulesForInstrument.get(datasetTypeName, []),     # this instrument, this DatasetType
            rulesForInstrument[None],                        # this instrument, any DatasetType
            rulesForAnyInstrument.get(datasetTypeName, []),  # any instrument, this DatasetType
            rulesForAnyInstrument[None],                     # any instrument, any DatasetType
        )
        matchedHandlers = []
        # Work on a copy of the key set; consumed keys are removed as rules
        # match so that later (less specific) rules cannot claim them.
        targetKeys = set(gen2keys)
        for ruleKeys, ruleHandler, consume in candidateRules:
            if ruleKeys.issubset(targetKeys):
                matchedHandlers.append(ruleHandler)
                targetKeys -= consume
        return Translator(matchedHandlers, skyMap=skyMap, skyMapName=skyMapName,
                          datasetTypeName=datasetTypeName)

    def __call__(self, gen2id: Dict[str, Any], *, partial: bool = False, log: Optional[Log] = None):
        """Return a Gen3 data ID that corresponds to the given Gen2 data ID.

        Parameters
        ----------
        gen2id : `dict`
            Gen2 data ID to translate.
        partial : `bool`, optional
            If `True`, skip any handler that raises `KeyError` instead of
            propagating the exception, so the returned data ID may lack some
            dimensions.  Defaults to `False`.
        log : `lsst.log.Log`, optional
            If provided, a debug message is logged for each handler skipped
            because ``partial`` is `True`.

        Returns
        -------
        gen3id : `dict`
            Gen3 data ID populated by this translator's handlers.

        Raises
        ------
        KeyError
            Raised by a handler (e.g. for a missing Gen2 key) when
            ``partial`` is `False`.
        """
        gen3id = {}
        for handler in self.handlers:
            try:
                handler.translate(gen2id, gen3id, skyMap=self.skyMap, skyMapName=self.skyMapName,
                                  datasetTypeName=self.datasetTypeName)
            except KeyError:
                if partial:
                    if log is not None:
                        log.debug("Failed to translate %s from %s.", handler.dimension, gen2id)
                    continue
                else:
                    raise
        return gen3id

    @property
    def dimensionNames(self):
        """The names of the dimensions populated by this Translator
        (`frozenset`).
        """
        return frozenset(h.dimension for h in self.handlers)
384 
385 
# Default rules, registered for any instrument.  Rules that share the same
# instrument and DatasetType are tried in the order they are added, so the
# order of the statements below matters.

# Add "skymap" to Gen3 ID if Gen2 ID has a "tract" key.
# consume=False leaves "tract" available for the tract-copying rule below.
Translator.addRule(SkyMapKeyHandler(), gen2keys=("tract",), consume=False)

# Add "skymap" to Gen3 ID if DatasetType is one of a few specific ones
# (these rules require no Gen2 keys).
for coaddName in ("deep", "goodSeeing", "psfMatched", "dcr"):
    Translator.addRule(SkyMapKeyHandler(), datasetTypeName=f"{coaddName}Coadd_skyMap")

# Translate Gen2 str patch IDs to Gen3 sequential integers.
Translator.addRule(PatchKeyHandler(), gen2keys=("patch",))

# Copy Gen2 "tract" to Gen3 "tract".
Translator.addRule(CopyKeyHandler("tract", dtype=int), gen2keys=("tract",))

# Give instrument-level transmission datasets a constant calibration_label
# of "unbounded"; these are considered calibration products in Gen3.
for datasetTypeName in ("transmission_sensor", "transmission_optics", "transmission_filter"):
    Translator.addRule(ConstantKeyHandler("calibration_label", "unbounded"),
                       datasetTypeName=datasetTypeName)

# Translate Gen2 pixel_id to Gen3 skypix.
# TODO: For now, we just assume that the refcat indexer uses htm7, since that's
# what the ps1 refcat in testdata_ci_hsc uses.
Translator.addRule(CopyKeyHandler("htm7", gen2key="pixel_id", dtype=int), gen2keys=("pixel_id",))

# Translate Gen2 calibDate and datasetType to Gen3 calibration_label.
Translator.addRule(CalibKeyHandler(), gen2keys=("calibDate",))