lsst.obs.base  20.0.0-50-g2b8b609+534456e0aa
calibRepoConverter.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import annotations
22 
23 __all__ = ["CalibRepoConverter"]
24 
25 from collections import defaultdict
26 import os
27 import sqlite3
28 from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Tuple, Optional
29 
30 import astropy.time
31 
32 from lsst.daf.butler import DataCoordinate, FileDataset, Timespan
33 from .repoConverter import RepoConverter
34 from .repoWalker import RepoWalker
35 
36 if TYPE_CHECKING:
37  from lsst.daf.butler import DatasetType, StorageClass, FormatterParameter
38  from .repoWalker.scanner import PathElementHandler
39  from ..cameraMapper import CameraMapper
40  from ..mapping import Mapping as CameraMapperMapping # disambiguate from collections.abc.Mapping
41 
42 
44  """A specialization of `RepoConverter` for calibration repositories.
45 
46  Parameters
47  ----------
48  mapper : `CameraMapper`
49  Gen2 mapper for the data repository. The root associated with the
50  mapper is ignored and need not match the root of the repository.
51  kwds
52  Additional keyword arguments are forwarded to (and required by)
53  `RepoConverter`.
54  """
55 
def __init__(self, *, mapper: CameraMapper, collection: str, **kwds):
    """Construct a converter for a Gen2 calibration repository.

    Parameters
    ----------
    mapper : `CameraMapper`
        Gen2 mapper for the data repository; stored as ``self.mapper``.
    collection : `str`
        Collection name; stored as ``self.collection`` and passed to
        ``Registry.certify`` when validity ranges are certified.
    **kwds
        Additional keyword arguments forwarded to the base class
        constructor.
    """
    # The base class is always given ``run=None``; `getRun` decides the
    # run name instead.
    super().__init__(run=None, **kwds)
    # Dataset types of every walker target created so far.
    self._datasetTypes = set()
    self.collection = collection
    self.mapper = mapper
61 
def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
    # Docstring inherited from RepoConverter.
    # A dataset type is special here exactly when the instrument lists it
    # among its curated calibration names.
    curatedNames = self.instrument.getCuratedCalibrationNames()
    return datasetTypeName in curatedNames
65 
def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
    # Docstring inherited from RepoConverter.
    # Only the mapper's calibration mappings are visited by this converter.
    for datasetTypeName, mapping in self.mapper.calibrations.items():
        yield datasetTypeName, mapping
69 
def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
                         storageClass: StorageClass, formatter: FormatterParameter = None,
                         targetHandler: Optional[PathElementHandler] = None,
                         ) -> RepoWalker.Target:
    # Docstring inherited from RepoConverter.
    # Collect the constructor arguments first; the task supplies the
    # instrument name, the dimension universe and the translator factory.
    targetArgs = {
        "datasetTypeName": datasetTypeName,
        "storageClass": storageClass,
        "template": template,
        "keys": keys,
        "instrument": self.task.instrument.getName(),
        "universe": self.task.registry.dimensions,
        "formatter": formatter,
        "targetHandler": targetHandler,
        "translatorFactory": self.task.translatorFactory,
    }
    walkerTarget = RepoWalker.Target(**targetArgs)
    # Remember the dataset type of every target handed out.
    self._datasetTypes.add(walkerTarget.datasetType)
    return walkerTarget
88 
def _queryGen2CalibRegistry(self, db: sqlite3.Connection, datasetType: DatasetType, calibDate: str
                            ) -> Iterator[sqlite3.Row]:
    """Query the Gen2 calibration registry for validity-range rows.

    Parameters
    ----------
    db : `sqlite3.Connection`
        Open connection to the Gen2 calibration registry database.
    datasetType : `DatasetType`
        Dataset type to query for.  Its name selects the Gen2 mapping
        (and hence the table), and its dimensions decide which columns
        are selected.
    calibDate : `str`
        Value matched against the ``calibDate`` column.

    Yields
    ------
    row
        Distinct result rows, in the form produced by the connection's
        row factory, with columns ``validStart``, ``validEnd``, the
        configured CCD key and ``filter``.  The CCD key column is NULL
        when the dataset type has no ``detector`` dimension; ``filter``
        is NULL when it has no ``physical_filter`` dimension.

    Notes
    -----
    If the Gen2 mapper lists no tables for the dataset type, or the query
    raises `sqlite3.OperationalError`, a warning is logged and nothing is
    yielded.
    """
    dimensionNames = datasetType.dimensions.names
    ccdKey = self.task.config.ccdKey
    fields = ["validStart", "validEnd"]
    # Always select the same four columns so callers can index rows
    # uniformly; columns that do not apply are filled with NULL.
    if "detector" in dimensionNames:
        fields.append(ccdKey)
    else:
        fields.append(f"NULL AS {ccdKey}")
    if "physical_filter" in dimensionNames:
        fields.append("filter")
    else:
        assert "band" not in dimensionNames
        fields.append("NULL AS filter")
    tables = self.mapper.mappings[datasetType.name].tables
    if not tables:
        # There is no table name to report in this branch, so the message
        # takes only the dataset type name and the repository root.
        self.task.log.warn("Could not extract calibration ranges for %s in %s; "
                           "no tables in Gen2 mapper.",
                           datasetType.name, self.root)
        return
    # Column and table names come from the task configuration and the
    # Gen2 mapper; only the calibDate value is bound as a parameter.
    query = f"SELECT DISTINCT {', '.join(fields)} FROM {tables[0]} WHERE calibDate = ?;"
    try:
        results = db.execute(query, (calibDate,))
    except sqlite3.OperationalError as e:
        self.task.log.warn("Could not extract calibration ranges for %s in %s from table %s: %r",
                           datasetType.name, self.root, tables[0], e)
        return
    yield from results
116 
def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
    """Certify ingested calibrations using validity ranges read from Gen2.

    Parameters
    ----------
    datasets : `Mapping`
        Ingested datasets, keyed first by `DatasetType` and then by the
        Gen2 ``calibDate`` string.  Dataset types for which
        ``isCalibration()`` is false are skipped.

    Raises
    ------
    RuntimeError
        Raised if ``calibRegistry.sqlite3`` does not exist in
        ``self.root``.
    """
    # Read Gen2 calibration repository and extract validity ranges for
    # all datasetType + calibDate combinations we ingested.
    calibFile = os.path.join(self.root, "calibRegistry.sqlite3")
    # If the registry file does not exist this indicates a problem.
    # We check explicitly because sqlite will try to create the
    # missing file if it can.
    if not os.path.exists(calibFile):
        raise RuntimeError("Attempting to convert calibrations but no registry database"
                           f" found in {self.root}")
    # We will gather results in a dict-of-lists keyed by Timespan, since
    # Registry.certify operates on one Timespan and multiple refs at a
    # time.
    refsByTimespan = defaultdict(list)
    # One day is added to every ``validEnd`` below.
    # NOTE(review): presumably Gen2 end dates are inclusive while the
    # Timespan end is exclusive - confirm.
    day = astropy.time.TimeDelta(1, format="jd", scale="tai")
    db = sqlite3.connect(calibFile)
    try:
        db.row_factory = sqlite3.Row
        for datasetType, datasetsByCalibDate in datasets.items():
            if not datasetType.isCalibration():
                continue
            # These do not change per row, so evaluate them once per
            # dataset type.
            ccdKey = self.task.config.ccdKey
            hasDetector = "detector" in datasetType.dimensions.names
            hasFilter = "physical_filter" in datasetType.dimensions.names
            gen2keys = {}
            if hasDetector:
                gen2keys[ccdKey] = int
            if hasFilter:
                gen2keys["filter"] = str
            translator = self.instrument.makeDataIdTranslatorFactory().makeMatching(
                datasetType.name,
                gen2keys,
                instrument=self.instrument.getName()
            )
            for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                assert calibDate is not None, ("datasetType.isCalibration() is set by "
                                               "the presence of calibDate in the Gen2 template")
                # Build a mapping that lets us find DatasetRefs by data ID,
                # for this DatasetType and calibDate.  We know there is only
                # one ref for each data ID (given DatasetType and calibDate
                # as well).
                refsByDataId = {}
                for dataset in datasetsForCalibDate:
                    refsByDataId.update((ref.dataId, ref) for ref in dataset.refs)
                # Query the Gen2 calibration repo for the validity ranges
                # for this DatasetType and calibDate, and look up the
                # appropriate refs by data ID.
                for row in self._queryGen2CalibRegistry(db, datasetType, calibDate):
                    # For validity times we use TAI as some gen2 repos have
                    # validity dates very far in the past or future.
                    timespan = Timespan(
                        astropy.time.Time(row["validStart"], format="iso", scale="tai"),
                        astropy.time.Time(row["validEnd"], format="iso", scale="tai") + day,
                    )
                    # Make a Gen2 data ID from query results.
                    gen2id = {}
                    if hasDetector:
                        gen2id[ccdKey] = row[ccdKey]
                    if hasFilter:
                        gen2id["filter"] = row["filter"]
                    # Translate that to Gen3.
                    gen3id, _ = translator(gen2id)
                    dataId = DataCoordinate.standardize(gen3id, graph=datasetType.dimensions)
                    ref = refsByDataId.get(dataId)
                    if ref is not None:
                        refsByTimespan[timespan].append(ref)
                    else:
                        # The Gen2 calib registry mentions this dataset, but
                        # it isn't included in what we've ingested.  This
                        # might sometimes be a problem, but it should
                        # usually represent someone just trying to convert a
                        # subset of the Gen2 repo, so I don't think it's
                        # appropriate to warn or even log at info, since in
                        # that case there may be a _lot_ of these messages.
                        self.task.log.debug(
                            "Gen2 calibration registry entry has no dataset: %s for calibDate=%s, %s.",
                            datasetType.name, calibDate, dataId
                        )
    finally:
        # Release the Gen2 registry connection whether or not reading
        # succeeded; nothing below needs it.
        db.close()
    # Done reading from Gen2, time to certify into Gen3.
    for timespan, refs in refsByTimespan.items():
        self.task.registry.certify(self.collection, refs, timespan)
194 
def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
    """Return the name of the run for a dataset type.

    Parameters
    ----------
    datasetTypeName : `str`
        Name of the dataset type; used only when ``calibDate`` is `None`.
    calibDate : `str`, optional
        Gen2 calibration date.  When given, the run name is built by the
        instrument from ``"calib"``, ``"gen2"`` and this value.

    Returns
    -------
    run : `str`
        The instrument-generated collection name when ``calibDate`` is
        given; otherwise whatever the base class ``getRun`` returns.
    """
    if calibDate is not None:
        return self.instrument.makeCollectionName("calib", "gen2", calibDate)
    return super().getRun(datasetTypeName)
200 
201  # Class attributes that will be shadowed by public instance attributes;
202  # defined here only for documentation purposes.
203 
204  mapper: CameraMapper
205  """Gen2 mapper associated with this repository.
206  """
lsst.obs.base.gen2to3.repoConverter.RepoConverter.instrument
instrument
Definition: repoConverter.py:212
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.getRun
str getRun(self, str datasetTypeName, Optional[str] calibDate=None)
Definition: calibRepoConverter.py:195
lsst.obs.base.gen2to3.repoConverter.RepoConverter.root
root
Definition: repoConverter.py:211
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter._queryGen2CalibRegistry
Iterator[sqlite3.Row] _queryGen2CalibRegistry(self, sqlite3.Connection db, DatasetType datasetType, str calibDate)
Definition: calibRepoConverter.py:89
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.collection
collection
Definition: calibRepoConverter.py:59
lsst.obs.base.gen2to3.repoConverter.RepoConverter.task
task
Definition: repoConverter.py:210
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter._datasetTypes
_datasetTypes
Definition: calibRepoConverter.py:60
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.iterMappings
Iterator[Tuple[str, CameraMapperMapping]] iterMappings(self)
Definition: calibRepoConverter.py:66
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.__init__
def __init__(self, *CameraMapper mapper, str collection, **kwds)
Definition: calibRepoConverter.py:56
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.isDatasetTypeSpecial
bool isDatasetTypeSpecial(self, str datasetTypeName)
Definition: calibRepoConverter.py:62
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter
Definition: calibRepoConverter.py:43
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.mapper
mapper
Definition: calibRepoConverter.py:58
lsst.obs.base.gen2to3.repoConverter.RepoConverter
Definition: repoConverter.py:180
lsst.obs.base.gen2to3.calibRepoConverter.CalibRepoConverter.makeRepoWalkerTarget
RepoWalker.Target makeRepoWalkerTarget(self, str datasetTypeName, str template, Dict[str, type] keys, StorageClass storageClass, FormatterParameter formatter=None, Optional[PathElementHandler] targetHandler=None)
Definition: calibRepoConverter.py:70