Coverage for python/lsst/obs/base/gen2to3/calibRepoConverter.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepoConverter"]
25from collections import defaultdict
26import os
27import sqlite3
28from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Tuple, Optional
30import astropy.time
32from lsst.daf.butler import DataCoordinate, FileDataset, Timespan
33from .repoConverter import RepoConverter
34from .repoWalker import RepoWalker
36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true
37 from lsst.daf.butler import DatasetType, StorageClass, FormatterParameter
38 from .repoWalker.scanner import PathElementHandler
39 from ..cameraMapper import CameraMapper
40 from ..mapping import Mapping as CameraMapperMapping # disambiguate from collections.abc.Mapping
class CalibRepoConverter(RepoConverter):
    """A specialization of `RepoConverter` for calibration repositories.

    Parameters
    ----------
    mapper : `CameraMapper`
        Gen2 mapper for the data repository. The root associated with the
        mapper is ignored and need not match the root of the repository.
    collection : `str`
        Name of the collection that converted calibration datasets are
        certified into (it is the first argument passed to
        ``Registry.certify`` in `_finish`).
    kwds
        Additional keyword arguments are forwarded to (and required by)
        `RepoConverter`.
    """

    def __init__(self, *, mapper: CameraMapper, collection: str, **kwds):
        super().__init__(run=None, **kwds)
        self.mapper = mapper
        self.collection = collection
        # Dataset types of every walker target created by
        # makeRepoWalkerTarget.
        self._datasetTypes = set()

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return datasetTypeName in self.instrument.getCuratedCalibrationNames()

    def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
        # Docstring inherited from RepoConverter.
        yield from self.mapper.calibrations.items()

    def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
                             storageClass: StorageClass, formatter: FormatterParameter = None,
                             targetHandler: Optional[PathElementHandler] = None,
                             ) -> RepoWalker.Target:
        # Docstring inherited from RepoConverter.
        target = RepoWalker.Target(
            datasetTypeName=datasetTypeName,
            storageClass=storageClass,
            template=template,
            keys=keys,
            instrument=self.task.instrument.getName(),
            universe=self.task.registry.dimensions,
            formatter=formatter,
            targetHandler=targetHandler,
            translatorFactory=self.task.translatorFactory,
        )
        # Remember the dataset type so it is known to this converter later.
        self._datasetTypes.add(target.datasetType)
        return target

    def _queryGen2CalibRegistry(self, db: sqlite3.Connection, datasetType: DatasetType, calibDate: str
                                ) -> Iterator[sqlite3.Row]:
        """Query the Gen2 calibration registry for validity ranges.

        Parameters
        ----------
        db : `sqlite3.Connection`
            Open connection to the Gen2 calibration registry database.
        datasetType : `DatasetType`
            Dataset type whose validity ranges are requested. Its name is
            used to look up the table to query in the Gen2 mapper, and its
            dimensions decide which columns are selected.
        calibDate : `str`
            Value matched against the ``calibDate`` column.

        Yields
        ------
        row
            One result row per distinct combination of ``validStart``,
            ``validEnd``, the configured CCD key column and ``filter``.
            The CCD key and ``filter`` columns are selected as ``NULL`` when
            the dataset type lacks the ``detector`` / ``physical_filter``
            dimension, so every row has the same set of columns. Rows are
            `sqlite3.Row` instances only if the caller has set
            ``db.row_factory`` accordingly (as `_finish` does).

        Notes
        -----
        Nothing is yielded, and a warning is logged, when the Gen2 mapper
        lists no tables for the dataset type or when the query fails with
        `sqlite3.OperationalError`.
        """
        dimensionNames = datasetType.dimensions.names
        ccdKey = self.task.config.ccdKey
        fields = ["validStart", "validEnd"]
        if "detector" in dimensionNames:
            fields.append(ccdKey)
        else:
            fields.append(f"NULL AS {ccdKey}")
        if "physical_filter" in dimensionNames:
            fields.append("filter")
        else:
            assert "band" not in dimensionNames
            fields.append("NULL AS filter")
        tables = self.mapper.mappings[datasetType.name].tables
        if not tables:
            # There is no table to name here: ``tables`` is None or empty,
            # so it must not be indexed while reporting the problem.
            self.task.log.warn("Could not extract calibration ranges for %s in %s; "
                               "no tables in Gen2 mapper.",
                               datasetType.name, self.root)
            return
        # Column and table names come from the mapper/config; only the
        # calibDate value is bound as a query parameter.
        query = f"SELECT DISTINCT {', '.join(fields)} FROM {tables[0]} WHERE calibDate = ?;"
        try:
            results = db.execute(query, (calibDate,))
        except sqlite3.OperationalError as e:
            self.task.log.warn("Could not extract calibration ranges for %s in %s from table %s: %r",
                               datasetType.name, self.root, tables[0], e)
            return
        yield from results

    def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
        """Certify ingested calibration datasets with Gen2 validity ranges.

        Parameters
        ----------
        datasets : `Mapping`
            Ingested datasets, keyed first by `DatasetType` and then by
            ``calibDate`` (`str` or `None`); values are lists of
            `FileDataset`. Dataset types for which ``isCalibration()`` is
            false are skipped.

        Raises
        ------
        RuntimeError
            Raised if ``calibRegistry.sqlite3`` does not exist in the
            repository root.
        """
        # Read Gen2 calibration repository and extract validity ranges for
        # all datasetType + calibDate combinations we ingested.
        calibFile = os.path.join(self.root, "calibRegistry.sqlite3")
        # If the registry file does not exist this indicates a problem.
        # We check explicitly because sqlite will try to create the
        # missing file if it can.
        if not os.path.exists(calibFile):
            raise RuntimeError("Attempting to convert calibrations but no registry database"
                               f" found in {self.root}")
        # We will gather results in a dict-of-lists keyed by Timespan, since
        # Registry.certify operates on one Timespan and multiple refs at a
        # time.
        refsByTimespan = defaultdict(list)
        day = astropy.time.TimeDelta(1, format="jd", scale="tai")
        db = sqlite3.connect(calibFile)
        try:
            db.row_factory = sqlite3.Row
            for datasetType, datasetsByCalibDate in datasets.items():
                if not datasetType.isCalibration():
                    continue
                ccdKey = self.task.config.ccdKey
                hasDetector = "detector" in datasetType.dimensions.names
                hasFilter = "physical_filter" in datasetType.dimensions.names
                gen2keys = {}
                if hasDetector:
                    gen2keys[ccdKey] = int
                if hasFilter:
                    gen2keys["filter"] = str
                translator = self.instrument.makeDataIdTranslatorFactory().makeMatching(
                    datasetType.name,
                    gen2keys,
                    instrument=self.instrument.getName()
                )
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    assert calibDate is not None, ("datasetType.isCalibration() is set by "
                                                   "the presence of calibDate in the Gen2 template")
                    # Build a mapping that lets us find DatasetRefs by data ID,
                    # for this DatasetType and calibDate. We know there is only
                    # one ref for each data ID (given DatasetType and calibDate
                    # as well).
                    refsByDataId = {}
                    for dataset in datasetsForCalibDate:
                        refsByDataId.update((ref.dataId, ref) for ref in dataset.refs)
                    # Query the Gen2 calibration repo for the validity ranges
                    # for this DatasetType and calibDate, and look up the
                    # appropriate refs by data ID.
                    for row in self._queryGen2CalibRegistry(db, datasetType, calibDate):
                        # For validity times we use TAI as some gen2 repos have
                        # validity dates very far in the past or future.
                        # NOTE(review): the end is pushed one day past
                        # validEnd, presumably because Gen2 end dates are
                        # inclusive - confirm.
                        timespan = Timespan(
                            astropy.time.Time(row["validStart"], format="iso", scale="tai"),
                            astropy.time.Time(row["validEnd"], format="iso", scale="tai") + day,
                        )
                        # Make a Gen2 data ID from query results.
                        gen2id = {}
                        if hasDetector:
                            gen2id[ccdKey] = row[ccdKey]
                        if hasFilter:
                            gen2id["filter"] = row["filter"]
                        # Translate that to Gen3.
                        gen3id, _ = translator(gen2id)
                        dataId = DataCoordinate.standardize(gen3id, graph=datasetType.dimensions)
                        ref = refsByDataId.get(dataId)
                        if ref is not None:
                            refsByTimespan[timespan].append(ref)
                        else:
                            # The Gen2 calib registry mentions this dataset,
                            # but it isn't included in what we've ingested.
                            # This might sometimes be a problem, but it should
                            # usually represent someone just trying to convert
                            # a subset of the Gen2 repo, so I don't think it's
                            # appropriate to warn or even log at info, since in
                            # that case there may be a _lot_ of these messages.
                            self.task.log.debug(
                                "Gen2 calibration registry entry has no dataset: %s for calibDate=%s, %s.",
                                datasetType.name, calibDate, dataId
                            )
        finally:
            # Always release the Gen2 registry connection, even if reading
            # or translating a row failed.
            db.close()
        # Done reading from Gen2, time to certify into Gen3.
        for timespan, refs in refsByTimespan.items():
            self.task.registry.certify(self.collection, refs, timespan)

    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
        """Return the name of the run that datasets should be ingested into.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        calibDate : `str`, optional
            Gen2 ``calibDate`` of the dataset. If `None`, the base-class
            implementation is used.

        Returns
        -------
        run : `str`
            The base-class result when ``calibDate`` is `None`; otherwise
            the instrument's collection name built from ``"calib"``,
            ``"gen2"`` and ``calibDate``.
        """
        if calibDate is None:
            return super().getRun(datasetTypeName)
        return self.instrument.makeCollectionName("calib", "gen2", calibDate)

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    mapper: CameraMapper
    """Gen2 mapper associated with this repository.
    """

    collection: str
    """Name of the collection that calibration datasets are certified into.
    """