Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["CalibRepoConverter"] 

24 

25from collections import defaultdict 

26import os 

27import sqlite3 

28from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Tuple, Optional 

29 

30import astropy.time 

31 

32from lsst.daf.butler import DataCoordinate, FileDataset, Timespan 

33from .repoConverter import RepoConverter 

34from .repoWalker import RepoWalker 

35 

36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true

37 from lsst.daf.butler import DatasetType, StorageClass, FormatterParameter 

38 from .repoWalker.scanner import PathElementHandler 

39 from ..cameraMapper import CameraMapper 

40 from ..mapping import Mapping as CameraMapperMapping # disambiguate from collections.abc.Mapping 

41 

42 

class CalibRepoConverter(RepoConverter):
    """A specialization of `RepoConverter` for calibration repositories.

    Parameters
    ----------
    mapper : `CameraMapper`
        Gen2 mapper for the data repository.  The root associated with the
        mapper is ignored and need not match the root of the repository.
    collection : `str`
        Name of the collection that converted calibration datasets are
        certified into (with their validity ranges) when conversion
        finishes.
    kwds
        Additional keyword arguments are forwarded to (and required by)
        `RepoConverter`.
    """

    def __init__(self, *, mapper: CameraMapper, collection: str, **kwds):
        super().__init__(run=None, **kwds)
        self.mapper = mapper
        self.collection = collection
        # DatasetTypes of every walker target created by
        # makeRepoWalkerTarget.
        self._datasetTypes = set()

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return datasetTypeName in self.instrument.getCuratedCalibrationNames()

    def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
        # Docstring inherited from RepoConverter.
        yield from self.mapper.calibrations.items()

    def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
                             storageClass: StorageClass, formatter: FormatterParameter = None,
                             targetHandler: Optional[PathElementHandler] = None,
                             ) -> RepoWalker.Target:
        # Docstring inherited from RepoConverter.
        target = RepoWalker.Target(
            datasetTypeName=datasetTypeName,
            storageClass=storageClass,
            template=template,
            keys=keys,
            instrument=self.task.instrument.getName(),
            universe=self.task.registry.dimensions,
            formatter=formatter,
            targetHandler=targetHandler,
            translatorFactory=self.task.translatorFactory,
        )
        # Remember the dataset type of every target we hand out.
        self._datasetTypes.add(target.datasetType)
        return target

    def _queryGen2CalibRegistry(self, db: sqlite3.Connection, datasetType: DatasetType, calibDate: str
                                ) -> Iterator[sqlite3.Row]:
        """Query the Gen2 calibration registry for validity ranges.

        Parameters
        ----------
        db : `sqlite3.Connection`
            Open connection to the Gen2 calibration registry database.
        datasetType : `DatasetType`
            Dataset type whose Gen2 registry table should be queried.  Its
            dimensions determine whether the detector and filter columns
            are selected or replaced by ``NULL``.
        calibDate : `str`
            Value matched against the ``calibDate`` column.

        Yields
        ------
        row
            Distinct result rows with columns ``validStart``, ``validEnd``,
            the configured ``ccdKey`` column, and ``filter``.  Nothing is
            yielded (and a warning is logged) if the Gen2 mapper lists no
            tables for this dataset type or if the query fails with
            `sqlite3.OperationalError`.
        """
        ccdKey = self.task.config.ccdKey
        dimensionNames = datasetType.dimensions.names
        fields = ["validStart", "validEnd"]
        # Always select the same set of column names so callers see a
        # uniform row layout; columns that do not apply become NULL.
        if "detector" in dimensionNames:
            fields.append(ccdKey)
        else:
            fields.append(f"NULL AS {ccdKey}")
        if "physical_filter" in dimensionNames:
            fields.append("filter")
        else:
            assert "band" not in dimensionNames
            fields.append("NULL AS filter")
        tables = self.mapper.mappings[datasetType.name].tables
        if tables is None or len(tables) == 0:
            # There is no table to name here: ``tables`` is None or empty,
            # so it must not be indexed when building the message.
            self.task.log.warn("Could not extract calibration ranges for %s in %s; "
                               "no tables in Gen2 mapper.",
                               datasetType.name, self.root)
            return
        # Column and table names cannot be bound as SQL parameters; they
        # come from the task configuration and the Gen2 mapper.  The
        # calibDate value is bound as a parameter.
        query = f"SELECT DISTINCT {', '.join(fields)} FROM {tables[0]} WHERE calibDate = ?;"
        try:
            results = db.execute(query, (calibDate,))
        except sqlite3.OperationalError as e:
            self.task.log.warn("Could not extract calibration ranges for %s in %s from table %s: %r",
                               datasetType.name, self.root, tables[0], e)
            return
        yield from results

    def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
        """Certify ingested calibration datasets with Gen2 validity ranges.

        Parameters
        ----------
        datasets : `Mapping`
            Nested mapping of ingested datasets, keyed first by
            `DatasetType` and then by Gen2 ``calibDate`` string.  Dataset
            types for which ``isCalibration()`` is false are skipped.

        Raises
        ------
        RuntimeError
            Raised if ``calibRegistry.sqlite3`` does not exist in the
            repository root.
        """
        # Read Gen2 calibration repository and extract validity ranges for
        # all datasetType + calibDate combinations we ingested.
        calibFile = os.path.join(self.root, "calibRegistry.sqlite3")
        # If the registry file does not exist this indicates a problem.
        # We check explicitly because sqlite will try to create the
        # missing file if it can.
        if not os.path.exists(calibFile):
            raise RuntimeError("Attempting to convert calibrations but no registry database"
                               f" found in {self.root}")
        # We will gather results in a dict-of-lists keyed by Timespan, since
        # Registry.certify operates on one Timespan and multiple refs at a
        # time.
        refsByTimespan = defaultdict(list)
        ccdKey = self.task.config.ccdKey
        day = astropy.time.TimeDelta(1, format="jd", scale="tai")
        db = sqlite3.connect(calibFile)
        try:
            db.row_factory = sqlite3.Row
            for datasetType, datasetsByCalibDate in datasets.items():
                if not datasetType.isCalibration():
                    continue
                # These depend only on the dataset type, so evaluate them
                # once instead of once per registry row.
                hasDetector = "detector" in datasetType.dimensions.names
                hasFilter = "physical_filter" in datasetType.dimensions.names
                gen2keys = {}
                if hasDetector:
                    gen2keys[ccdKey] = int
                if hasFilter:
                    gen2keys["filter"] = str
                translator = self.instrument.makeDataIdTranslatorFactory().makeMatching(
                    datasetType.name,
                    gen2keys,
                    instrument=self.instrument.getName()
                )
                for calibDate, datasetsForCalibDate in datasetsByCalibDate.items():
                    assert calibDate is not None, ("datasetType.isCalibration() is set by "
                                                   "the presence of calibDate in the Gen2 template")
                    # Build a mapping that lets us find DatasetRefs by data
                    # ID, for this DatasetType and calibDate.  We know there
                    # is only one ref for each data ID (given DatasetType
                    # and calibDate as well).
                    refsByDataId = {}
                    for dataset in datasetsForCalibDate:
                        refsByDataId.update((ref.dataId, ref) for ref in dataset.refs)
                    # Query the Gen2 calibration repo for the validity
                    # ranges for this DatasetType and calibDate, and look up
                    # the appropriate refs by data ID.
                    for row in self._queryGen2CalibRegistry(db, datasetType, calibDate):
                        # For validity times we use TAI as some gen2 repos
                        # have validity dates very far in the past or
                        # future.  One day is added to validEnd;
                        # NOTE(review): presumably because Gen2 end dates
                        # are inclusive while Timespan ends are exclusive -
                        # confirm.
                        timespan = Timespan(
                            astropy.time.Time(row["validStart"], format="iso", scale="tai"),
                            astropy.time.Time(row["validEnd"], format="iso", scale="tai") + day,
                        )
                        # Make a Gen2 data ID from query results.
                        gen2id = {}
                        if hasDetector:
                            gen2id[ccdKey] = row[ccdKey]
                        if hasFilter:
                            gen2id["filter"] = row["filter"]
                        # Translate that to Gen3.
                        gen3id, _ = translator(gen2id)
                        dataId = DataCoordinate.standardize(gen3id, graph=datasetType.dimensions)
                        ref = refsByDataId.get(dataId)
                        if ref is not None:
                            refsByTimespan[timespan].append(ref)
                        else:
                            # The Gen2 calib registry mentions this dataset,
                            # but it isn't included in what we've ingested.
                            # This might sometimes be a problem, but it
                            # should usually represent someone just trying
                            # to convert a subset of the Gen2 repo, so I
                            # don't think it's appropriate to warn or even
                            # log at info, since in that case there may be a
                            # _lot_ of these messages.
                            self.task.log.debug(
                                "Gen2 calibration registry entry has no dataset: %s for calibDate=%s, %s.",
                                datasetType.name, calibDate, dataId
                            )
        finally:
            # Release the SQLite handle even if reading or translating
            # fails part-way through.
            db.close()
        # Done reading from Gen2, time to certify into Gen3.
        for timespan, refs in refsByTimespan.items():
            self.task.registry.certify(self.collection, refs, timespan)

    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
        """Return the name of the run that datasets should be ingested into.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.
        calibDate : `str`, optional
            Gen2 calibration date of the dataset.  If `None`, the lookup
            is delegated to the base class implementation.

        Returns
        -------
        run : `str`
            The base class result when ``calibDate`` is `None`; otherwise
            the collection name the instrument builds from ``"calib"``,
            ``"gen2"`` and ``calibDate``.
        """
        if calibDate is None:
            return super().getRun(datasetTypeName)
        return self.instrument.makeCollectionName("calib", "gen2", calibDate)

    # Class attributes that will be shadowed by public instance attributes;
    # defined here only for documentation purposes.

    mapper: CameraMapper
    """Gen2 mapper associated with this repository.
    """

    collection: str
    """Name of the collection that calibration datasets are certified into.
    """