Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 16%

120 statements  

coverage.py v7.4.0, created at 2024-01-25 10:50 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]

import logging
import warnings
from abc import abstractmethod
from collections.abc import Callable, Collection, Mapping
from typing import TYPE_CHECKING, Any, cast
from uuid import UUID

import astropy.time
from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
from lsst.utils.introspection import find_outside_stacklevel

from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
from ._spatial import RegionTypeError, RegionTypeWarning

if TYPE_CHECKING:
    from lsst.sphgeom import Region

    from ..queries import SqlQueryContext
    from ._schema import ObsCoreSchema
    from ._spatial import SpatialObsCorePlugin

_LOG = logging.getLogger(__name__)

# Map extra column type to a conversion method that takes a string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # Expect an integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
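

# Illustrative sketch (not part of the original module): how the table
# above converts the string produced by an ``ExtraColumnConfig`` template
# into a typed column value.
def _example_type_conversion() -> None:
    assert _TYPE_CONVERSION[ExtraColumnType.bool]("0") is False  # Integers-as-strings map to bool.
    assert _TYPE_CONVERSION[ExtraColumnType.int]("42") == 42
    assert _TYPE_CONVERSION[ExtraColumnType.float]("0.5") == 0.5
    assert _TYPE_CONVERSION[ExtraColumnType.string]("r") == "r"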


class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure."""

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        """Return a region for a given DataId that corresponds to an exposure.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            The exposure region; `None` is returned if the region cannot be
            determined.
        """
        raise NotImplementedError()
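

# A minimal concrete-factory sketch (illustrative, not part of the
# original module): a real implementation would use ``context`` to query
# the registry for the exposure's region; this one always reports that
# the region cannot be determined.
class _UnknownRegionFactory(ExposureRegionFactory):
    """Example factory that never finds a region."""

    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        # ``None`` is the documented "cannot be determined" result.
        return None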


Record = dict[str, Any]


class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of the obscore schema.
    universe : `DimensionUniverse`
        Registry dimension universe.
    spatial_plugins : `~collections.abc.Collection` of `SpatialObsCorePlugin`
        Spatial plugins.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory that returns a region for an exposure data ID.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: ExposureRegionFactory | None = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref; its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned
            if the dataset does not need to be stored in the obscore table,
            e.g. when its dataset type is not in the obscore configuration.

        Notes
        -----
        This method filters datasets by dataset type and returns `None` if
        the dataset type is not configured. It does not check the dataset's
        run name against configured collections; all runs are acceptable to
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId
        # _LOG.debug("New record, dataId=%s", dataId.full)
        # _LOG.debug("New record, records=%s", dataId.records)

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled; to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure.name in dataId:
            if (dimension_record := dataId.records[self.exposure.name]) is not None:
                self._exposure_records(dimension_record, record)
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit.name in dataId and (dimension_record := dataId.records[self.visit.name]) is not None:
            self._visit_records(dimension_record, record)

        # Ask each plugin for its values to add to the record.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
                stacklevel=find_outside_stacklevel("lsst.daf.butler"),
            )
        else:
            record.update(plugin_records)

        if self.band.name in dataId:
            em_range = None
            if (label := dataId.get(self.physical_filter.name)) is not None:
                em_range = self.config.spectral_ranges.get(cast(str, label))
            if not em_range:
                band_name = dataId[self.band.name]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.mapping)
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys; if expansion
            # fails due to a missing key name then the pre-filled None
            # is left in place.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to a record.

        Returns
        -------
        record : `dict`
            Record items.

        Raises
        ------
        RegionTypeError
            Raised if the type of the region is not supported.
        """
        record = Record()
        # Ask each plugin for its values to add to the record.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name
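

# Usage sketch (illustrative, not part of the original module): build a
# factory once and convert expanded dataset refs; ``None`` results mean
# the dataset type is not in the obscore configuration and is skipped.
def _example_convert_refs(
    config: ObsCoreConfig,
    schema: ObsCoreSchema,
    universe: DimensionUniverse,
    plugins: Collection[SpatialObsCorePlugin],
    refs: Collection[DatasetRef],
    context: SqlQueryContext,
) -> list[Record]:
    factory = RecordFactory(config, schema, universe, plugins)
    return [rec for ref in refs if (rec := factory(ref, context)) is not None]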