Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 16%

121 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-21 09:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

from __future__ import annotations

__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]

import logging
import warnings
from abc import abstractmethod
from collections.abc import Callable, Collection, Mapping
from typing import TYPE_CHECKING, Any, cast
from uuid import UUID

import astropy.time
from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
from lsst.utils.introspection import find_outside_stacklevel

from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
from ._spatial import RegionTypeError, RegionTypeWarning

# Imports needed only for type annotations; merged into a single guard.
if TYPE_CHECKING:
    from lsst.sphgeom import Region

    from ..queries import SqlQueryContext
    from ._schema import ObsCoreSchema
    from ._spatial import SpatialObsCorePlugin

_LOG = logging.getLogger(__name__)

50 

# Map extra column type to a conversion method that takes string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # expect integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}

58 

59 

class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure."""

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        """Return the spatial region associated with an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID of an exposure dataset.
        context : `SqlQueryContext`
            Query context used to look up additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            The exposure region; `None` when it cannot be determined.
        """
        raise NotImplementedError()

80 

81 

# A single obscore record: a plain mapping from column name to value.
Record = dict[str, Any]

83 

84 

class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins that contribute additional columns to each record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to find the region of an exposure data ID; exposures do
        not carry their own region.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: ExposureRegionFactory | None = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type obs_collection takes precedence over global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
            # Exposure data IDs have no region of their own; delegate to the
            # factory when one was provided.
            if self.exposure_region_factory is not None:
                region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # Ask each plugin for its values to add to a record; on a region
        # conversion failure warn and leave spatial columns unset.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
                stacklevel=find_outside_stacklevel("lsst.daf.butler"),
            )
        else:
            record.update(plugin_records)

        if self.band in dataId:
            em_range = None
            # Prefer a spectral range keyed by physical filter, fall back to
            # the one keyed by band name.
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns: global config first, then per-dataset-type
        # entries which may override the global ones.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys, if expansion
            # fails due to a missing key name then store None.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to record.

        Returns
        -------
        record : `dict`
            Record items.

        Raises
        ------
        RegionTypeError
            Raised if type of the region is not supported.
        """
        record = Record()
        # Ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name