Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 14%

124 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-07 09:47 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ExposureRegionFactory", "RecordFactory"] 

25 

26import logging 

27from abc import abstractmethod 

28from collections.abc import Mapping 

29from typing import Any, Callable, Dict, Optional, cast 

30from uuid import UUID 

31 

32import astropy.time 

33from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse 

34from lsst.sphgeom import ConvexPolygon, LonLat, Region 

35 

36from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig 

37from ._schema import ObsCoreSchema 

38 

_LOG = logging.getLogger(__name__)

# Converters used to coerce the string produced by template expansion into
# the declared extra-column type.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    # Booleans arrive as integer-like strings ("0"/"1"), not "True"/"False".
    ExtraColumnType.bool: lambda value: bool(int(value)),
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}

48 

49 

class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure.

    Notes
    -----
    NOTE(review): the class does not derive from `abc.ABC`, so
    ``@abstractmethod`` is not enforced at instantiation time; the method
    raises `NotImplementedError` instead — confirm this is intentional.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate) -> Optional[Region]:
        """Return a region for a given DataId that corresponds to an exposure.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.

        Returns
        -------
        region : `Region`
            `None` is returned if region cannot be determined.
        """
        raise NotImplementedError()

68 

69 

class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to look up a spatial region for exposure data IDs,
        which do not carry a region of their own.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory

        # Dimension elements used repeatedly below, resolved once here.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef) -> Optional[dict[str, str | int | float | UUID | None]]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: dict[str, str | int | float | UUID | None] = {}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type collection name overrides the global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Convert DataId timespan bounds, when known, to MJD floats.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
            # Exposure data IDs do not carry a region of their own; ask the
            # factory for a matching region when one was provided.
            if self.exposure_region_factory is not None:
                region = self.exposure_region_factory.exposure_region(dataId)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        self.region_to_columns(region, record)

        if self.band in dataId:
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                # Fall back to the band name when the physical_filter is
                # absent or has no configured spectral range.
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make obs_id available to later templates (e.g. datalink URL).
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns; per-dataset-type entries override global ones.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            if isinstance(column_value, ExtraColumnConfig):
                # Try to expand the template with known keys; if expansion
                # fails due to a missing key name the column is skipped.
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    @classmethod
    def region_to_columns(cls, region: Optional[Region], record: dict[str, Any]) -> None:
        """Fill obscore column values from sphgeom region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Spatial region, expected to be a ``ConvexPolygon`` instance,
            warning will be logged for other types.
        record : `dict` [ `str`, `Any` ]
            Obscore record that will be expanded with the new columns.

        Notes
        -----
        This method adds ``s_ra``, ``s_dec``, and ``s_fov`` values to the
        record, they are computed from the region bounding circle. If the
        region is a ``ConvexPolygon`` instance, then ``s_region`` value is
        added as well representing the polygon in ADQL format.
        """
        if region is None:
            return

        # Get spatial parameters from the bounding circle.
        circle = region.getBoundingCircle()
        center = LonLat(circle.getCenter())
        record["s_ra"] = center.getLon().asDegrees()
        record["s_dec"] = center.getLat().asDegrees()
        record["s_fov"] = circle.getOpeningAngle().asDegrees() * 2

        if isinstance(region, ConvexPolygon):
            # Render the polygon as space-separated lon/lat pairs in degrees
            # (ADQL POLYGON syntax).
            poly = ["POLYGON ICRS"]
            for vertex in region.getVertices():
                lon_lat = LonLat(vertex)
                poly += [
                    f"{lon_lat.getLon().asDegrees():.6f}",
                    f"{lon_lat.getLat().asDegrees():.6f}",
                ]
            record["s_region"] = " ".join(poly)
        else:
            # Lazy %-style args avoid building the message unless emitted.
            _LOG.warning("Unexpected region type: %s", type(region))

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name