Coverage report for python/lsst/daf/butler/registry/obscore/_records.py: 15% of 118 statements covered (coverage.py v6.5.0, created at 2023-02-28 02:30 -0800).

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"] 

25 

import logging
from abc import ABC, abstractmethod
from collections.abc import Collection, Mapping
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, cast
from uuid import UUID

import astropy.time
from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse

from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig

36 

# Imports needed only for type annotations; merged into a single guard
# (the original had two consecutive ``if TYPE_CHECKING:`` blocks).
if TYPE_CHECKING:
    from lsst.sphgeom import Region

    from ..queries import SqlQueryContext
    from ._schema import ObsCoreSchema
    from ._spatial import SpatialObsCorePlugin

45 

_LOG = logging.getLogger(__name__)


def _string_to_bool(value: str) -> bool:
    """Convert an integer-like string (e.g. "0"/"1") to a boolean."""
    return bool(int(value))


# Map extra column type to a conversion method that takes string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: _string_to_bool,
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}

55 

56 

class ExposureRegionFactory(ABC):
    """Abstract interface for a class that returns a Region for an exposure.

    Notes
    -----
    Deriving from `abc.ABC` makes the abstractness enforceable: without it
    ``@abstractmethod`` has no effect and the class could be instantiated
    directly without implementing `exposure_region`.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Optional[Region]:
        """Return a region for a given DataId that corresponds to an exposure.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            `None` is returned if region cannot be determined.
        """
        raise NotImplementedError()

77 

78 

# Type alias for a single ObsCore record: mapping of column name to value.
Record = Dict[str, Any]

80 

81 

class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins whose per-dataset records are merged into each
        generated ObsCore record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to look up a spatial region for exposure-based data IDs.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Optional[Record]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: Dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type collection name overrides the global default.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Temporal coverage comes from the data ID timespan (MJD endpoints).
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
                # Exposure data IDs have no region of their own; ask the
                # factory (which may run additional queries) for one.
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            assert ref.id is not None, "Dataset ID must be defined"
            plugin_record = plugin.make_records(ref.id, region)
            if plugin_record is not None:
                record.update(plugin_record)

        if self.band in dataId:
            # Prefer a spectral range configured for the physical filter;
            # fall back to the one configured for the abstract band.
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: Dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # obs_id may be referenced by the datalink URL template below.
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # add extra columns
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys, if expansion
            # fails due to a missing key name then store None.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name