Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 14%

116 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-29 02:20 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"] 

25 

26import logging 

27from abc import abstractmethod 

28from collections.abc import Collection, Mapping 

29from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, cast 

30from uuid import UUID 

31 

32import astropy.time 

33from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse 

34 

35from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig 

36 

37if TYPE_CHECKING:

38 from lsst.sphgeom import Region 

39 

40 from ._schema import ObsCoreSchema 

41 from ._spatial import SpatialObsCorePlugin 

42 

43_LOG = logging.getLogger(__name__) 

44 

# Map extra column type to a conversion method that takes string.
# Used in `RecordFactory.__call__` to coerce formatted template strings
# into the column type declared in an `ExtraColumnConfig`.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # expect integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}

52 

53 

class ExposureRegionFactory:
    """Abstract factory interface producing a spatial `Region` for an
    exposure data ID.

    Concrete subclasses implement :meth:`exposure_region`; this base class
    only defines the contract.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate) -> Optional[Region]:
        """Return the spatial region associated with an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.

        Returns
        -------
        region : `Region` or `None`
            The region for the exposure; `None` when no region can be
            determined.
        """
        raise NotImplementedError()

72 

73 

74Record = Dict[str, Any] 

75 

76 

class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins; each is asked for additional columns for every
        generated record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to look up a spatial region for exposure data IDs.
        If `None`, the region from the data ID (if any) is used as-is.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef) -> Optional[Record]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId
        # _LOG.debug("New record, dataId=%s", dataId.full)
        # _LOG.debug("New record, records=%s", dataId.records)

        record: Dict[str, str | int | float | UUID | None] = {}

        # Static columns taken from per-dataset-type and global configuration.
        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type obs_collection overrides the global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Temporal coverage from the data ID timespan; t_min/t_max are MJD.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        # Spatial region: start from the data ID; for exposures an optional
        # factory may provide a better region (exposures have no region of
        # their own in the dimension schema — TODO confirm).
        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            assert ref.id is not None, "Dataset ID must be defined"
            plugin_record = plugin.make_records(ref.id, region)
            if plugin_record is not None:
                record.update(plugin_record)

        # Spectral range: prefer the physical_filter mapping, fall back to
        # the band name mapping.
        if self.band in dataId:
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: Dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make obs_id available to later templates (access_url, extras).
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # add extra columns
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys; if expansion
            # fails due to a missing key name then skip this column.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name