Coverage for python/lsst/daf/butler/registry/obscore/_schema.py: 27%

59 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 09:54 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["ObsCoreSchema"] 

31 

32import re 

33from collections.abc import Sequence 

34from typing import TYPE_CHECKING 

35 

36import sqlalchemy 

37from lsst.daf.butler import ddl 

38from lsst.utils.iteration import ensure_iterable 

39 

40from ._config import DatasetTypeConfig, ExtraColumnConfig, ObsCoreConfig 

41from ._spatial import SpatialObsCorePlugin 

42 

43if TYPE_CHECKING: 

44 from ..interfaces import DatasetRecordStorageManager 

45 

46 

# Regular expression to match templates in extra_columns that specify simple
# dimensions, e.g. "{exposure}". The pattern is anchored at both ends, so it
# only matches when the whole template is a single brace-enclosed word; the
# word itself (the dimension name) is captured as group 1.
_DIMENSION_TEMPLATE_RE = re.compile(r"^[{](\w+)[}]$")

50 

# Standard columns of the output table, in the order they are defined. This
# should include at least all mandatory columns defined in ObsCore note
# (revision 1.1, Appendix B). Additional columns can be added via
# `extra_columns` parameters in configuration.
_STATIC_COLUMNS = (
    # Data product and observation identification.
    ddl.FieldSpec(
        name="dataproduct_type",
        dtype=sqlalchemy.String,
        length=255,
        doc="Logical data product type",
    ),
    ddl.FieldSpec(
        name="dataproduct_subtype",
        dtype=sqlalchemy.String,
        length=255,
        doc="Data product specific type",
    ),
    ddl.FieldSpec(
        name="facility_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="The name of the facility used for the observation",
    ),
    ddl.FieldSpec(
        name="calib_level",
        dtype=sqlalchemy.SmallInteger,
        doc="Calibration level {0, 1, 2, 3, 4}",
    ),
    ddl.FieldSpec(
        name="target_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Object of interest",
    ),
    ddl.FieldSpec(
        name="obs_id",
        dtype=sqlalchemy.String,
        length=255,
        doc="Observation ID",
    ),
    ddl.FieldSpec(
        name="obs_collection",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the data collection",
    ),
    ddl.FieldSpec(
        name="obs_publisher_did",
        dtype=sqlalchemy.String,
        length=255,
        doc="Dataset identifier given by the publisher",
    ),
    # Data access.
    ddl.FieldSpec(
        name="access_url",
        dtype=sqlalchemy.String,
        length=65535,
        doc="URL used to access (download) dataset",
    ),
    ddl.FieldSpec(
        name="access_format",
        dtype=sqlalchemy.String,
        length=255,
        doc="File content format",
    ),
    # Spatial axis. Spatial columns s_ra, s_dec, s_fov, s_region are managed
    # by a default spatial plugin and are therefore not listed here.
    ddl.FieldSpec(
        name="s_resolution",
        dtype=sqlalchemy.Float,
        doc="Spatial resolution of data as FWHM (arcsec)",
    ),
    ddl.FieldSpec(
        name="s_xel1",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the first spatial axis",
    ),
    ddl.FieldSpec(
        name="s_xel2",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the second spatial axis",
    ),
    # Time axis.
    ddl.FieldSpec(
        name="t_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the time axis",
    ),
    ddl.FieldSpec(
        name="t_min",
        dtype=sqlalchemy.Float,
        doc="Start time in MJD",
    ),
    ddl.FieldSpec(
        name="t_max",
        dtype=sqlalchemy.Float,
        doc="Stop time in MJD",
    ),
    ddl.FieldSpec(
        name="t_exptime",
        dtype=sqlalchemy.Float,
        doc="Total exposure time (sec)",
    ),
    ddl.FieldSpec(
        name="t_resolution",
        dtype=sqlalchemy.Float,
        doc="Temporal resolution (sec)",
    ),
    # Spectral axis.
    ddl.FieldSpec(
        name="em_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the spectral axis",
    ),
    ddl.FieldSpec(
        name="em_min",
        dtype=sqlalchemy.Float,
        doc="Start in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_max",
        dtype=sqlalchemy.Float,
        doc="Stop in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_res_power",
        dtype=sqlalchemy.Float,
        doc="Spectral resolving power",
    ),
    ddl.FieldSpec(
        name="em_filter_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Filter name (non-standard column)",
    ),
    # Observable and polarization axes.
    ddl.FieldSpec(
        name="o_ucd",
        dtype=sqlalchemy.String,
        length=255,
        doc="UCD of observable",
    ),
    ddl.FieldSpec(
        name="pol_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of polarization samples",
    ),
    # Instrument.
    ddl.FieldSpec(
        name="instrument_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the instrument used for this observation",
    ),
)

115 

# Column types used for extra_columns entries that are given as a plain value
# rather than a full column configuration. ObsCoreSchema looks values up by
# their exact Python type (``type(value)``), so `bool` values map to Boolean
# and not to the `int` entry, and any type not listed here has no mapping.
_TYPE_MAP = {
    int: sqlalchemy.BigInteger,
    float: sqlalchemy.Float,
    bool: sqlalchemy.Boolean,
    str: sqlalchemy.String,
}

122 

123 

class ObsCoreSchema:
    """Build the table specification of an ObsCore table from its
    configuration.

    Parameters
    ----------
    config : `ObsCoreConfig`
        ObsCore configuration instance.
    spatial_plugins : `~collections.abc.Sequence` of `SpatialObsCorePlugin`
        Spatial plugins; each plugin is asked to extend the table
        specification after the configured columns and indices are in place.
    datasets : `type`, optional
        Type of dataset records manager. If specified, the ObsCore table will
        define a foreign key to ``datasets`` table with "ON DELETE CASCADE"
        constraint.

    Raises
    ------
    TypeError
        Raised if no column type can be determined for an entry in
        ``extra_columns``.

    Notes
    -----
    This class is designed to support both "live" obscore table which is
    located in the same database as the Registry, and standalone table in a
    completely separate database. Live obscore table depends on foreign key
    constraints with "ON DELETE CASCADE" option to manage lifetime of obscore
    records when their original datasets are removed.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        spatial_plugins: Sequence[SpatialObsCorePlugin],
        datasets: type[DatasetRecordStorageManager] | None = None,
    ):
        # Maps dimension name to the name of the column that stores it.
        self._dimension_columns: dict[str, str] = {"instrument": "instrument_name"}

        field_specs = list(_STATIC_COLUMNS)
        known_names = {spec.name for spec in field_specs}

        # The top-level configuration is processed first, followed by the
        # per-dataset-type configurations.
        configs: list[ObsCoreConfig | DatasetTypeConfig] = [config]
        if config.dataset_types:
            configs.extend(config.dataset_types.values())

        for current in configs:
            if not current.extra_columns:
                continue
            for col_name, col_value in current.extra_columns.items():
                # A column that is already defined (static, or added by an
                # earlier configuration) is not redefined.
                if col_name in known_names:
                    continue

                description: str | None = None
                if isinstance(col_value, ExtraColumnConfig):
                    dtype = ddl.VALID_CONFIG_COLUMN_TYPES.get(col_value.type.name)
                    length = col_value.length
                    description = col_value.doc
                    # Remember which column stores a dimension when the
                    # template is a plain dimension reference.
                    template_match = _DIMENSION_TEMPLATE_RE.match(col_value.template)
                    if template_match is not None:
                        self._dimension_columns[template_match.group(1)] = col_name
                else:
                    # Only a value is given: derive the column type from the
                    # Python type, with a fixed length of 255 for strings.
                    dtype = _TYPE_MAP.get(type(col_value))
                    length = 255 if isinstance(col_value, str) else None

                if dtype is None:
                    raise TypeError(
                        f"Unexpected type in extra_columns: column={col_name}, value={col_value}"
                    )

                field_specs.append(
                    ddl.FieldSpec(name=col_name, dtype=dtype, length=length, doc=description)
                )
                known_names.add(col_name)

        index_specs: list[ddl.IndexSpec] = []
        if config.indices:
            index_specs = [
                ddl.IndexSpec(*ensure_iterable(columns)) for columns in config.indices.values()
            ]

        self._table_spec = ddl.TableSpec(fields=field_specs, indexes=index_specs)

        # Give every plugin a chance to add its own columns/indices.
        for plugin in spatial_plugins:
            plugin.extend_table_spec(self._table_spec)

        self._dataset_fk: ddl.FieldSpec | None = None
        if datasets is not None:
            # The foreign key to datasets doubles as primary key of this
            # table.
            self._dataset_fk = datasets.addDatasetForeignKey(
                self._table_spec,
                name="registry_dataset",
                onDelete="CASCADE",
                doc="Registry dataset ID",
            )
            self._dataset_fk.primaryKey = True

    @property
    def table_spec(self) -> ddl.TableSpec:
        """Specification for obscore table (`ddl.TableSpec`)."""
        return self._table_spec

    @property
    def dataset_fk(self) -> ddl.FieldSpec | None:
        """Specification for the field which is a foreign key to ``datasets``
        table, and also a primary key for obscore table (`ddl.FieldSpec` or
        `None`).
        """
        return self._dataset_fk

    def dimension_column(self, dimension: str) -> str | None:
        """Look up the obscore column that stores a given dimension.

        Parameters
        ----------
        dimension : `str`
            Dimension name, e.g. "exposure".

        Returns
        -------
        column_name : `str` or `None`
            Name of the column in obscore table or `None` if there is no
            configured column for this dimension.
        """
        return self._dimension_columns.get(dimension, None)