Coverage for python/lsst/daf/butler/registry/obscore/_schema.py: 24%

59 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-31 02:41 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ObsCoreSchema"] 

25 

26import re 

27from collections.abc import Sequence 

28from typing import TYPE_CHECKING, List, Optional, Type 

29 

30import sqlalchemy 

31from lsst.daf.butler import ddl 

32from lsst.utils.iteration import ensure_iterable 

33 

34from ._config import DatasetTypeConfig, ExtraColumnConfig, ObsCoreConfig 

35from ._spatial import SpatialObsCorePlugin 

36 

37if TYPE_CHECKING: 

38 from ..interfaces import DatasetRecordStorageManager 

39 

40 

# Regular expression to match templates in extra_columns that specify simple
# dimensions, e.g. "{exposure}".  The template must consist of exactly one
# "{name}" reference with nothing around it; group 1 captures the dimension
# name.
_DIMENSION_TEMPLATE_RE = re.compile(r"^[{](\w+)[}]$")

44 

# Standard columns of the output table, in output order.  This includes at
# least all mandatory columns defined in the ObsCore note (revision 1.1,
# Appendix B); additional columns can be added via the ``extra_columns``
# parameters in configuration.
_STATIC_COLUMNS = (
    # Data product description.
    ddl.FieldSpec(name="dataproduct_type", dtype=sqlalchemy.String, length=255,
                  doc="Logical data product type"),
    ddl.FieldSpec(name="dataproduct_subtype", dtype=sqlalchemy.String, length=255,
                  doc="Data product specific type"),
    ddl.FieldSpec(name="facility_name", dtype=sqlalchemy.String, length=255,
                  doc="The name of the facility used for the observation"),
    ddl.FieldSpec(name="calib_level", dtype=sqlalchemy.SmallInteger,
                  doc="Calibration level {0, 1, 2, 3, 4}"),
    ddl.FieldSpec(name="target_name", dtype=sqlalchemy.String, length=255,
                  doc="Object of interest"),
    # Observation identification and data access.
    ddl.FieldSpec(name="obs_id", dtype=sqlalchemy.String, length=255,
                  doc="Observation ID"),
    ddl.FieldSpec(name="obs_collection", dtype=sqlalchemy.String, length=255,
                  doc="Name of the data collection"),
    ddl.FieldSpec(name="obs_publisher_did", dtype=sqlalchemy.String, length=255,
                  doc="Dataset identifier given by the publisher"),
    ddl.FieldSpec(name="access_url", dtype=sqlalchemy.String, length=65535,
                  doc="URL used to access (download) dataset"),
    ddl.FieldSpec(name="access_format", dtype=sqlalchemy.String, length=255,
                  doc="File content format"),
    # Spatial axis.  The s_ra, s_dec, s_fov, and s_region columns are managed
    # by a default spatial plugin and are not listed here.
    ddl.FieldSpec(name="s_resolution", dtype=sqlalchemy.Float,
                  doc="Spatial resolution of data as FWHM (arcsec)"),
    ddl.FieldSpec(name="s_xel1", dtype=sqlalchemy.Integer,
                  doc="Number of elements along the first spatial axis"),
    ddl.FieldSpec(name="s_xel2", dtype=sqlalchemy.Integer,
                  doc="Number of elements along the second spatial axis"),
    # Time axis.
    ddl.FieldSpec(name="t_xel", dtype=sqlalchemy.Integer,
                  doc="Number of elements along the time axis"),
    ddl.FieldSpec(name="t_min", dtype=sqlalchemy.Float, doc="Start time in MJD"),
    ddl.FieldSpec(name="t_max", dtype=sqlalchemy.Float, doc="Stop time in MJD"),
    ddl.FieldSpec(name="t_exptime", dtype=sqlalchemy.Float,
                  doc="Total exposure time (sec)"),
    ddl.FieldSpec(name="t_resolution", dtype=sqlalchemy.Float,
                  doc="Temporal resolution (sec)"),
    # Spectral axis.
    ddl.FieldSpec(name="em_xel", dtype=sqlalchemy.Integer,
                  doc="Number of elements along the spectral axis"),
    ddl.FieldSpec(name="em_min", dtype=sqlalchemy.Float,
                  doc="Start in spectral coordinates (m)"),
    ddl.FieldSpec(name="em_max", dtype=sqlalchemy.Float,
                  doc="Stop in spectral coordinates (m)"),
    ddl.FieldSpec(name="em_res_power", dtype=sqlalchemy.Float,
                  doc="Spectral resolving power"),
    ddl.FieldSpec(name="em_filter_name", dtype=sqlalchemy.String, length=255,
                  doc="Filter name (non-standard column)"),
    # Observable and polarization axes.
    ddl.FieldSpec(name="o_ucd", dtype=sqlalchemy.String, length=255,
                  doc="UCD of observable"),
    ddl.FieldSpec(name="pol_xel", dtype=sqlalchemy.Integer,
                  doc="Number of polarization samples"),
    # Provenance.
    ddl.FieldSpec(name="instrument_name", dtype=sqlalchemy.String, length=255,
                  doc="Name of the instrument used for this observation"),
)

109 

# SQLAlchemy column types to use for plain Python values appearing in
# ``extra_columns``; lookup is by the exact Python type of the value.
_TYPE_MAP = dict(
    (
        (int, sqlalchemy.BigInteger),
        (float, sqlalchemy.Float),
        (bool, sqlalchemy.Boolean),
        (str, sqlalchemy.String),
    )
)

116 

117 

class ObsCoreSchema:
    """Generate table specification for an ObsCore table based on its
    configuration.

    Parameters
    ----------
    config : `ObsCoreConfig`
        ObsCore configuration instance.
    spatial_plugins : `~collections.abc.Sequence` [ `SpatialObsCorePlugin` ]
        Spatial plugins which may extend the table specification with
        additional columns (e.g. s_ra, s_dec, s_region).
    datasets : `type`, optional
        Type of dataset records manager. If specified, the ObsCore table will
        define a foreign key to ``datasets`` table with "ON DELETE CASCADE"
        constraint.

    Notes
    -----
    This class is designed to support both "live" obscore table which is
    located in the same database as the Registry, and standalone table in a
    completely separate database. Live obscore table depends on foreign key
    constraints with "ON DELETE CASCADE" option to manage lifetime of obscore
    records when their original datasets are removed.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        spatial_plugins: Sequence[SpatialObsCorePlugin],
        datasets: type[DatasetRecordStorageManager] | None = None,
    ):
        # Maps dimension name to the obscore column storing its value; the
        # "instrument" dimension always maps to the standard column.
        self._dimension_columns: dict[str, str] = {"instrument": "instrument_name"}

        fields = list(_STATIC_COLUMNS)

        # Names already taken; extra columns may not override standard ones.
        column_names = {col.name for col in fields}

        # Extra columns come from the global config and from every
        # per-dataset-type config; the first definition of a name wins.
        all_configs: list[ObsCoreConfig | DatasetTypeConfig] = [config]
        if config.dataset_types:
            all_configs += list(config.dataset_types.values())
        for cfg in all_configs:
            if cfg.extra_columns:
                for col_name, col_value in cfg.extra_columns.items():
                    if col_name in column_names:
                        continue
                    doc: str | None = None
                    if isinstance(col_value, ExtraColumnConfig):
                        # Full column config: explicit type, length, and doc.
                        # NOTE: the type lookup may return `None` for an
                        # unsupported type name; checked below.
                        col_type = ddl.VALID_CONFIG_COLUMN_TYPES.get(col_value.type.name)
                        col_length = col_value.length
                        doc = col_value.doc
                        # For columns that store dimensions remember their
                        # column names.
                        if match := _DIMENSION_TEMPLATE_RE.match(col_value.template):
                            dimension = match.group(1)
                            self._dimension_columns[dimension] = col_name
                    else:
                        # Only value is provided, guess type from Python, and
                        # use a fixed length of 255 for strings.
                        col_type = _TYPE_MAP.get(type(col_value))
                        col_length = 255 if isinstance(col_value, str) else None
                    if col_type is None:
                        raise TypeError(
                            f"Unexpected type in extra_columns: column={col_name}, value={col_value}"
                        )
                    fields.append(ddl.FieldSpec(name=col_name, dtype=col_type, length=col_length, doc=doc))
                    column_names.add(col_name)

        indices: list[ddl.IndexSpec] = []
        if config.indices:
            for columns in config.indices.values():
                indices.append(ddl.IndexSpec(*ensure_iterable(columns)))

        self._table_spec = ddl.TableSpec(fields=fields, indexes=indices)

        # Possibly extend table specs with plugin-added stuff.
        for plugin in spatial_plugins:
            plugin.extend_table_spec(self._table_spec)

        self._dataset_fk: ddl.FieldSpec | None = None
        if datasets is not None:
            # Add FK to datasets, is also a PK for this table
            self._dataset_fk = datasets.addDatasetForeignKey(
                self._table_spec, name="registry_dataset", onDelete="CASCADE", doc="Registry dataset ID"
            )
            self._dataset_fk.primaryKey = True

    @property
    def table_spec(self) -> ddl.TableSpec:
        """Specification for obscore table (`ddl.TableSpec`)."""
        return self._table_spec

    @property
    def dataset_fk(self) -> ddl.FieldSpec | None:
        """Specification for the field which is a foreign key to ``datasets``
        table, and also a primary key for obscore table (`ddl.FieldSpec` or
        `None`).
        """
        return self._dataset_fk

    def dimension_column(self, dimension: str) -> str | None:
        """Return column name for a given dimension.

        Parameters
        ----------
        dimension : `str`
            Dimension name, e.g. "exposure".

        Returns
        -------
        column_name : `str` or `None`
            Name of the column in obscore table or `None` if there is no
            configured column for this dimension.
        """
        return self._dimension_columns.get(dimension)