Coverage for python/lsst/daf/butler/registry/obscore/_schema.py: 24%

53 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-04 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ObsCoreSchema"] 

25 

26from collections.abc import Sequence 

27from typing import TYPE_CHECKING, List, Optional, Type 

28 

29import sqlalchemy 

30from lsst.daf.butler import ddl 

31from lsst.utils.iteration import ensure_iterable 

32 

33from ._config import DatasetTypeConfig, ExtraColumnConfig, ObsCoreConfig 

34from ._spatial import SpatialObsCorePlugin 

35 

36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true

37 from ..interfaces import DatasetRecordStorageManager 

38 

39 

# Standard columns of the output table, in output order. The list is expected
# to cover at least the mandatory columns of the ObsCore note (revision 1.1,
# Appendix B). Additional columns come from the `extra_columns` configuration
# parameters.
_STATIC_COLUMNS = (
    ddl.FieldSpec(
        name="dataproduct_type",
        dtype=sqlalchemy.String,
        length=255,
        doc="Logical data product type",
    ),
    ddl.FieldSpec(
        name="dataproduct_subtype",
        dtype=sqlalchemy.String,
        length=255,
        doc="Data product specific type",
    ),
    ddl.FieldSpec(
        name="facility_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="The name of the facility used for the observation",
    ),
    ddl.FieldSpec(
        name="calib_level",
        dtype=sqlalchemy.SmallInteger,
        doc="Calibration level {0, 1, 2, 3, 4}",
    ),
    ddl.FieldSpec(
        name="target_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Object of interest",
    ),
    ddl.FieldSpec(
        name="obs_id",
        dtype=sqlalchemy.String,
        length=255,
        doc="Observation ID",
    ),
    ddl.FieldSpec(
        name="obs_collection",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the data collection",
    ),
    ddl.FieldSpec(
        name="obs_publisher_did",
        dtype=sqlalchemy.String,
        length=255,
        doc="Dataset identifier given by the publisher",
    ),
    ddl.FieldSpec(
        name="access_url",
        dtype=sqlalchemy.String,
        length=65535,
        doc="URL used to access (download) dataset",
    ),
    ddl.FieldSpec(
        name="access_format",
        dtype=sqlalchemy.String,
        length=255,
        doc="File content format",
    ),
    # The spatial columns s_ra, s_dec, s_fov and s_region are not listed
    # here; they are managed by a default spatial plugin.
    ddl.FieldSpec(
        name="s_resolution",
        dtype=sqlalchemy.Float,
        doc="Spatial resolution of data as FWHM (arcsec)",
    ),
    ddl.FieldSpec(
        name="s_xel1",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the first spatial axis",
    ),
    ddl.FieldSpec(
        name="s_xel2",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the second spatial axis",
    ),
    ddl.FieldSpec(
        name="t_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the time axis",
    ),
    ddl.FieldSpec(
        name="t_min",
        dtype=sqlalchemy.Float,
        doc="Start time in MJD",
    ),
    ddl.FieldSpec(
        name="t_max",
        dtype=sqlalchemy.Float,
        doc="Stop time in MJD",
    ),
    ddl.FieldSpec(
        name="t_exptime",
        dtype=sqlalchemy.Float,
        doc="Total exposure time (sec)",
    ),
    ddl.FieldSpec(
        name="t_resolution",
        dtype=sqlalchemy.Float,
        doc="Temporal resolution (sec)",
    ),
    ddl.FieldSpec(
        name="em_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the spectral axis",
    ),
    ddl.FieldSpec(
        name="em_min",
        dtype=sqlalchemy.Float,
        doc="Start in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_max",
        dtype=sqlalchemy.Float,
        doc="Stop in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_res_power",
        dtype=sqlalchemy.Float,
        doc="Spectral resolving power",
    ),
    ddl.FieldSpec(
        name="em_filter_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Filter name (non-standard column)",
    ),
    ddl.FieldSpec(
        name="o_ucd",
        dtype=sqlalchemy.String,
        length=255,
        doc="UCD of observable",
    ),
    ddl.FieldSpec(
        name="pol_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of polarization samples",
    ),
    ddl.FieldSpec(
        name="instrument_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the instrument used for this observation",
    ),
)

104 

# Maps the exact Python type of a bare `extra_columns` value to the SQLAlchemy
# column type used for it. Lookups are done with ``type(value)``, so `bool`
# values resolve to Boolean and not to the `int` entry.
_TYPE_MAP = {
    int: sqlalchemy.BigInteger,
    float: sqlalchemy.Float,
    bool: sqlalchemy.Boolean,
    str: sqlalchemy.String,
}

111 

112 

class ObsCoreSchema:
    """Build the table specification of an ObsCore table from its
    configuration.

    Parameters
    ----------
    config : `ObsCoreConfig`
        ObsCore configuration instance.
    spatial_plugins : `~collections.abc.Sequence` [ `SpatialObsCorePlugin` ]
        Spatial plugins. Each plugin's ``extend_table_spec`` method is called
        with the generated table specification so that it can extend it.
    datasets : `type`, optional
        Type of dataset records manager. If specified, the ObsCore table will
        define a foreign key to ``datasets`` table with "ON DELETE CASCADE"
        constraint.

    Raises
    ------
    TypeError
        Raised if no column type can be determined for an entry in
        ``extra_columns``.

    Notes
    -----
    Two deployment modes are supported: a "live" obscore table that lives in
    the same database as the Registry, and a standalone table in a completely
    separate database. The live table relies on the foreign key constraint
    with "ON DELETE CASCADE" option to remove obscore records when their
    original datasets are removed.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        spatial_plugins: Sequence[SpatialObsCorePlugin],
        datasets: Optional[Type[DatasetRecordStorageManager]] = None,
    ):
        field_specs = list(_STATIC_COLUMNS)
        known_names = {spec.name for spec in field_specs}

        # The top-level configuration is scanned first, followed by the
        # per-dataset-type configurations; the first definition of a column
        # name wins and later ones are ignored.
        configs: List[ObsCoreConfig | DatasetTypeConfig] = [config]
        if config.dataset_types:
            configs.extend(config.dataset_types.values())

        for one_config in configs:
            if not one_config.extra_columns:
                continue
            for col_name, col_value in one_config.extra_columns.items():
                if col_name in known_names:
                    # Already defined, either statically or by an earlier
                    # configuration.
                    continue
                if isinstance(col_value, ExtraColumnConfig):
                    # Full column description, type is looked up by name.
                    dtype = ddl.VALID_CONFIG_COLUMN_TYPES.get(col_value.type.name)
                    length = col_value.length
                    description: Optional[str] = col_value.doc
                else:
                    # Bare value: derive the column type from its Python
                    # type, strings get a fixed length of 255.
                    dtype = _TYPE_MAP.get(type(col_value))
                    length = 255 if isinstance(col_value, str) else None
                    description = None
                if dtype is None:
                    raise TypeError(
                        f"Unexpected type in extra_columns: column={col_name}, value={col_value}"
                    )
                field_specs.append(
                    ddl.FieldSpec(name=col_name, dtype=dtype, length=length, doc=description)
                )
                known_names.add(col_name)

        # Each configured index is defined by a single column name or by
        # several of them.
        index_specs: List[ddl.IndexSpec] = []
        if config.indices:
            index_specs = [
                ddl.IndexSpec(*ensure_iterable(index_columns)) for index_columns in config.indices.values()
            ]

        self._table_spec = ddl.TableSpec(fields=field_specs, indexes=index_specs)

        # Give every plugin a chance to extend the specification.
        for plugin in spatial_plugins:
            plugin.extend_table_spec(self._table_spec)

        self._dataset_fk: Optional[ddl.FieldSpec] = None
        if datasets is not None:
            # The foreign key to datasets doubles as the primary key of this
            # table.
            self._dataset_fk = datasets.addDatasetForeignKey(
                self._table_spec, name="registry_dataset", onDelete="CASCADE", doc="Registry dataset ID"
            )
            self._dataset_fk.primaryKey = True

    @property
    def table_spec(self) -> ddl.TableSpec:
        """Specification of the obscore table (`ddl.TableSpec`)."""
        return self._table_spec

    @property
    def dataset_fk(self) -> Optional[ddl.FieldSpec]:
        """Specification of the field that is both a foreign key to the
        ``datasets`` table and the primary key of the obscore table
        (`ddl.FieldSpec` or `None`).
        """
        return self._dataset_fk