Coverage for python/lsst/daf/butler/registry/obscore/_schema.py: 24%

53 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-19 01:58 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ObsCoreSchema"] 

25 

26from collections.abc import Sequence 

27from typing import TYPE_CHECKING, List, Optional, Type 

28 

29import sqlalchemy 

30from lsst.daf.butler import ddl 

31from lsst.utils.iteration import ensure_iterable 

32 

33from ._config import DatasetTypeConfig, ExtraColumnConfig, ObsCoreConfig 

34from ._spatial import SpatialObsCorePlugin 

35 

36if TYPE_CHECKING: 36 ↛ 37line 36 didn't jump to line 37, because the condition on line 36 was never true

37 from ..interfaces import DatasetRecordStorageManager 

38 

39 

# Standard columns of the ObsCore table, in output order. The list is expected
# to contain at least every mandatory column of the ObsCore note (revision
# 1.1, Appendix B). Additional columns can be configured via the
# `extra_columns` parameters in configuration.
_STATIC_COLUMNS = (
    ddl.FieldSpec(
        name="dataproduct_type",
        dtype=sqlalchemy.String,
        length=255,
        doc="Logical data product type",
    ),
    ddl.FieldSpec(
        name="dataproduct_subtype",
        dtype=sqlalchemy.String,
        length=255,
        doc="Data product specific type",
    ),
    ddl.FieldSpec(
        name="facility_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="The name of the facility used for the observation",
    ),
    ddl.FieldSpec(
        name="calib_level",
        dtype=sqlalchemy.SmallInteger,
        doc="Calibration level {0, 1, 2, 3, 4}",
    ),
    ddl.FieldSpec(
        name="target_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Object of interest",
    ),
    ddl.FieldSpec(
        name="obs_id",
        dtype=sqlalchemy.String,
        length=255,
        doc="Observation ID",
    ),
    ddl.FieldSpec(
        name="obs_collection",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the data collection",
    ),
    ddl.FieldSpec(
        name="obs_publisher_did",
        dtype=sqlalchemy.String,
        length=255,
        doc="Dataset identifier given by the publisher",
    ),
    ddl.FieldSpec(
        name="access_url",
        dtype=sqlalchemy.String,
        length=65535,
        doc="URL used to access (download) dataset",
    ),
    ddl.FieldSpec(
        name="access_format",
        dtype=sqlalchemy.String,
        length=255,
        doc="File content format",
    ),
    # The spatial columns s_ra, s_dec, s_fov and s_region are not listed here;
    # they are managed by a default spatial plugin.
    ddl.FieldSpec(
        name="s_resolution",
        dtype=sqlalchemy.Float,
        doc="Spatial resolution of data as FWHM (arcsec)",
    ),
    ddl.FieldSpec(
        name="s_xel1",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the first spatial axis",
    ),
    ddl.FieldSpec(
        name="s_xel2",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the second spatial axis",
    ),
    ddl.FieldSpec(
        name="t_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the time axis",
    ),
    ddl.FieldSpec(
        name="t_min",
        dtype=sqlalchemy.Float,
        doc="Start time in MJD",
    ),
    ddl.FieldSpec(
        name="t_max",
        dtype=sqlalchemy.Float,
        doc="Stop time in MJD",
    ),
    ddl.FieldSpec(
        name="t_exptime",
        dtype=sqlalchemy.Float,
        doc="Total exposure time (sec)",
    ),
    ddl.FieldSpec(
        name="t_resolution",
        dtype=sqlalchemy.Float,
        doc="Temporal resolution (sec)",
    ),
    ddl.FieldSpec(
        name="em_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of elements along the spectral axis",
    ),
    ddl.FieldSpec(
        name="em_min",
        dtype=sqlalchemy.Float,
        doc="Start in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_max",
        dtype=sqlalchemy.Float,
        doc="Stop in spectral coordinates (m)",
    ),
    ddl.FieldSpec(
        name="em_res_power",
        dtype=sqlalchemy.Float,
        doc="Spectral resolving power",
    ),
    ddl.FieldSpec(
        name="em_filter_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Filter name (non-standard column)",
    ),
    ddl.FieldSpec(
        name="o_ucd",
        dtype=sqlalchemy.String,
        length=255,
        doc="UCD of observable",
    ),
    ddl.FieldSpec(
        name="pol_xel",
        dtype=sqlalchemy.Integer,
        doc="Number of polarization samples",
    ),
    ddl.FieldSpec(
        name="instrument_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the instrument used for this observation",
    ),
)

# Column type to use for an extra column that is configured with a bare
# Python value only; the lookup key is the exact Python type of that value.
_TYPE_MAP = {
    int: sqlalchemy.BigInteger,
    float: sqlalchemy.Float,
    bool: sqlalchemy.Boolean,
    str: sqlalchemy.String,
}

111 

112 

class ObsCoreSchema:
    """Generate table specification for an ObsCore table based on its
    configuration.

    Parameters
    ----------
    config : `ObsCoreConfig`
        ObsCore configuration instance.
    spatial_plugins : `~collections.abc.Sequence` [ `SpatialObsCorePlugin` ]
        Spatial plugins. Each plugin is called once, via its
        ``extend_table_spec`` method, to extend the table specification built
        from the static and extra columns.
    datasets : `type`, optional
        Type of dataset records manager. If specified, the ObsCore table will
        define a foreign key to ``datasets`` table with "ON DELETE CASCADE"
        constraint.

    Raises
    ------
    TypeError
        Raised if the column type of an ``extra_columns`` entry cannot be
        determined.

    Notes
    -----
    This class is designed to support both "live" obscore table which is
    located in the same database as the Registry, and standalone table in a
    completely separate database. Live obscore table depends on foreign key
    constraints with "ON DELETE CASCADE" option to manage lifetime of obscore
    records when their original datasets are removed.

    Extra columns are collected first from the top-level configuration and
    then from each dataset type configuration. A column whose name is already
    known (a static column, or an extra column seen earlier) is skipped, so
    the first definition of a name wins.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        spatial_plugins: Sequence[SpatialObsCorePlugin],
        datasets: Optional[Type[DatasetRecordStorageManager]] = None,
    ):
        fields = list(_STATIC_COLUMNS)
        column_names = {col.name for col in fields}

        # Top-level configuration goes first, followed by per-dataset-type
        # configurations.
        all_configs: List[ObsCoreConfig | DatasetTypeConfig] = [config]
        if config.dataset_types:
            all_configs.extend(config.dataset_types.values())

        for cfg in all_configs:
            if not cfg.extra_columns:
                continue
            for col_name, col_value in cfg.extra_columns.items():
                if col_name in column_names:
                    # Already defined, the earlier definition is kept.
                    continue
                fields.append(self._make_extra_field(col_name, col_value))
                column_names.add(col_name)

        indices: List[ddl.IndexSpec] = []
        if config.indices:
            indices = [ddl.IndexSpec(*ensure_iterable(columns)) for columns in config.indices.values()]

        self._table_spec = ddl.TableSpec(fields=fields, indexes=indices)

        # Possibly extend table specs with plugin-added stuff.
        for plugin in spatial_plugins:
            plugin.extend_table_spec(self._table_spec)

        self._dataset_fk: Optional[ddl.FieldSpec] = None
        if datasets is not None:
            # Add FK to datasets, is also a PK for this table
            self._dataset_fk = datasets.addDatasetForeignKey(
                self._table_spec, name="registry_dataset", onDelete="CASCADE", doc="Registry dataset ID"
            )
            self._dataset_fk.primaryKey = True

    @staticmethod
    def _make_extra_field(col_name: str, col_value: object) -> ddl.FieldSpec:
        """Build a field specification for one ``extra_columns`` entry.

        Parameters
        ----------
        col_name : `str`
            Name of the column.
        col_value : `ExtraColumnConfig` or `object`
            Either a full column configuration, or a bare value from which
            the column type is guessed.

        Returns
        -------
        field : `ddl.FieldSpec`
            Specification of the new column.

        Raises
        ------
        TypeError
            Raised if no column type is found for the configured type name
            or for the Python type of the bare value.
        """
        doc: Optional[str] = None
        if isinstance(col_value, ExtraColumnConfig):
            col_type = ddl.VALID_CONFIG_COLUMN_TYPES.get(col_value.type.name)
            col_length = col_value.length
            doc = col_value.doc
        else:
            # Only value is provided, guess type from Python, and use a fixed
            # length of 255 for strings.
            col_type = _TYPE_MAP.get(type(col_value))
            col_length = 255 if isinstance(col_value, str) else None
        if col_type is None:
            raise TypeError(f"Unexpected type in extra_columns: column={col_name}, value={col_value}")
        return ddl.FieldSpec(name=col_name, dtype=col_type, length=col_length, doc=doc)

    @property
    def table_spec(self) -> ddl.TableSpec:
        """Specification for obscore table (`ddl.TableSpec`)."""
        return self._table_spec

    @property
    def dataset_fk(self) -> Optional[ddl.FieldSpec]:
        """Specification for the field which is a foreign key to ``datasets``
        table, and also a primary key for obscore table (`ddl.FieldSpec` or
        `None`).
        """
        return self._dataset_fk