Coverage for python/lsst/daf/butler/dimensions/_schema.py: 25%

111 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("addDimensionForeignKey",) 

30 

31import copy 

32from collections.abc import Mapping 

33from typing import TYPE_CHECKING 

34 

35from lsst.utils.classes import cached_getter 

36 

37from .. import ddl 

38from .._column_tags import DimensionKeyColumnTag, DimensionRecordColumnTag 

39from .._named import NamedValueSet 

40from .._timespan import TimespanDatabaseRepresentation 

41 

42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

43 from lsst.daf.relation import ColumnTag 

44 

45 from ._elements import Dimension, DimensionElement 

46 

47 

def _makeForeignKeySpec(dimension: Dimension) -> ddl.ForeignKeySpec:
    """Build a `ddl.ForeignKeySpec` that points at a dimension's table.

    This is a low-level helper; most callers should use the higher-level
    `addDimensionForeignKey` function instead.

    Parameters
    ----------
    dimension : `Dimension`
        The dimension whose table is to be referenced.  The caller guarantees
        that it is actually associated with a table.

    Returns
    -------
    spec : `ddl.ForeignKeySpec`
        A database-agnostic foreign key specification.
    """
    requiredDimensions = tuple(dimension.required)
    # Source columns are always named after the required dimensions
    # themselves.
    sourceColumns = tuple(required.name for required in requiredDimensions)
    # Target columns use the same names, except that the referenced dimension
    # itself is matched against the name of its own primary key field.
    targetColumns = tuple(
        dimension.primaryKey.name if required == dimension else required.name
        for required in requiredDimensions
    )
    return ddl.ForeignKeySpec(table=dimension.name, source=sourceColumns, target=targetColumns)

76 

77 

def addDimensionForeignKey(
    tableSpec: ddl.TableSpec,
    dimension: Dimension,
    *,
    primaryKey: bool,
    nullable: bool = False,
    constraint: bool = True,
) -> ddl.FieldSpec:
    """Add a key field for a dimension, and possibly a foreign key, to a table.

    The new field holds the key of the given `Dimension`; when a foreign key
    is added it references that dimension's table.

    Parameters
    ----------
    tableSpec : `ddl.TableSpec`
        Specification that the field and foreign key are added to.  It is
        modified in place.
    dimension : `Dimension`
        Dimension to be referenced.  If this dimension has required
        dependencies, those must already have been added to the table.  A
        field that corresponds to this dimension's primary key is always
        added; a foreign key constraint is added only if the dimension is
        associated with a table of its own.
    primaryKey : `bool`
        If `True`, the new field becomes part of a compound primary key for
        the table.
    nullable : `bool`, optional
        If `False` (default) the new field is added with a NOT NULL
        constraint.
    constraint : `bool`, optional
        If `False` (`True` is default), only add the field and skip the
        foreign key constraint.

    Returns
    -------
    fieldSpec : `ddl.FieldSpec`
        Specification for the field just added.
    """
    # Work on a shallow copy of the dimension's primary key field so the
    # attribute assignments below do not touch the dimension's own spec.  The
    # copy is renamed after the dimension to make it unique and more
    # meaningful in this table.
    keyField = copy.copy(dimension.primaryKey)
    keyField.name = dimension.name
    keyField.primaryKey = primaryKey
    keyField.nullable = nullable
    tableSpec.fields.add(keyField)
    # A foreign key is only added when the dimension has a table, is not a
    # view of something else, and the caller did not opt out.
    hasOwnTable = dimension.hasTable() and dimension.viewOf is None
    if hasOwnTable and constraint:
        tableSpec.foreignKeys.append(_makeForeignKeySpec(dimension))
    return keyField

127 

128 

class DimensionElementFields:
    """Class for constructing table schemas for `DimensionElement`.

    This creates an object that constructs the table schema for a
    `DimensionElement` and provides a categorized view of its fields.

    Parameters
    ----------
    element : `DimensionElement`
        Element for which to make a table specification.

    Notes
    -----
    This combines the foreign key fields from dependencies, unique keys
    for true `Dimension` instances, metadata fields, and region/timespan
    fields for spatial/temporal elements.

    Callers should use `DimensionUniverse.makeSchemaSpec` if they want to
    account for elements that have no table or reference another table; this
    class simply creates a specification for the table an element _would_ have
    without checking whether it does have one.  That can be useful in contexts
    (e.g. `DimensionRecord`) where we want to simulate the existence of such a
    table.
    """

    def __init__(self, element: DimensionElement):
        self.element = element
        self._tableSpec = ddl.TableSpec(fields=())
        # Add the primary key fields of required dimensions.  These continue to
        # be primary keys in the table for this dimension.
        self.required = NamedValueSet()
        self.dimensions = NamedValueSet()
        self.facts = NamedValueSet()
        self.standard = NamedValueSet()
        # Names of the required-dimension fields other than the element's own
        # key; they prefix every unique constraint on an alternate key.
        dependencies = []
        for dimension in element.required:
            if dimension != element:
                # addDimensionForeignKey adds the field to the table spec.
                fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=True)
                dependencies.append(fieldSpec.name)
            else:
                fieldSpec = element.primaryKey  # type: ignore
                # A Dimension instance is in its own required dependency graph
                # (always at the end, because of topological ordering).  In
                # this case we don't want to rename the field.
                self._tableSpec.fields.add(fieldSpec)
            self.required.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add fields and foreign keys for implied dimensions.  These are
        # primary keys in their own table, but should not be here.  As with
        # required dependencies, we rename the fields with the dimension name.
        # We use element.implied instead of element.graph.implied because we
        # don't want *recursive* implied dependencies.
        self.implied = NamedValueSet()
        for dimension in element.implied:
            fieldSpec = addDimensionForeignKey(self._tableSpec, dimension, primaryKey=False, nullable=False)
            self.implied.add(fieldSpec)
            self.dimensions.add(fieldSpec)
            self.standard.add(fieldSpec)
        # Add non-primary unique keys and unique constraints for them.  Not
        # every element has alternate keys, hence the getattr default.
        for fieldSpec in getattr(element, "alternateKeys", ()):
            self._tableSpec.fields.add(fieldSpec)
            self._tableSpec.unique.add(tuple(dependencies) + (fieldSpec.name,))
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        # Add other metadata fields.
        for fieldSpec in element.metadata:
            self._tableSpec.fields.add(fieldSpec)
            self.standard.add(fieldSpec)
            self.facts.add(fieldSpec)
        names = list(self.standard.names)
        # Add names for regions and/or timespans.  Only the names are recorded
        # here; the corresponding field specs are created in makeTableSpec.
        if element.spatial is not None:
            names.append("region")
        if element.temporal is not None:
            names.append(TimespanDatabaseRepresentation.NAME)
        self.names = tuple(names)

    def makeTableSpec(
        self,
        TimespanReprClass: type[TimespanDatabaseRepresentation],
    ) -> ddl.TableSpec:
        """Construct a complete specification for a table.

        The table could hold the records of this element.

        Parameters
        ----------
        TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
            Class object that specifies how timespans are represented in the
            database.

        Returns
        -------
        spec : `ddl.TableSpec`
            Specification for a table.

        Notes
        -----
        For an element that is neither spatial nor temporal the internal
        specification object is returned directly rather than a copy.
        Otherwise a new specification is built from a copy of the standard
        fields; the unique constraints, indexes and foreign keys of the
        internal specification are passed to it as-is.
        """
        isSpatial = self.element.spatial is not None
        isTemporal = self.element.temporal is not None
        if not (isSpatial or isTemporal):
            return self._tableSpec
        # Copy the field set so that adding region/timespan fields does not
        # modify the standard-only specification held by this object.
        spec = ddl.TableSpec(
            fields=NamedValueSet(self._tableSpec.fields),
            unique=self._tableSpec.unique,
            indexes=self._tableSpec.indexes,
            foreignKeys=self._tableSpec.foreignKeys,
        )
        if isSpatial:
            spec.fields.add(ddl.FieldSpec.for_region())
        if isTemporal:
            spec.fields.update(TimespanReprClass.makeFieldSpecs(nullable=True))
        return spec

    def __str__(self) -> str:
        """Return a multi-line summary of the element's fields and types.

        The first line is the element name; it is followed by one line per
        standard field, then lines for the region and timespan if the element
        is spatial and/or temporal.
        """
        lines = [f"{self.element.name}: "]
        lines.extend(f"  {field.name}: {field.getPythonType().__name__}" for field in self.standard)
        if self.element.spatial is not None:
            lines.append("  region: lsst.sphgeom.Region")
        if self.element.temporal is not None:
            lines.append("  timespan: lsst.daf.butler.Timespan")
        return "\n".join(lines)

    @property
    @cached_getter
    def columns(self) -> Mapping[ColumnTag, str]:
        """A mapping from `ColumnTag` to field name for all fields in this
        element's records (`~collections.abc.Mapping`).
        """
        result: dict[ColumnTag, str] = {}
        # The element's dimensions and this object's dimension fields are
        # paired positionally; strict=True raises if their lengths differ.
        for dimension_name, field_name in zip(
            self.element.dimensions.names, self.dimensions.names, strict=True
        ):
            result[DimensionKeyColumnTag(dimension_name)] = field_name
        for field_name in self.facts.names:
            result[DimensionRecordColumnTag(self.element.name, field_name)] = field_name
        # Use the same ``is not None`` test as __init__, makeTableSpec and
        # __str__ so that this mapping always agrees with `names`.
        if self.element.spatial is not None:
            result[DimensionRecordColumnTag(self.element.name, "region")] = "region"
        if self.element.temporal is not None:
            result[DimensionRecordColumnTag(self.element.name, "timespan")] = "timespan"
        return result

    element: DimensionElement
    """The dimension element these fields correspond to.

    (`DimensionElement`)
    """

    required: NamedValueSet[ddl.FieldSpec]
    """The required dimension fields of this table.

    They correspond to the element's required
    dimensions, in that order, i.e. `DimensionElement.required`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    implied: NamedValueSet[ddl.FieldSpec]
    """The implied dimension fields of this table.

    They correspond to the element's implied
    dimensions, in that order, i.e. `DimensionElement.implied`
    (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    dimensions: NamedValueSet[ddl.FieldSpec]
    """The direct and implied dimension fields of this table.

    They correspond to the element's direct
    required and implied dimensions, in that order, i.e.
    `DimensionElement.dimensions` (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    facts: NamedValueSet[ddl.FieldSpec]
    """The standard fields of this table that do not correspond to dimensions.

    (`NamedValueSet` [ `ddl.FieldSpec` ]).

    This is equivalent to ``standard - dimensions`` (but possibly in a
    different order).
    """

    standard: NamedValueSet[ddl.FieldSpec]
    """All standard fields that are expected to have the same form.

    They are expected to have the same form in all
    databases; this is all fields other than those that represent a region
    and/or timespan (`NamedValueSet` [ `ddl.FieldSpec` ]).
    """

    names: tuple[str, ...]
    """The names of all fields in the specification (`tuple` [ `str` ]).

    This includes "region" and/or "timespan" if `element` is spatial and/or
    temporal (respectively).  The actual database representation of these
    quantities may involve multiple fields (or even fields only on a different
    table), but the Python representation of those rows (i.e. `DimensionRecord`
    instances) will always contain exactly these fields.
    """

324 """