Coverage for python/lsst/daf/butler/dimensions/_elements.py: 70%

123 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ( 

31 "Dimension", 

32 "DimensionCombination", 

33 "DimensionElement", 

34) 

35 

36from abc import abstractmethod 

37from typing import TYPE_CHECKING, Any, ClassVar 

38 

39from lsst.utils.classes import cached_getter 

40 

41from .. import ddl 

42from .._named import NamedValueAbstractSet, NamedValueSet 

43from .._topology import TopologicalRelationshipEndpoint 

44from ..json import from_json_generic, to_json_generic 

45 

46if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

47 from ..registry import Registry 

48 from ._governor import GovernorDimension 

49 from ._graph import DimensionGraph 

50 from ._records import DimensionRecord 

51 from ._universe import DimensionUniverse 

52 

53 

class DimensionElement(TopologicalRelationshipEndpoint):
    """A label and/or metadata in the dimensions system.

    A named data-organization concept that defines a label and/or metadata
    in the dimensions system.

    A `DimensionElement` instance typically corresponds to a _logical_ table in
    the `Registry`: either an actual database table or a way of generating rows
    on-the-fly that can similarly participate in queries.  The rows in that
    table are represented by instances of a `DimensionRecord` subclass.  Most
    `DimensionElement` instances are instances of its `Dimension` subclass,
    which is used for elements that can be used as data ID keys.

    Notes
    -----
    `DimensionElement` instances should always be constructed by and retrieved
    from a `DimensionUniverse`.  They are immutable after they are fully
    constructed, and should never be copied.

    Pickling a `DimensionElement` just records its name and universe;
    unpickling one actually just looks up the element via the singleton
    dictionary of all universes.  This allows pickle to be used to transfer
    elements between processes, but only when each process initializes its own
    instance of the same `DimensionUniverse`.

    Equality and hashing are based solely on ``name``; ordering is based on
    the element's index within its universe.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.name == other.name
        except AttributeError:
            # TODO: try removing this fallback; it's not really consistent with
            # base class intent, and it could be confusing
            return self.name == other

    def __hash__(self) -> int:
        # Must stay consistent with __eq__, which compares names only.
        return hash(self.name)

    # TODO: try removing comparison operators; DimensionUniverse.sorted should
    # be adequate.
    #
    # Each operator compares the positions of the two elements in
    # ``self.universe`` and returns `NotImplemented` when a name lookup raises
    # `KeyError`.

    def __lt__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) < self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __le__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) <= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __gt__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) > self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __ge__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) >= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    @classmethod
    def _unpickle(cls, universe: DimensionUniverse, name: str) -> DimensionElement:
        """Callable used for unpickling.

        For internal use only.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Universe in which to look up the element.
        name : `str`
            Name of the element to look up.

        Returns
        -------
        element : `DimensionElement`
            The result of ``universe[name]``.
        """
        return universe[name]

    def __reduce__(self) -> tuple:
        # Only the universe and the name are pickled; unpickling looks the
        # element up again instead of reconstructing it.
        return (self._unpickle, (self.universe, self.name))

    def __deepcopy__(self, memo: dict) -> DimensionElement:
        # DimensionElement is recursively immutable; see note in @immutable
        # decorator.
        return self

    def to_simple(self, minimal: bool = False) -> str:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `str`
            The object converted to a single string (its name).
        """
        return self.name

    @classmethod
    def from_simple(
        cls, simple: str, universe: DimensionUniverse | None = None, registry: Registry | None = None
    ) -> DimensionElement:
        """Construct a new object from the simplified form.

        Usually the data is returned from the `to_simple` method.

        Parameters
        ----------
        simple : `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.  If `None`,
            ``registry.dimensions`` is used instead.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        element : `DimensionElement`
            The element named ``simple`` as looked up in the universe.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe could be determined.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert a str to a DimensionElement"
                )
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return universe[simple]

    to_json = to_json_generic
    from_json: ClassVar = classmethod(from_json_generic)

    def hasTable(self) -> bool:
        """Indicate if this element is associated with a table.

        Return `True` if this element is associated with a table
        (even if that table "belongs" to another element).
        """
        return True

    universe: DimensionUniverse
    """The universe of all compatible dimensions with which this element is
    associated (`DimensionUniverse`).
    """

    @property
    @cached_getter
    def governor(self) -> GovernorDimension | None:
        """Return the governor dimension.

        This is the `GovernorDimension` that is a required dependency of this
        element, or `None` if there is no such dimension (`GovernorDimension`
        or `None`).

        Raises
        ------
        RuntimeError
            Raised if ``self.graph.governors`` holds more than one dimension.
        """
        governors = self.graph.governors
        if len(governors) == 1:
            (result,) = governors
            return result
        elif len(governors) > 1:
            raise RuntimeError(f"Dimension element {self.name} has multiple governors: {governors}.")
        else:
            return None

    @property
    @abstractmethod
    def required(self) -> NamedValueAbstractSet[Dimension]:
        """Return the required dimensions.

        Dimensions that are necessary to uniquely identify a record of this
        dimension element.

        For elements with a database representation, these dimensions are
        exactly those used to form the (possibly compound) primary key, and all
        dimensions here that are not ``self`` are also used to form foreign
        keys.

        For `Dimension` instances, this should be exactly the same as
        ``graph.required``, but that may not be true for `DimensionElement`
        instances in general.  When they differ, there are multiple
        combinations of dimensions that uniquely identify this element, but
        this one is more direct.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def implied(self) -> NamedValueAbstractSet[Dimension]:
        """Return the implied dimensions.

        Other dimensions that are uniquely identified directly by a record
        of this dimension element.

        For elements with a database representation, these are exactly the
        dimensions used to form foreign key constraints whose fields are not
        (wholly) also part of the primary key.

        Unlike ``self.graph.implied``, this set is not expanded recursively.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def dimensions(self) -> NamedValueAbstractSet[Dimension]:
        """Return all dimensions.

        The union of `required` and `implied`, with all elements in
        `required` before any elements in `implied`.

        This differs from ``self.graph.dimensions`` both in order and in
        content:

        - as in ``self.implied``, implied dimensions are not expanded
          recursively here;
        - implied dimensions appear after required dimensions here, instead of
          being topologically ordered.

        As a result, this set is ordered consistently with
        ``self.RecordClass.fields``.
        """
        return NamedValueSet(list(self.required) + list(self.implied)).freeze()

    @property
    @cached_getter
    def graph(self) -> DimensionGraph:
        """Return minimal graph that includes this element (`DimensionGraph`).

        ``self.graph.required`` includes all dimensions whose primary key
        values are sufficient (often necessary) to uniquely identify ``self``
        (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.graph.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.universe.extract(self.dimensions.names)

    @property
    @cached_getter
    def RecordClass(self) -> type[DimensionRecord]:
        """Return the record subclass for this element.

        The `DimensionRecord` subclass used to hold records for this element
        (`type`).

        Because `DimensionRecord` subclasses are generated dynamically, this
        type cannot be imported directly and hence can only be obtained from
        this attribute.
        """
        # Imported inside the property rather than at module scope, where
        # ``._records`` is only imported under TYPE_CHECKING.
        from ._records import _subclassDimensionRecord

        return _subclassDimensionRecord(self)

    @property
    @abstractmethod
    def metadata(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    def viewOf(self) -> str | None:
        """Name of another table this element's records are drawn from.

        (`str` or `None`).
        """
        return None

    @property
    def alwaysJoin(self) -> bool:
        """Indicate if the element should always be included.

        If `True`, always include this element in any query or data ID in
        which its ``required`` dimensions appear, because it defines a
        relationship between those dimensions that must always be satisfied.
        """
        return False

    @property
    @abstractmethod
    def populated_by(self) -> Dimension | None:
        """The dimension that this element's records are always inserted,
        exported, and imported alongside.

        Notes
        -----
        When this is `None` (as it will be, at least at first, for any data
        repositories created before this attribute was added), records for
        this element will often need to be exported manually when datasets
        associated with some other related dimension are exported, in order for
        the post-import data repository to function as expected.
        """
        raise NotImplementedError()

354 

355 

class Dimension(DimensionElement):
    """A dimension.

    A `DimensionElement` representing a named data-organization concept
    that may serve as a key in a data ID.
    """

    @property
    @abstractmethod
    def uniqueKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return the unique fields.

        Every field that, on its own, identifies records of this element
        once the primary keys of all required dependencies are given
        (`NamedValueAbstractSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def primaryKey(self) -> ddl.FieldSpec:
        """Return primary key field for this dimension (`FieldSpec`).

        This is the first entry of `uniqueKeys`.  Database primary keys for
        dimension tables are generally compound; this field is the only
        part of that compound key that is not also a foreign key (to a
        required dependency dimension table).
        """
        # Star-unpacking keeps the ValueError raised for an empty set.
        first_key, *_others = self.uniqueKeys
        return first_key

    @property
    @cached_getter
    def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return alternate keys.

        All entries of `uniqueKeys` after the first, i.e. the unique key
        fields of this dimension other than the primary key
        (`NamedValueAbstractSet` of `FieldSpec`).

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        _first_key, *remaining = self.uniqueKeys
        alternates = NamedValueSet(remaining)
        return alternates.freeze()

    @property
    def populated_by(self) -> Dimension:
        # Docstring inherited.
        return self

405 

406 

class DimensionCombination(DimensionElement):
    """Element with extra information.

    A `DimensionElement` that supplies additional metadata and/or
    relationship endpoint information for a combination of dimensions.
    """

412 """