Coverage for python/lsst/daf/butler/dimensions/_elements.py: 71%

126 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Names exported by ``from ... import *`` for this module.
__all__ = ("Dimension", "DimensionCombination", "DimensionElement")

35 

36from abc import abstractmethod 

37from typing import TYPE_CHECKING, Any, ClassVar, cast 

38 

39from lsst.utils.classes import cached_getter 

40 

41from .. import ddl 

42from .._named import NamedValueAbstractSet, NamedValueSet 

43from .._topology import TopologicalRelationshipEndpoint 

44from ..json import from_json_generic, to_json_generic 

45 

46if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

47 from ..registry import Registry 

48 from ._governor import GovernorDimension 

49 from ._graph import DimensionGraph 

50 from ._group import DimensionGroup 

51 from ._records import DimensionRecord 

52 from ._universe import DimensionUniverse 

53 

54 

class DimensionElement(TopologicalRelationshipEndpoint):
    """A label and/or metadata in the dimensions system.

    A named data-organization concept that defines a label and/or metadata
    in the dimensions system.

    A `DimensionElement` instance typically corresponds to a _logical_ table in
    the `Registry`: either an actual database table or a way of generating rows
    on-the-fly that can similarly participate in queries.  The rows in that
    table are represented by instances of a `DimensionRecord` subclass.  Most
    `DimensionElement` instances are instances of its `Dimension` subclass,
    which is used for elements that can be used as data ID keys.

    Notes
    -----
    `DimensionElement` instances should always be constructed by and retrieved
    from a `DimensionUniverse`.  They are immutable after they are fully
    constructed, and should never be copied.

    Pickling a `DimensionElement` just records its name and universe;
    unpickling one actually just looks up the element via the singleton
    dictionary of all universes.  This allows pickle to be used to transfer
    elements between processes, but only when each process initializes its own
    instance of the same `DimensionUniverse`.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.name == other.name
        except AttributeError:
            # TODO: try removing this fallback; it's not really consistent with
            # base class intent, and it could be confusing
            return self.name == other

    def __hash__(self) -> int:
        # Hash on the name only, matching what __eq__ compares.
        return hash(self.name)

    # TODO: try removing comparison operators; DimensionUniverse.sorted should
    # be adequate.
    #
    # Each operator compares the indices that ``self.universe`` assigns to the
    # two element names.  A `KeyError` from that lookup is turned into
    # `NotImplemented` so that Python can try the reflected operation.

    def __lt__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) < self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __le__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) <= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __gt__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) > self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __ge__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) >= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    @classmethod
    def _unpickle(cls, universe: DimensionUniverse, name: str) -> DimensionElement:
        """Callable used for unpickling.

        For internal use only.
        """
        return universe[name]

    def __reduce__(self) -> tuple:
        # Only the universe and the name are recorded; `_unpickle` looks the
        # element up again instead of rebuilding it.
        return (self._unpickle, (self.universe, self.name))

    def __deepcopy__(self, memo: dict) -> DimensionElement:
        # DimensionElement is recursively immutable; see note in @immutable
        # decorator.
        return self

    def to_simple(self, minimal: bool = False) -> str:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `str`
            The object converted to a single string.
        """
        return self.name

    @classmethod
    def from_simple(
        cls, simple: str, universe: DimensionUniverse | None = None, registry: Registry | None = None
    ) -> DimensionElement:
        """Construct a new object from the simplified form.

        Usually the data is returned from the `to_simple` method.

        Parameters
        ----------
        simple : `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.  Takes precedence over
            ``registry`` when both are given.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        element : `DimensionElement`
            The element named ``simple``, as looked up in the universe.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert a str to a DimensionElement"
                )
            universe = registry.dimensions

        return universe[simple]

    to_json = to_json_generic
    from_json: ClassVar = classmethod(from_json_generic)

    def hasTable(self) -> bool:
        """Indicate if this element is associated with a table.

        Return `True` if this element is associated with a table
        (even if that table "belongs" to another element).
        """
        return True

    universe: DimensionUniverse
    """The universe of all compatible dimensions with which this element is
    associated (`DimensionUniverse`).
    """

    @property
    @cached_getter
    def governor(self) -> GovernorDimension | None:
        """Return the governor dimension.

        This is the `GovernorDimension` that is a required dependency of this
        element, or `None` if there is no such dimension (`GovernorDimension`
        or `None`).

        Raises
        ------
        RuntimeError
            Raised if the element's minimal group has more than one governor.
        """
        governors = self.minimal_group.governors
        if len(governors) == 1:
            (result,) = governors
            return cast("GovernorDimension", self.universe[result])
        elif len(governors) > 1:
            raise RuntimeError(f"Dimension element {self.name} has multiple governors: {governors}.")
        else:
            return None

    @property
    @abstractmethod
    def required(self) -> NamedValueAbstractSet[Dimension]:
        """Return the required dimensions.

        Dimensions that are necessary to uniquely identify a record of this
        dimension element.

        For elements with a database representation, these dimensions are
        exactly those used to form the (possibly compound) primary key, and all
        dimensions here that are not ``self`` are also used to form foreign
        keys.

        For `Dimension` instances, this should be exactly the same as
        ``graph.required``, but that may not be true for `DimensionElement`
        instances in general.  When they differ, there are multiple
        combinations of dimensions that uniquely identify this element, but
        this one is more direct.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def implied(self) -> NamedValueAbstractSet[Dimension]:
        """Return the implied dimensions.

        Other dimensions that are uniquely identified directly by a record
        of this dimension element.

        For elements with a database representation, these are exactly the
        dimensions used to form foreign key constraints whose fields are not
        (wholly) also part of the primary key.

        Unlike ``self.graph.implied``, this set is not expanded recursively.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def dimensions(self) -> NamedValueAbstractSet[Dimension]:
        """Return all dimensions.

        The union of `required` and `implied`, with all elements in
        `required` before any elements in `implied`.

        This differs from ``self.graph.dimensions`` both in order and in
        content:

        - as in ``self.implied``, implied dimensions are not expanded
          recursively here;
        - implied dimensions appear after required dimensions here, instead of
          being topologically ordered.

        As a result, this set is ordered consistently with
        ``self.RecordClass.fields``.
        """
        return NamedValueSet(list(self.required) + list(self.implied)).freeze()

    # Deprecated via a warning from its implementation.
    # TODO: remove on DM-41326.
    @property
    def graph(self) -> DimensionGraph:
        """Return minimal graph that includes this element (`DimensionGraph`).

        ``self.graph.required`` includes all dimensions whose primary key
        values are sufficient (often necessary) to uniquely identify ``self``
        (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.graph.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.minimal_group._as_graph()

    @property
    @cached_getter
    def minimal_group(self) -> DimensionGroup:
        """Return minimal dimension group that includes this element.

        ``self.minimal_group.required`` includes all dimensions whose primary
        key values are sufficient (often necessary) to uniquely identify
        ``self`` (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.minimal_group.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.universe.conform(self.dimensions.names)

    @property
    @cached_getter
    def RecordClass(self) -> type[DimensionRecord]:
        """Return the record subclass for this element.

        The `DimensionRecord` subclass used to hold records for this element
        (`type`).

        Because `DimensionRecord` subclasses are generated dynamically, this
        type cannot be imported directly and hence can only be obtained from
        this attribute.
        """
        # Imported inside the property rather than at module scope;
        # presumably to avoid a circular import with ``_records`` -- confirm.
        from ._records import _subclassDimensionRecord

        return _subclassDimensionRecord(self)

    @property
    @abstractmethod
    def metadata(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    def viewOf(self) -> str | None:
        """Name of another table this element's records are drawn from.

        (`str` or `None`).
        """
        return None

    @property
    def alwaysJoin(self) -> bool:
        """Indicate if the element should always be included.

        If `True`, always include this element in any query or data ID in
        which its ``required`` dimensions appear, because it defines a
        relationship between those dimensions that must always be satisfied.
        """
        return False

    @property
    @abstractmethod
    def populated_by(self) -> Dimension | None:
        """The dimension that this element's records are always inserted,
        exported, and imported alongside.

        Notes
        -----
        When this is `None` (as it will be, at least at first, for any data
        repositories created before this attribute was added), records for
        this element will often need to be exported manually when datasets
        associated with some other related dimension are exported, in order for
        the post-import data repository to function as expected.
        """
        raise NotImplementedError()

369 

370 

class Dimension(DimensionElement):
    """A `DimensionElement` that can serve as a key in a data ID.

    This is a named data-organization concept; subclasses must supply
    `uniqueKeys`, from which `primaryKey` and `alternateKeys` are derived.
    """

    @property
    @abstractmethod
    def uniqueKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return every field that can identify a record on its own.

        Each field here is individually sufficient to identify a record of
        this element once the primary keys of all required dependencies are
        known (`NamedValueAbstractSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def primaryKey(self) -> ddl.FieldSpec:
        """Return the first of `uniqueKeys` (`FieldSpec`).

        Database primary keys for dimension tables are in general compound.
        This field is the one component of that key that is not also a
        foreign key to a required dependency dimension table.
        """
        # Star-unpacking keeps only the leading field and discards the rest.
        leading_key, *_ = self.uniqueKeys
        return leading_key

    @property
    @cached_getter
    def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return all of `uniqueKeys` except the first.

        These are the additional unique key fields that are not the primary
        key (`NamedValueAbstractSet` of `FieldSpec`).

        When the dimension has required dependencies, the keys of those
        dimensions also take part in the unique constraints defined for
        these alternate keys.
        """
        # Drop the leading (primary) field and freeze what remains.
        _, *remaining_keys = self.uniqueKeys
        return NamedValueSet(remaining_keys).freeze()

    @property
    def populated_by(self) -> Dimension:
        # Docstring inherited.
        return self

420 

421 

422class DimensionCombination(DimensionElement): 

423 """Element with extra information. 

424 

425 A `DimensionElement` that provides extra metadata and/or relationship 

426 endpoint information for a combination of dimensions. 

427 """