Coverage for python / lsst / daf / butler / dimensions / _elements.py: 56%

186 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ( 

31 "Dimension", 

32 "DimensionCombination", 

33 "DimensionElement", 

34) 

35 

36from abc import abstractmethod 

37from collections.abc import Callable 

38from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Self, TypeAlias, Union, cast 

39 

40import pydantic 

41from pydantic_core import core_schema 

42 

43from lsst.utils.classes import cached_getter 

44 

45from .. import arrow_utils, column_spec, ddl, pydantic_utils 

46from .._named import NamedValueAbstractSet, NamedValueSet 

47from .._topology import TopologicalRelationshipEndpoint 

48from ..json import from_json_generic, to_json_generic 

49 

50if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

51 from ..registry import Registry 

52 from ._governor import GovernorDimension 

53 from ._group import DimensionGroup 

54 from ._records import DimensionRecord 

55 from ._schema import DimensionRecordSchema 

56 from ._universe import DimensionUniverse 

57 

# Discriminated union of the column-spec types that may serve as a dimension
# key column. Pydantic inspects each spec's "type" field to pick the union
# member to validate against, so member order does not affect validation.
KeyColumnSpec: TypeAlias = Annotated[
    Union[
        column_spec.IntColumnSpec,
        column_spec.StringColumnSpec,
        column_spec.HashColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]

66 

# Discriminated union of the column-spec types allowed for non-key metadata
# columns; a superset of KeyColumnSpec's members (adds float and bool, which
# cannot serve as keys). Pydantic dispatches on each spec's "type" field.
MetadataColumnSpec: TypeAlias = Annotated[
    Union[
        column_spec.IntColumnSpec,
        column_spec.StringColumnSpec,
        column_spec.FloatColumnSpec,
        column_spec.HashColumnSpec,
        column_spec.BoolColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]

77 

78 

class DimensionElement(TopologicalRelationshipEndpoint):
    """A label and/or metadata in the dimensions system.

    A named data-organization concept that defines a label and/or metadata
    in the dimensions system.

    A `DimensionElement` instance typically corresponds to a _logical_ table in
    the `Registry`: either an actual database table or a way of generating rows
    on-the-fly that can similarly participate in queries. The rows in that
    table are represented by instances of a `DimensionRecord` subclass. Most
    `DimensionElement` instances are instances of its `Dimension` subclass,
    which is used for elements that can be used as data ID keys.

    Notes
    -----
    `DimensionElement` instances should always be constructed by and retrieved
    from a `DimensionUniverse`. They are immutable after they are fully
    constructed, and should never be copied.

    Pickling a `DimensionElement` just records its name and universe;
    unpickling one actually just looks up the element via the singleton
    dictionary of all universes. This allows pickle to be used to transfer
    elements between processes, but only when each process initializes its own
    instance of the same `DimensionUniverse`.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.name == other.name
        except AttributeError:
            # TODO: try removing this fallback; it's not really consistent with
            # base class intent, and it could be confusing
            return self.name == other

    def __hash__(self) -> int:
        return hash(self.name)

    # TODO: try removing comparison operators; DimensionUniverse.sorted should
    # be adequate.

    def __lt__(self, other: DimensionElement) -> bool:
        # Elements order by their index within the universe; a KeyError means
        # one of the names is unknown to this universe, so we return
        # NotImplemented to let Python try the reflected operation.
        try:
            return self.universe.getElementIndex(self.name) < self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __le__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) <= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __gt__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) > self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    def __ge__(self, other: DimensionElement) -> bool:
        try:
            return self.universe.getElementIndex(self.name) >= self.universe.getElementIndex(other.name)
        except KeyError:
            return NotImplemented

    @classmethod
    def _unpickle(cls, universe: DimensionUniverse, name: str) -> DimensionElement:
        """Callable used for unpickling.

        For internal use only.
        """
        return universe[name]

    def __reduce__(self) -> tuple:
        # Pickle records only (universe, name); unpickling looks the element
        # back up rather than reconstructing it, per the class Notes.
        return (self._unpickle, (self.universe, self.name))

    def __deepcopy__(self, memo: dict) -> DimensionElement:
        # DimensionElement is recursively immutable; see note in @immutable
        # decorator.
        return self

    def to_simple(self, minimal: bool = False) -> str:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `str`
            The object converted to a single string.
        """
        return self.name

    @classmethod
    def from_simple(
        cls, simple: str, universe: DimensionUniverse | None = None, registry: Registry | None = None
    ) -> DimensionElement:
        """Construct a new object from the simplified form.

        Usually the data is returned from the `to_simple` method.

        Parameters
        ----------
        simple : `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        dataId : `DimensionElement`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided.
        """
        if universe is None and registry is None:
            raise ValueError(
                "One of universe or registry is required to convert a str to a DimensionElement"
            )
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return universe[simple]

    to_json = to_json_generic
    from_json: ClassVar[Callable[..., Self]] = cast(Callable[..., Self], classmethod(from_json_generic))

    def hasTable(self) -> bool:
        """Indicate if this element is associated with a table.

        Return `True` if this element is associated with a table
        (even if that table "belongs" to another element).
        """
        return self.has_own_table or self.implied_union_target is not None

    universe: DimensionUniverse
    """The universe of all compatible dimensions with which this element is
    associated (`DimensionUniverse`).
    """

    @property
    @cached_getter
    def governor(self) -> GovernorDimension | None:
        """Return the governor dimension.

        This is the `GovernorDimension` that is a required dependency of this
        element, or `None` if there is no such dimension (`GovernorDimension`
        or `None`).
        """
        if len(self.minimal_group.governors) == 1:
            (result,) = self.minimal_group.governors
            return cast("GovernorDimension", self.universe[result])
        elif len(self.minimal_group.governors) > 1:
            raise RuntimeError(
                f"Dimension element {self.name} has multiple governors: {self.minimal_group.governors}."
            )
        else:
            return None

    @property
    @abstractmethod
    def required(self) -> NamedValueAbstractSet[Dimension]:
        """Return the required dimensions.

        Dimensions that are necessary to uniquely identify a record of this
        dimension element.

        For elements with a database representation, these dimensions are
        exactly those used to form the (possibly compound) primary key, and all
        dimensions here that are not ``self`` are also used to form foreign
        keys.

        For `Dimension` instances, this should be exactly the same as
        ``graph.required``, but that may not be true for `DimensionElement`
        instances in general. When they differ, there are multiple
        combinations of dimensions that uniquely identify this element, but
        this one is more direct.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def implied(self) -> NamedValueAbstractSet[Dimension]:
        """Return the implied dimensions.

        Other dimensions that are uniquely identified directly by a record
        of this dimension element.

        For elements with a database representation, these are exactly the
        dimensions used to form foreign key constraints whose fields are not
        (wholly) also part of the primary key.

        Unlike ``self.graph.implied``, this set is not expanded recursively.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def dimensions(self) -> NamedValueAbstractSet[Dimension]:
        """Return all dimensions.

        The union of `required` and `implied`, with all elements in
        `required` before any elements in `implied`.

        This differs from ``self.graph.dimensions`` both in order and in
        content:

        - as in ``self.implied``, implied dimensions are not expanded
          recursively here;
        - implied dimensions appear after required dimensions here, instead of
          being topologically ordered.

        As a result, this set is ordered consistently with
        ``self.RecordClass.fields``.
        """
        return NamedValueSet(list(self.required) + list(self.implied)).freeze()

    @property
    @cached_getter
    def minimal_group(self) -> DimensionGroup:
        """Return minimal dimension group that includes this element.

        ``self.minimal_group.required`` includes all dimensions whose primary
        key values are sufficient (often necessary) to uniquely identify
        ``self`` (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.minimal_group.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.universe.conform(self.dimensions.names)

    @property
    @cached_getter
    def RecordClass(self) -> type[DimensionRecord]:
        """Return the record subclass for this element.

        The `DimensionRecord` subclass used to hold records for this element
        (`type`).

        Because `DimensionRecord` subclasses are generated dynamically, this
        type cannot be imported directly and hence can only be obtained from
        this attribute.
        """
        # Local import to avoid a circular dependency at module import time.
        from ._records import _subclassDimensionRecord

        return _subclassDimensionRecord(self)

    @property
    def alternate_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        """Additional unique key fields for this dimension element that are not
        the primary key (`NamedValueAbstractSet` of `KeyColumnSpec`).

        This is always empty for elements that are not dimensions.

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        return NamedValueSet().freeze()

    @property
    @abstractmethod
    def metadata_columns(self) -> NamedValueAbstractSet[MetadataColumnSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `MetadataColumnSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def metadata(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `FieldSpec`).
        """
        # Loop variable deliberately does not reuse the name of the
        # file-level `column_spec` module import.
        return NamedValueSet([spec.to_sql_spec() for spec in self.metadata_columns]).freeze()

    @property
    def viewOf(self) -> str | None:
        """Name of another table this element's records are drawn from.

        (`str` or `None`).
        """
        return self.implied_union_target.name if self.implied_union_target is not None else None

    @property
    def alwaysJoin(self) -> bool:
        """Indicate if the element should always be included.

        If `True`, always include this element in any query or data ID in
        which its ``required`` dimensions appear, because it defines a
        relationship between those dimensions that must always be satisfied.
        """
        return False

    @property
    def has_own_table(self) -> bool:
        """Whether this element should have its own table in the database."""
        return self.implied_union_target is None

    @property
    def implied_union_target(self) -> DimensionElement | None:
        """If not `None`, another element whose implied values for this element
        form the set of allowable values.

        For example, in the default dimension universe, the allowed values for
        ``band`` is the union of all ``band`` values in the ``physical_filter``
        table, so the `implied_union_target` for ``band`` is
        ``physical_filter``.
        """
        return None

    @property
    def defines_relationships(self) -> bool:
        """Whether this element's records define one or more relationships that
        must be satisfied in rows over dimensions that include it.
        """
        return bool(self.implied)

    @property
    def is_cached(self) -> bool:
        """Whether this element's records should be aggressively cached,
        because they are small in number and rarely inserted.
        """
        return False

    @property
    @abstractmethod
    def populated_by(self) -> Dimension | None:
        """The dimension that this element's records are always inserted,
        exported, and imported alongside.

        Notes
        -----
        When this is `None` (as it will be, at least at first, for any data
        repositories created before this attribute was added), records for
        this element will often need to be exported manually when datasets
        associated with some other related dimension are exported, in order for
        the post-import data repository to function as expected.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def schema(self) -> DimensionRecordSchema:
        """A description of the columns in this element's records and (at least
        conceptual) table.
        """
        # Local import to avoid a circular dependency at module import time.
        from ._schema import DimensionRecordSchema

        return DimensionRecordSchema(self)

    @property
    @abstractmethod
    def documentation(self) -> str:
        """Extended description of this dimension element."""
        raise NotImplementedError()

    @classmethod
    def _validate(cls, data: Any, info: pydantic.ValidationInfo) -> DimensionElement:
        """Pydantic validator (deserializer) for `DimensionElement`.

        This satisfies the `pydantic.WithInfoPlainValidatorFunction` signature.
        """
        # The universe must be provided via the Pydantic validation context.
        universe = pydantic_utils.get_universe_from_context(info.context)
        return universe[data]

    def _serialize(self) -> str:
        """Pydantic serializer for `DimensionElement`.

        This satisfies the `pydantic.PlainSerializerFunction` signature.
        """
        return self.name

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # This is the Pydantic hook for overriding serialization, validation,
        # and JSON schema generation.
        str_schema = core_schema.str_schema()
        from_str_schema = core_schema.chain_schema(
            [str_schema, core_schema.with_info_plain_validator_function(cls._validate)]
        )
        return core_schema.json_or_python_schema(
            # When deserializing from JSON, expect it to be a `str`.
            json_schema=from_str_schema,
            # When deserializing from Python, first see if it's already a
            # DimensionElement and then try conversion from `str`.
            python_schema=core_schema.union_schema(
                [core_schema.is_instance_schema(DimensionElement), from_str_schema]
            ),
            # When serializing convert it to a `str`.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._serialize, return_schema=str_schema
            ),
        )

488 

489 

class Dimension(DimensionElement):
    """A dimension.

    A named data-organization concept that can be used as a key in a data
    ID.
    """

    @property
    @abstractmethod
    def unique_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        """Descriptions of unique identifiers for this dimension.

        All fields that can individually be used to identify records of this
        element, given the primary keys of all required dependencies
        (`NamedValueAbstractSet` of `KeyColumnSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def primary_key(self) -> KeyColumnSpec:
        """The primary key field for this dimension (`KeyColumnSpec`).

        Note that the database primary keys for dimension tables are in general
        compound; this field is the only field in the database primary key that
        is not also a foreign key (to a required dependency dimension table).
        """
        # By convention the first entry in unique_keys is the primary key.
        primary_key, *_ = self.unique_keys
        return primary_key

    @property
    @cached_getter
    def alternate_keys(self) -> NamedValueAbstractSet[KeyColumnSpec]:
        # Docstring inherited.
        # Everything after the first (primary) key is an alternate key.
        _, *alternate_keys = self.unique_keys
        return NamedValueSet(alternate_keys).freeze()

    @property
    @cached_getter
    def uniqueKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return the unique fields.

        All fields that can individually be used to identify records of this
        element, given the primary keys of all required dependencies
        (`NamedValueAbstractSet` of `FieldSpec`).
        """
        # Only the first (primary) key gets primaryKey=True in its SQL spec.
        # Freeze the result: this property is cached, so returning a mutable
        # set would expose shared state (all sibling cached sets here are
        # frozen as well).
        return NamedValueSet(
            [spec.to_sql_spec(primaryKey=(n == 0)) for n, spec in enumerate(self.unique_keys)]
        ).freeze()

    @property
    @cached_getter
    def primaryKey(self) -> ddl.FieldSpec:
        """Return primary key field for this dimension (`FieldSpec`).

        Note that the database primary keys for dimension tables are in general
        compound; this field is the only field in the database primary key that
        is not also a foreign key (to a required dependency dimension table).
        """
        primaryKey, *_ = self.uniqueKeys
        return primaryKey

    @property
    @cached_getter
    def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return alternate keys.

        Additional unique key fields for this dimension that are not the
        primary key (`NamedValueAbstractSet` of `FieldSpec`).

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        _, *alternateKeys = self.uniqueKeys
        return NamedValueSet(alternateKeys).freeze()

    @property
    def populated_by(self) -> Dimension:
        # Docstring inherited.  A dimension's records are always populated
        # alongside the dimension itself.
        return self

    def to_arrow(self, dimensions: DimensionGroup, spec: KeyColumnSpec | None = None) -> arrow_utils.ToArrow:
        """Return an object that converts the primary key value for this
        dimension to a column in an Arrow table.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Full set of dimensions over which the rows of the table are unique
            or close to unique. This is used to determine whether to use
            Arrow's dictionary encoding to compress duplicate values.
        spec : `KeyColumnSpec`, optional
            Column specification for this dimension. If not provided, a copy
            of `primary_key` with the field name replaced with the dimension
            name will be used, which is appropriate for when this dimension
            appears in data ID or the dimension record tables of other
            dimension elements.

        Returns
        -------
        converter : `~lsst.daf.butler.arrow_utils.ToArrow`
            Converter for this dimension's primary key.
        """
        if spec is None:
            spec = self.primary_key.model_copy(update={"name": self.name})
        if dimensions != self.minimal_group and spec.type != "int":
            # Values are large and will be duplicated in rows that are unique
            # over these dimensions, so dictionary encoding may help a lot.
            return spec.to_arrow().dictionary_encoded()
        else:
            return spec.to_arrow()

603 

604class DimensionCombination(DimensionElement): 

605 """Element with extra information. 

606 

607 A `DimensionElement` that provides extra metadata and/or relationship 

608 endpoint information for a combination of dimensions. 

609 """