Coverage for python/lsst/daf/butler/dimensions/_universe.py: 46%

202 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionUniverse"] 

31 

32import logging 

33import math 

34import pickle 

35from collections import defaultdict 

36from collections.abc import Iterable, Mapping, Sequence 

37from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast, overload 

38 

39from deprecated.sphinx import deprecated 

40from lsst.utils.classes import cached_getter, immutable 

41 

42from .._config import Config 

43from .._named import NamedValueAbstractSet, NamedValueSet 

44from .._topology import TopologicalFamily, TopologicalSpace 

45from ._config import _DEFAULT_NAMESPACE, DimensionConfig 

46from ._database import DatabaseDimensionElement 

47from ._elements import Dimension, DimensionElement 

48from ._governor import GovernorDimension 

49from ._graph import DimensionGraph 

50from ._group import DimensionGroup 

51from ._skypix import SkyPixDimension, SkyPixSystem 

52 

53if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

54 from ._coordinate import DataCoordinate 

55 from ._packer import DimensionPacker, DimensionPackerFactory 

56 from .construction import DimensionConstructionBuilder 

57 

58 

# Type variable restricted (via ``bound``) to `DimensionElement` and its
# subclasses.
E = TypeVar("E", bound=DimensionElement)
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

61 

62 

63@immutable 

64class DimensionUniverse: 

65 """Self-consistent set of dimensions. 

66 

67 A parent class that represents a complete, self-consistent set of 

68 dimensions and their relationships. 

69 

70 `DimensionUniverse` is not a class-level singleton, but all instances are 

71 tracked in a singleton map keyed by the version number and namespace 

72 in the configuration they were loaded from. Because these universes 

73 are solely responsible for constructing `DimensionElement` instances, 

74 these are also indirectly tracked by that singleton as well. 

75 

76 Parameters 

77 ---------- 

78 config : `Config`, optional 

79 Configuration object from which dimension definitions can be extracted. 

80 Ignored if ``builder`` is provided, or if ``version`` is provided and 

81 an instance with that version already exists. 

82 version : `int`, optional 

83 Integer version for this `DimensionUniverse`. If not provided, a 

84 version will be obtained from ``builder`` or ``config``. 

85 namespace : `str`, optional 

86 Namespace of this `DimensionUniverse`, combined with the version 

87 to provide universe safety for registries that use different 

88 dimension definitions. 

89 builder : `DimensionConstructionBuilder`, optional 

90 Builder object used to initialize a new instance. Ignored if 

91 ``version`` is provided and an instance with that version already 

92 exists. Should not have had `~DimensionConstructionBuilder.finish` 

93 called; this will be called if needed by `DimensionUniverse`. 

94 """ 

95 

96 _instances: ClassVar[dict[tuple[int, str], DimensionUniverse]] = {} 

97 """Singleton dictionary of all instances, keyed by version. 

98 

99 For internal use only. 

100 """ 

101 

102 def __new__( 

103 cls, 

104 config: Config | None = None, 

105 *, 

106 version: int | None = None, 

107 namespace: str | None = None, 

108 builder: DimensionConstructionBuilder | None = None, 

109 ) -> DimensionUniverse: 

110 # Try to get a version first, to look for existing instances; try to 

111 # do as little work as possible at this stage. 

112 if version is None: 

113 if builder is None: 

114 config = DimensionConfig(config) 

115 version = config["version"] 

116 else: 

117 version = builder.version 

118 

119 # Then a namespace. 

120 if namespace is None: 

121 if builder is None: 

122 config = DimensionConfig(config) 

123 namespace = config.get("namespace", _DEFAULT_NAMESPACE) 

124 else: 

125 namespace = builder.namespace 

126 # if still None use the default 

127 if namespace is None: 

128 namespace = _DEFAULT_NAMESPACE 

129 

130 # See if an equivalent instance already exists. 

131 self: DimensionUniverse | None = cls._instances.get((version, namespace)) 

132 if self is not None: 

133 return self 

134 

135 # Ensure we have a builder, building one from config if necessary. 

136 if builder is None: 

137 config = DimensionConfig(config) 

138 builder = config.makeBuilder() 

139 

140 # Delegate to the builder for most of the construction work. 

141 builder.finish() 

142 

143 # Create the universe instance and create core attributes, mostly 

144 # copying from builder. 

145 self = object.__new__(cls) 

146 assert self is not None 

147 self._cached_groups = {} 

148 self._dimensions = builder.dimensions 

149 self._elements = builder.elements 

150 self._topology = builder.topology 

151 self._packers = builder.packers 

152 self.dimensionConfig = builder.config 

153 commonSkyPix = self._dimensions[builder.commonSkyPixName] 

154 assert isinstance(commonSkyPix, SkyPixDimension) 

155 self.commonSkyPix = commonSkyPix 

156 

157 # Attach self to all elements. 

158 for element in self._elements: 

159 element.universe = self 

160 

161 # Add attribute for special subsets of the graph. 

162 self._empty = DimensionGroup(self, (), _conform=False) 

163 

164 # Use the version number and namespace from the config as a key in 

165 # the singleton dict containing all instances; that will let us 

166 # transfer dimension objects between processes using pickle without 

167 # actually going through real initialization, as long as a universe 

168 # with the same version and namespace has already been constructed in 

169 # the receiving process. 

170 self._version = version 

171 self._namespace = namespace 

172 cls._instances[self._version, self._namespace] = self 

173 

174 # Build mappings from element to index. These are used for 

175 # topological-sort comparison operators in DimensionElement itself. 

176 self._elementIndices = {name: i for i, name in enumerate(self._elements.names)} 

177 # Same for dimension to index, sorted topologically across required 

178 # and implied. This is used for encode/decode. 

179 self._dimensionIndices = {name: i for i, name in enumerate(self._dimensions.names)} 

180 

181 self._populates = defaultdict(NamedValueSet) 

182 for element in self._elements: 

183 if element.populated_by is not None: 

184 self._populates[element.populated_by.name].add(element) 

185 

186 return self 

187 

188 @property 

189 def version(self) -> int: 

190 """The version number of this universe. 

191 

192 Returns 

193 ------- 

194 version : `int` 

195 An integer representing the version number of this universe. 

196 Uniquely defined when combined with the `namespace`. 

197 """ 

198 return self._version 

199 

200 @property 

201 def namespace(self) -> str: 

202 """The namespace associated with this universe. 

203 

204 Returns 

205 ------- 

206 namespace : `str` 

207 The namespace. When combined with the `version` can uniquely 

208 define this universe. 

209 """ 

210 return self._namespace 

211 

212 def isCompatibleWith(self, other: DimensionUniverse) -> bool: 

213 """Check compatibility between this `DimensionUniverse` and another. 

214 

215 Parameters 

216 ---------- 

217 other : `DimensionUniverse` 

218 The other `DimensionUniverse` to check for compatibility 

219 

220 Returns 

221 ------- 

222 results : `bool` 

223 If the other `DimensionUniverse` is compatible with this one return 

224 `True`, else `False` 

225 """ 

226 # Different namespaces mean that these universes cannot be compatible. 

227 if self.namespace != other.namespace: 

228 return False 

229 if self.version != other.version: 

230 _LOG.info( 

231 "Universes share a namespace %r but have differing versions (%d != %d). " 

232 " This could be okay but may be responsible for dimension errors later.", 

233 self.namespace, 

234 self.version, 

235 other.version, 

236 ) 

237 

238 # For now assume compatibility if versions differ. 

239 return True 

240 

241 def __repr__(self) -> str: 

242 return f"DimensionUniverse({self._version}, {self._namespace})" 

243 

244 def __getitem__(self, name: str) -> DimensionElement: 

245 return self._elements[name] 

246 

247 def __contains__(self, name: Any) -> bool: 

248 return name in self._elements 

249 

250 def get(self, name: str, default: DimensionElement | None = None) -> DimensionElement | None: 

251 """Return the `DimensionElement` with the given name or a default. 

252 

253 Parameters 

254 ---------- 

255 name : `str` 

256 Name of the element. 

257 default : `DimensionElement`, optional 

258 Element to return if the named one does not exist. Defaults to 

259 `None`. 

260 

261 Returns 

262 ------- 

263 element : `DimensionElement` 

264 The named element. 

265 """ 

266 return self._elements.get(name, default) 

267 

268 def getStaticElements(self) -> NamedValueAbstractSet[DimensionElement]: 

269 """Return a set of all static elements in this universe. 

270 

271 Non-static elements that are created as needed may also exist, but 

272 these are guaranteed to have no direct relationships to other elements 

273 (though they may have spatial or temporal relationships). 

274 

275 Returns 

276 ------- 

277 elements : `NamedValueAbstractSet` [ `DimensionElement` ] 

278 A frozen set of `DimensionElement` instances. 

279 """ 

280 return self._elements 

281 

282 def getStaticDimensions(self) -> NamedValueAbstractSet[Dimension]: 

283 """Return a set of all static dimensions in this universe. 

284 

285 Non-static dimensions that are created as needed may also exist, but 

286 these are guaranteed to have no direct relationships to other elements 

287 (though they may have spatial or temporal relationships). 

288 

289 Returns 

290 ------- 

291 dimensions : `NamedValueAbstractSet` [ `Dimension` ] 

292 A frozen set of `Dimension` instances. 

293 """ 

294 return self._dimensions 

295 

296 def getGovernorDimensions(self) -> NamedValueAbstractSet[GovernorDimension]: 

297 """Return a set of all `GovernorDimension` instances in this universe. 

298 

299 Returns 

300 ------- 

301 governors : `NamedValueAbstractSet` [ `GovernorDimension` ] 

302 A frozen set of `GovernorDimension` instances. 

303 """ 

304 return self.governor_dimensions 

305 

306 def getDatabaseElements(self) -> NamedValueAbstractSet[DatabaseDimensionElement]: 

307 """Return set of all `DatabaseDimensionElement` instances in universe. 

308 

309 This does not include `GovernorDimension` instances, which are backed 

310 by the database but do not inherit from `DatabaseDimensionElement`. 

311 

312 Returns 

313 ------- 

314 elements : `NamedValueAbstractSet` [ `DatabaseDimensionElement` ] 

315 A frozen set of `DatabaseDimensionElement` instances. 

316 """ 

317 return self.database_elements 

318 

319 @property 

320 def elements(self) -> NamedValueAbstractSet[DimensionElement]: 

321 """All dimension elements defined in this universe.""" 

322 return self._elements 

323 

324 @property 

325 def dimensions(self) -> NamedValueAbstractSet[Dimension]: 

326 """All dimensions defined in this universe.""" 

327 return self._dimensions 

328 

329 @property 

330 @cached_getter 

331 def governor_dimensions(self) -> NamedValueAbstractSet[GovernorDimension]: 

332 """All governor dimensions defined in this universe. 

333 

334 Governor dimensions serve as special required dependencies of other 

335 dimensions, with special handling in dimension query expressions and 

336 collection summaries. Governor dimension records are stored in the 

337 database but the set of such values is expected to be small enough 

338 for all values to be cached by all clients. 

339 """ 

340 return NamedValueSet(d for d in self._dimensions if isinstance(d, GovernorDimension)).freeze() 

341 

342 @property 

343 @cached_getter 

344 def skypix_dimensions(self) -> NamedValueAbstractSet[SkyPixDimension]: 

345 """All skypix dimensions defined in this universe. 

346 

347 Skypix dimension records are always generated on-the-fly rather than 

348 stored in the database, and they always represent a tiling of the sky 

349 with no overlaps. 

350 """ 

351 result = NamedValueSet[SkyPixDimension]() 

352 for system in self.skypix: 

353 result.update(system) 

354 return result.freeze() 

355 

356 @property 

357 @cached_getter 

358 def database_elements(self) -> NamedValueAbstractSet[DatabaseDimensionElement]: 

359 """All dimension elements whose records are stored in the database, 

360 except governor dimensions. 

361 """ 

362 return NamedValueSet(d for d in self._elements if isinstance(d, DatabaseDimensionElement)).freeze() 

363 

364 @property 

365 @cached_getter 

366 def skypix(self) -> NamedValueAbstractSet[SkyPixSystem]: 

367 """All skypix systems known to this universe. 

368 

369 (`NamedValueAbstractSet` [ `SkyPixSystem` ]). 

370 """ 

371 return NamedValueSet( 

372 [ 

373 family 

374 for family in self._topology[TopologicalSpace.SPATIAL] 

375 if isinstance(family, SkyPixSystem) 

376 ] 

377 ).freeze() 

378 

379 def getElementIndex(self, name: str) -> int: 

380 """Return the position of the named dimension element. 

381 

382 The position is in this universe's sorting of all elements. 

383 

384 Parameters 

385 ---------- 

386 name : `str` 

387 Name of the element. 

388 

389 Returns 

390 ------- 

391 index : `int` 

392 Sorting index for this element. 

393 """ 

394 return self._elementIndices[name] 

395 

396 def getDimensionIndex(self, name: str) -> int: 

397 """Return the position of the named dimension. 

398 

399 This position is in this universe's sorting of all dimensions. 

400 

401 Parameters 

402 ---------- 

403 name : `str` 

404 Name of the dimension. 

405 

406 Returns 

407 ------- 

408 index : `int` 

409 Sorting index for this dimension. 

410 

411 Notes 

412 ----- 

413 The dimension sort order for a universe is consistent with the element 

414 order (all dimensions are elements), and either can be used to sort 

415 dimensions if used consistently. But there are also some contexts in 

416 which contiguous dimension-only indices are necessary or at least 

417 desirable. 

418 """ 

419 return self._dimensionIndices[name] 

420 

421 # TODO: remove on DM-41326. 

422 @deprecated( 

423 "Deprecated in favor of DimensionUniverse.conform, and will be removed after v27.", 

424 version="v27", 

425 category=FutureWarning, 

426 ) 

427 def expandDimensionNameSet(self, names: set[str]) -> None: 

428 """Expand a set of dimension names in-place. 

429 

430 Includes recursive dependencies. 

431 

432 This is an advanced interface for cases where constructing a 

433 `DimensionGraph` (which also expands required dependencies) is 

434 impossible or undesirable. 

435 

436 Parameters 

437 ---------- 

438 names : `set` [ `str` ] 

439 A true `set` of dimension names, to be expanded in-place. 

440 """ 

441 # Keep iterating until the set of names stops growing. This is not as 

442 # efficient as it could be, but we work pretty hard cache 

443 # DimensionGraph instances to keep actual construction rare, so that 

444 # shouldn't matter. 

445 oldSize = len(names) 

446 while True: 

447 # iterate over a temporary copy so we can modify the original 

448 for name in tuple(names): 

449 names.update(self._dimensions[name].required.names) 

450 names.update(self._dimensions[name].implied.names) 

451 if oldSize == len(names): 

452 break 

453 else: 

454 oldSize = len(names) 

455 

456 # TODO: remove on DM-41326. 

457 @deprecated( 

458 "DimensionUniverse.extract and DimensionGraph are deprecated in favor of DimensionUniverse.conform " 

459 "and DimensionGroup, and will be removed after v27.", 

460 version="v27", 

461 category=FutureWarning, 

462 ) 

463 def extract(self, iterable: Iterable[Dimension | str]) -> DimensionGraph: 

464 """Construct graph from iterable. 

465 

466 Constructs a `DimensionGraph` from a possibly-heterogenous iterable 

467 of `Dimension` instances and string names thereof. 

468 

469 Constructing `DimensionGraph` directly from names or dimension 

470 instances is slightly more efficient when it is known in advance that 

471 the iterable is not heterogenous. 

472 

473 Parameters 

474 ---------- 

475 iterable: iterable of `Dimension` or `str` 

476 Dimensions that must be included in the returned graph (their 

477 dependencies will be as well). 

478 

479 Returns 

480 ------- 

481 graph : `DimensionGraph` 

482 A `DimensionGraph` instance containing all given dimensions. 

483 """ 

484 return self.conform(iterable)._as_graph() 

485 

486 def conform( 

487 self, 

488 dimensions: Iterable[str | Dimension] | str | DimensionElement | DimensionGroup | DimensionGraph, 

489 /, 

490 ) -> DimensionGroup: 

491 """Construct a dimension group from an iterable of dimension names. 

492 

493 Parameters 

494 ---------- 

495 dimensions : `~collections.abc.Iterable` [ `str` or `Dimension` ], \ 

496 `str`, `DimensionElement`, `DimensionGroup`, or \ 

497 `DimensionGraph` 

498 Dimensions that must be included in the returned group; their 

499 dependencies will be as well. Support for `Dimension`, 

500 `DimensionElement` and `DimensionGraph` objects is deprecated and 

501 will be removed after v27. Passing `DimensionGraph` objects will 

502 not yield a deprecation warning to allow non-deprecated methods and 

503 properties that return `DimensionGraph` objects to be passed 

504 though, since these will be changed to return `DimensionGroup` in 

505 the future. 

506 

507 Returns 

508 ------- 

509 group : `DimensionGroup` 

510 A `DimensionGroup` instance containing all given dimensions. 

511 """ 

512 match dimensions: 

513 case DimensionGroup(): 

514 return dimensions 

515 case DimensionGraph(): 

516 return dimensions.as_group() 

517 case DimensionElement() as d: 

518 return d.minimal_group 

519 case str() as name: 

520 return self[name].minimal_group 

521 case iterable: 

522 names: set[str] = {getattr(d, "name", cast(str, d)) for d in iterable} 

523 return DimensionGroup(self, names) 

524 

525 @overload 

526 def sorted(self, elements: Iterable[Dimension], *, reverse: bool = False) -> Sequence[Dimension]: 

527 ... 

528 

529 @overload 

530 def sorted( 

531 self, elements: Iterable[DimensionElement | str], *, reverse: bool = False 

532 ) -> Sequence[DimensionElement]: 

533 ... 

534 

535 def sorted(self, elements: Iterable[Any], *, reverse: bool = False) -> list[Any]: 

536 """Return a sorted version of the given iterable of dimension elements. 

537 

538 The universe's sort order is topological (an element's dependencies 

539 precede it), with an unspecified (but deterministic) approach to 

540 breaking ties. 

541 

542 Parameters 

543 ---------- 

544 elements : iterable of `DimensionElement`. 

545 Elements to be sorted. 

546 reverse : `bool`, optional 

547 If `True`, sort in the opposite order. 

548 

549 Returns 

550 ------- 

551 sorted : `~collections.abc.Sequence` [ `Dimension` or \ 

552 `DimensionElement` ] 

553 A sorted sequence containing the same elements that were given. 

554 """ 

555 s = set(elements) 

556 result = [element for element in self._elements if element in s or element.name in s] 

557 if reverse: 

558 result.reverse() 

559 return result 

560 

561 # TODO: Remove this method on DM-38687. 

562 @deprecated( 

563 "Deprecated in favor of configurable dimension packers. Will be removed after v26.", 

564 version="v26", 

565 category=FutureWarning, 

566 ) 

567 def makePacker(self, name: str, dataId: DataCoordinate) -> DimensionPacker: 

568 """Make a dimension packer. 

569 

570 Constructs a `DimensionPacker` that can pack data ID dictionaries 

571 into unique integers. 

572 

573 Parameters 

574 ---------- 

575 name : `str` 

576 Name of the packer, matching a key in the "packers" section of the 

577 dimension configuration. 

578 dataId : `DataCoordinate` 

579 Fully-expanded data ID that identifies the at least the "fixed" 

580 dimensions of the packer (i.e. those that are assumed/given, 

581 setting the space over which packed integer IDs are unique). 

582 ``dataId.hasRecords()`` must return `True`. 

583 """ 

584 return self._packers[name](self, dataId) 

585 

586 def getEncodeLength(self) -> int: 

587 """Return encoded size of graph. 

588 

589 Returns the size (in bytes) of the encoded size of `DimensionGraph` 

590 instances in this universe. 

591 

592 See `DimensionGraph.encode` and `DimensionGraph.decode` for more 

593 information. 

594 """ 

595 return math.ceil(len(self._dimensions) / 8) 

596 

597 def get_elements_populated_by(self, dimension: Dimension) -> NamedValueAbstractSet[DimensionElement]: 

598 """Return the set of `DimensionElement` objects whose 

599 `~DimensionElement.populated_by` atttribute is the given dimension. 

600 """ 

601 return self._populates[dimension.name] 

602 

603 @property 

604 def empty(self) -> DimensionGraph: 

605 """The `DimensionGraph` that contains no dimensions. 

606 

607 After v27 this will be a `DimensionGroup`. 

608 """ 

609 return self._empty._as_graph() 

610 

611 @classmethod 

612 def _unpickle(cls, version: int, namespace: str | None = None) -> DimensionUniverse: 

613 """Return an unpickled dimension universe. 

614 

615 Callable used for unpickling. 

616 

617 For internal use only. 

618 """ 

619 if namespace is None: 

620 # Old pickled universe. 

621 namespace = _DEFAULT_NAMESPACE 

622 try: 

623 return cls._instances[version, namespace] 

624 except KeyError as err: 

625 raise pickle.UnpicklingError( 

626 f"DimensionUniverse with version '{version}' and namespace {namespace!r} " 

627 "not found. Note that DimensionUniverse objects are not " 

628 "truly serialized; when using pickle to transfer them " 

629 "between processes, an equivalent instance with the same " 

630 "version must already exist in the receiving process." 

631 ) from err 

632 

633 def __reduce__(self) -> tuple: 

634 return (self._unpickle, (self._version, self._namespace)) 

635 

636 def __deepcopy__(self, memo: dict) -> DimensionUniverse: 

637 # DimensionUniverse is recursively immutable; see note in @immutable 

638 # decorator. 

639 return self 

640 

641 # Class attributes below are shadowed by instance attributes, and are 

642 # present just to hold the docstrings for those instance attributes. 

643 

644 commonSkyPix: SkyPixDimension 

645 """The special skypix dimension that is used to relate all other spatial 

646 dimensions in the `Registry` database (`SkyPixDimension`). 

647 """ 

648 

649 dimensionConfig: DimensionConfig 

650 """The configuration used to create this Universe (`DimensionConfig`).""" 

651 

652 _cached_groups: dict[frozenset[str], DimensionGroup] 

653 

654 _dimensions: NamedValueAbstractSet[Dimension] 

655 

656 _elements: NamedValueAbstractSet[DimensionElement] 

657 

658 _empty: DimensionGroup 

659 

660 _topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] 

661 

662 _dimensionIndices: dict[str, int] 

663 

664 _elementIndices: dict[str, int] 

665 

666 _packers: dict[str, DimensionPackerFactory] 

667 

668 _populates: defaultdict[str, NamedValueSet[DimensionElement]] 

669 

670 _version: int 

671 

672 _namespace: str