Coverage for python / lsst / daf / butler / dimensions / _group.py: 39%

211 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Public API of this module: the names made available by a star-import.
__all__ = ("DimensionGroup", "SerializedDimensionGroup")

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any, TypeAlias 

36 

37import pydantic 

38from deprecated.sphinx import deprecated 

39from pydantic_core import core_schema 

40 

41from lsst.utils.classes import cached_getter, immutable 

42 

43from .. import pydantic_utils 

44from .._named import NamedValueAbstractSet, NamedValueSet 

45from .._topology import TopologicalFamily, TopologicalSpace 

46 

47if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

48 from ._elements import DimensionElement 

49 from ._universe import DimensionUniverse 

50 

51 

class SortedSequenceSet(Set[str]):
    """Read-only, set-like view over a tuple of strings.

    Membership tests are forwarded to the tuple's own ``in`` operator, which
    is a linear scan. That is acceptable only because the wrapped tuple is
    expected to be short, as is the case for sets of dimension names.

    Parameters
    ----------
    seq : `tuple` [`str`, ...]
        Strings to seed the set with; iteration follows the tuple's order.
    """

    __slots__ = ("_seq",)

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    def __len__(self) -> int:
        return len(self._seq)

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __eq__(self, other: object) -> bool:
        # Anything exposing a non-empty ``_seq`` is compared tuple-to-tuple;
        # every other operand (including one whose ``_seq`` is empty) goes
        # through the generic `collections.abc.Set` comparison.
        other_seq = getattr(other, "_seq", None)
        if not other_seq:
            return super().__eq__(other)
        return other_seq == self._seq

    def __hash__(self) -> int:
        return hash(self._seq)

    def __repr__(self) -> str:
        body = ", ".join(str(k) for k in self._seq)
        return "{" + body + "}"

    # The `collections.abc.Set` mixin methods (e.g. `__and__`) call this hook
    # whenever they need to build a result object. MyPy would like it to be
    # generic, but this class only ever holds strings, so a plain `set` of
    # strings is returned instead.
    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:  # type: ignore[override]
        result: set[str] = set()
        result.update(iterable)
        return result

    def as_tuple(self) -> tuple[str, ...]:
        """Expose the tuple this set wraps.

        Returns
        -------
        t : `tuple`
            The wrapped tuple holding every value, in iteration order.
        """
        return self._seq

    # TODO: remove on DM-45185
    @property
    @deprecated(
        "Deprecated in favor of direct iteration over the parent set. Will be removed after v28.",
        version="v28",
        category=FutureWarning,
    )
    def names(self) -> Set[str]:
        """The set itself, under another name.

        Kept only for backwards compatibility: it lets `DimensionGroup`
        stand in for the ``DimensionGraph`` object it replaced, so that
        expressions such as ``x.required.names`` work when ``x`` may be an
        object of either type.
        """
        return self

124 

125 

@immutable
class DimensionGroup:  # numpydoc ignore=PR02
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : `~collections.abc.Iterable` of `str` or `DimensionGroup`, optional
        The names of dimensions that must be included in the group.  All
        (recursive) dependencies of these dimensions will also be included.
        If an existing `DimensionGroup` that belongs to ``universe`` is
        passed, that same object is returned.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their expanded set of
    names, so constructing a group whose names are already cached returns the
    cached instance.

    This class is not a Pydantic model, but it implements the
    `__get_pydantic_core_schema__` special method and hence can be used as a
    field in Pydantic models or [de]serialized directly via
    `pydantic.TypeAdapter`, but validation requires a `DimensionUniverse` to be
    passed as the "universe" key in the Pydantic validation context.  The
    `.pydantic_utils.DeferredValidation` class can be used to defer validation
    of this object or other types that use it until that context is available.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                # Already a group in this universe; nothing to do.
                return names
            else:
                # Group from a different universe: rebuild from its names.
                names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies.
            to_expand = set(names)
            names = set()
            while to_expand:
                dimension = universe[to_expand.pop()]
                names.add(dimension.name)
                to_expand.update(dimension.required.names)
                to_expand.update(dimension.implied.names)
                # Never revisit a dimension that was already expanded.
                to_expand.difference_update(names)
        else:
            names = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        cache_key = frozenset(names)
        self = universe._cached_groups.get(cache_key)
        if self is not None:
            return self
        # This is apparently a new group.  Create it, and add it to the cache.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(names)))
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        self.elements = SortedSequenceSet(
            tuple(e.name for e in universe.elements if e.required.names <= self.names)
        )
        self.governors = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.governor_dimensions.names)
        )
        self.skypix = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.skypix_dimensions.names)
        )
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.
        required: list[str] = []
        implied: list[str] = []
        for dim1 in self.names:
            for dim2 in self.names:
                if dim1 in universe[dim2].implied.names:
                    implied.append(dim1)
                    break
            else:
                # If no other dimension implies dim1, it's required.
                required.append(dim1)
        self.required = SortedSequenceSet(tuple(required))
        self.implied = SortedSequenceSet(tuple(implied))

        # For every topological space, collect the families of the elements
        # in this group that have a topology in that space.
        self._space_families = MappingProxyType(
            {
                space: NamedValueSet(
                    universe[e].topology[space] for e in self.elements if space in universe[e].topology
                ).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = {
            name: i for i, name in enumerate(itertools.chain(self.required, self.implied))
        }
        # Store the new group in the cache and return whatever the cache
        # hands back for this key.
        return universe._cached_groups.set_or_get(cache_key, self)

    def __getnewargs__(self) -> tuple:
        # Arguments passed back to ``__new__`` when the object is
        # reconstructed (e.g. by pickle).  The names of an existing group are
        # already expanded, so ``_conform=False`` skips the expansion.
        return (self.universe, self.names.as_tuple(), False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        else:
            # Imported here rather than at module scope; the module-level
            # import is only done under TYPE_CHECKING because it may be
            # circular.
            from ._elements import DimensionElement

            if isinstance(element, DimensionElement):  # type: ignore[unreachable]
                raise TypeError(
                    "DimensionGroup does not support membership tests using DimensionElement "
                    "instances; use their names instead."
                )
            return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    # TODO: remove on DM-45185
    @deprecated(
        "Deprecated as no longer necessary (this method always returns 'self').  Will be removed after v28.",
        version="v28",
        category=FutureWarning,
    )
    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        Returns
        -------
        group : `DimensionGroup`
            Returns itself.

        Notes
        -----
        This is a backwards-compatibility API that allowed both the old
        ``DimensionGraph`` class and `DimensionGroup` to be coerced to the
        latter.
        """
        return self

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_disjoint : `bool`
            `True` if the two groups share no dimension names.  This is
            always the case when either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_subset : `bool`
            `True` if every dimension name in ``self`` is also in ``other``.
            This is always the case when ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_superset : `bool`
            `True` if every dimension name in ``other`` is also in ``self``.
            This is always the case when ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DimensionGroup):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        # Only the required names are hashed; ``required`` is computed from
        # ``names`` in ``__new__``.
        return hash(self.required.as_tuple())

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(*operands: DimensionGroup, universe: DimensionUniverse | None = None) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        Parameters
        ----------
        *operands : `DimensionGroup`
            Groups to union.
        universe : `DimensionUniverse`, optional
            Universe to use to create an empty group when no operands are
            provided (i.e. when this method is called on the class).

        Returns
        -------
        union : `DimensionGroup`
            Union of all the groups.

        Raises
        ------
        TypeError
            Raised if there are no operands and ``universe`` is not provided.

        Notes
        -----
        This method is deliberately declared without ``self``: when called on
        an instance, that instance is simply the first operand.

        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        names = set().union(*(operand.names for operand in operands))
        if universe is None:
            try:
                universe = operands[0].universe
            except IndexError:
                raise TypeError(
                    "'universe' must be provided when 'union' is called with an empty iterable."
                ) from None
        return DimensionGroup(universe, names)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to compare with.

        Returns
        -------
        inter : `DimensionGroup`
            Intersection of all the groups.

        Notes
        -----
        See also `union`.
        """
        names = set(self.names).intersection(*(other.names for other in others))
        return DimensionGroup(self.universe, names=names)

    def difference(self, other: DimensionGroup) -> DimensionGroup:
        """Construct a new group with dimensions that are in ``self`` but not
        ``other`` OR dependencies of those in ``self`` but not in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        diff : `DimensionGroup`
            Difference of the two groups.

        Notes
        -----
        This is not exactly equivalent to a true `set` difference, because the
        result must be expanded to include required and implied dependencies,
        and those may be common to ``self`` and ``other``.
        """
        return DimensionGroup(self.universe, names=self.names - other.names)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    def __sub__(self, other: DimensionGroup) -> DimensionGroup:
        return self.difference(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[str] = []

        def add_to_order(element: DimensionElement) -> None:
            # Append ``element`` (and then, recursively, whatever it implies)
            # once everything it requires has already been appended.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                # Not ready yet; a later pass of the outer loop retries.
                return
            order.append(element.name)
            done.add(element.name)
            for other in element.implied:
                add_to_order(other)

        # Keep sweeping over the required dimensions until all of them have
        # been placed.
        while not done.issuperset(self.required):
            for dimension in self.required:
                add_to_order(self.universe[dimension])

        # Everything not reached above goes at the end, in `elements` order.
        order.extend(element for element in self.elements if element not in done)
        return tuple(order)

    def _choose_dimension(self, families: NamedValueAbstractSet[TopologicalFamily]) -> str | None:
        """Pick the dimension name for a set of topological families.

        Parameters
        ----------
        families : `NamedValueAbstractSet` [`TopologicalFamily`]
            Families to choose from.

        Returns
        -------
        name : `str` or `None`
            Name of the member chosen by the family's ``choose`` method when
            ``families`` holds exactly one family; `None` otherwise.
        """
        if len(families) != 1:
            return None
        return next(iter(families)).choose(self).name

    @property
    def region_dimension(self) -> str | None:
        """Return the most appropriate spatial dimension to use when looking
        up a region.

        Returns `None` if there are no appropriate dimensions or more than one
        spatial family.
        """
        return self._choose_dimension(self.spatial)

    @property
    def timespan_dimension(self) -> str | None:
        """Return the most appropriate temporal dimension to use when looking
        up a time span.

        Returns `None` if there are no appropriate dimensions or more than one
        temporal family.
        """
        return self._choose_dimension(self.temporal)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of the elements in this group, organized by topological space
    (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    # Position of each dimension name in the "required first, then implied"
    # ordering; backs `data_coordinate_keys`.
    _data_coordinate_indices: dict[str, int]

    @classmethod
    def _validate(cls, data: Any, info: pydantic.ValidationInfo) -> DimensionGroup:
        """Pydantic validator (deserializer) for `DimensionGroup`.

        This satisfies the `pydantic.WithInfoPlainValidatorFunction` signature.
        """
        universe = pydantic_utils.get_universe_from_context(info.context)
        return cls.from_simple(data, universe)

    @classmethod
    def from_simple(cls, data: SerializedDimensionGroup, universe: DimensionUniverse) -> DimensionGroup:
        """Create an instance of this class from serialized data.

        Parameters
        ----------
        data : `SerializedDimensionGroup`
            Serialized data from a previous call to ``to_simple``.
        universe : `DimensionUniverse`
            Dimension universe in which this dimension group will be defined.

        Returns
        -------
        group : `DimensionGroup`
            The result of ``universe.conform(data)``.
        """
        return universe.conform(data)

    def to_simple(self) -> SerializedDimensionGroup:
        """Convert this class to a simple data format suitable for
        serialization.

        Returns
        -------
        data : `SerializedDimensionGroup`
            The dimension names of this group as a `list` of `str`, in the
            iteration order of `names`.
        """
        return list(self.names)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # This is the Pydantic hook for overriding serialization, validation,
        # and JSON schema generation.
        list_of_str_schema = core_schema.list_schema(core_schema.str_schema())
        from_list_of_str_schema = core_schema.chain_schema(
            [list_of_str_schema, core_schema.with_info_plain_validator_function(cls._validate)]
        )
        return core_schema.json_or_python_schema(
            # When deserializing from JSON, expect it to look like list[str].
            json_schema=from_list_of_str_schema,
            # When deserializing from Python, first see if it's already a
            # DimensionGroup and then try conversion from list[str].
            python_schema=core_schema.union_schema(
                [core_schema.is_instance_schema(DimensionGroup), from_list_of_str_schema]
            ),
            # When serializing convert it to a `list[str]`.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls.to_simple, return_schema=list_of_str_schema
            ),
        )

624 

625 

# Serialized form of a `DimensionGroup`: a plain list of dimension names, as
# returned by `DimensionGroup.to_simple` and accepted by
# `DimensionGroup.from_simple`.
SerializedDimensionGroup: TypeAlias = list[str]