Coverage for python/lsst/daf/butler/dimensions/_group.py: 40%

192 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-15 02:03 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionGroup"] 

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any 

36 

37import pydantic 

38from lsst.utils.classes import cached_getter, immutable 

39from pydantic_core import core_schema 

40 

41from .. import pydantic_utils 

42from .._named import NamedValueAbstractSet, NamedValueSet 

43from .._topology import TopologicalFamily, TopologicalSpace 

44 

45if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

46 from ._elements import DimensionElement 

47 from ._graph import DimensionGraph 

48 from ._universe import DimensionUniverse 

49 

50 

class SortedSequenceSet(Set[str]):
    """A set-like interface wrapper around a tuple.

    This delegates directly to ``tuple.__contains__``, so there is an implicit
    assumption that `len` is small and hence O(N) lookups are not a problem, as
    is the case for sets of dimension names.

    Parameters
    ----------
    seq : `tuple` [`str`, ...]
        Strings to seed the set.  The tuple is stored as-is; it is neither
        copied, sorted, nor deduplicated here.

    Notes
    -----
    Hashing and equality against another `SortedSequenceSet` are both based on
    the underlying tuple and are therefore order-sensitive.  Comparisons
    against any other object are delegated to `collections.abc.Set`.
    """

    __slots__ = ("_seq",)

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __len__(self) -> int:
        return len(self._seq)

    def __hash__(self) -> int:
        return hash(self._seq)

    def __eq__(self, other: object) -> bool:
        # Fast path: compare the underlying tuples directly, but only for
        # objects that really are SortedSequenceSets.  Testing the type
        # (rather than the truthiness of an arbitrary ``_seq`` attribute)
        # keeps unrelated objects from comparing equal and lets empty sets
        # use the fast path as well.
        if isinstance(other, SortedSequenceSet):
            return other._seq == self._seq
        return super().__eq__(other)

    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:
        # This is used by collections.abc.Set mixin methods when they need
        # to return a new object (e.g. in `__and__`).
        return set(iterable)

    def __repr__(self) -> str:
        return f"{{{', '.join(str(k) for k in self._seq)}}}"

    def as_tuple(self) -> tuple[str, ...]:
        """Return the underlying tuple.

        Returns
        -------
        t : `tuple`
            A tuple of all the values.
        """
        return self._seq

    @property
    def names(self) -> Set[str]:
        """An alias to ``self``.

        This is a backwards-compatibility API that allows `DimensionGroup`
        to mimic the `DimensionGraph` object it is intended to replace, by
        permitting expressions like ``x.required.names`` when ``x`` can be
        an object of either type.
        """
        return self

115 

116 

@immutable
class DimensionGroup:  # numpydoc ignore=PR02
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : iterable of `str` or `DimensionGroup`, optional
        The names of dimensions that must be included in the group.  All
        (recursive) dependencies of these dimensions will also be included.
        A `DimensionGroup` that already belongs to ``universe`` is returned
        unchanged.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are looked up in, and registered with, a cache held by the
    universe, keyed on the expanded set of names.

    This class is not a Pydantic model, but it implements the
    `__get_pydantic_core_schema__` special method and hence can be used as a
    field in Pydantic models or [de]serialized directly via
    `pydantic.TypeAdapter`, but validation requires a `DimensionUniverse` to be
    passed as the "universe" key in the Pydantic validation context.  The
    `.pydantic_utils.DeferredValidation` class can be used to defer validation
    of this object or other types that use it until that context is available.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                # Already a group of this universe; hand it straight back.
                return names
            names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies, draining a work set until nothing new turns up.
            pending = set(names)
            expanded: set[str] = set()
            while pending:
                dimension = universe[pending.pop()]
                expanded.add(dimension.name)
                pending.update(dimension.required.names)
                pending.update(dimension.implied.names)
                pending.difference_update(expanded)
            names = expanded
        else:
            names = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        cache_key = frozenset(names)
        cached = universe._cached_groups.get(cache_key)
        if cached is not None:
            return cached
        # This is apparently a new group.  Create it, and add it to the cache.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        sorted_names = tuple(d.name for d in universe.sorted(names))
        self.names = SortedSequenceSet(sorted_names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        element_names = tuple(e.name for e in universe.elements if e.required.names <= self.names)
        self.elements = SortedSequenceSet(element_names)
        governor_names = tuple(d for d in self.names if d in universe.governor_dimensions.names)
        self.governors = SortedSequenceSet(governor_names)
        skypix_names = tuple(d for d in self.names if d in universe.skypix_dimensions.names)
        self.skypix = SortedSequenceSet(skypix_names)
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.
        required: list[str] = []
        implied: list[str] = []
        for candidate in self.names:
            # A dimension is implied if any dimension in the group implies it;
            # if no other dimension implies it, it's required.
            is_implied = any(candidate in universe[other].implied.names for other in self.names)
            (implied if is_implied else required).append(candidate)
        self.required = SortedSequenceSet(tuple(required))
        self.implied = SortedSequenceSet(tuple(implied))

        families_by_space = {}
        for space in TopologicalSpace.__members__.values():
            families_by_space[space] = NamedValueSet(
                universe[e].topology[space] for e in self.elements if space in universe[e].topology
            ).freeze()
        self._space_families = MappingProxyType(families_by_space)

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = dict(
            zip(itertools.chain(self.required, self.implied), itertools.count())
        )
        return universe._cached_groups.set_or_get(cache_key, self)

    def __getnewargs__(self) -> tuple:
        # The stored names are already expanded, so ``_conform`` is passed as
        # `False`.
        return (self.universe, self.names.as_tuple(), False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        # Imported here rather than at module scope; the module-level import
        # is only done under TYPE_CHECKING because it may be circular.
        from ._elements import DimensionElement

        if isinstance(element, DimensionElement):  # type: ignore[unreachable]
            raise TypeError(
                "DimensionGroup does not support membership tests using DimensionElement "
                "instances; use their names instead."
            )
        return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        Returns
        -------
        group : `DimensionGroup`
            Returns itself.

        Notes
        -----
        This is a backwards-compatibility API that allows both `DimensionGraph`
        and `DimensionGroup` to be coerced to the latter.
        """
        return self

    @cached_getter
    def _as_graph(self) -> DimensionGraph:
        """Return a view of ``self`` as a `DimensionGraph`.

        Returns
        -------
        graph : `DimensionGraph`
            The deprecated form of `DimensionGroup`.

        Notes
        -----
        This is provided as a convenience for methods and properties that must
        return a `DimensionGraph` for backwards compatibility (until v27).  It
        is the only way of making a `DimensionGraph` that does not produce
        a warning.
        """
        from ._graph import DimensionGraph

        # Bypass DimensionGraph's own construction and attach this group
        # directly.
        graph = object.__new__(DimensionGraph)
        graph._group = self
        return graph

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_disjoint : `bool`
            `True` if the groups have no dimension names in common; this is
            always the case if either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_subset : `bool`
            `True` if every dimension name in ``self`` is in ``other``; this
            is always the case if ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_superset : `bool`
            `True` if every dimension name in ``other`` is in ``self``; this
            is always the case if ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        from ._graph import DimensionGraph

        # TODO: Drop DimensionGraph support here on DM-41326.
        if not isinstance(other, DimensionGroup | DimensionGraph):
            return False
        return self.names == other.names

    def __hash__(self) -> int:
        # Hash the tuple of required dimension names.
        return hash(self.required.as_tuple())

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to join with.

        Returns
        -------
        union : `DimensionGroup`
            Union of all the groups.

        Notes
        -----
        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        combined = set(self.names).union(*(group.names for group in others))
        return DimensionGroup(self.universe, combined)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to compare with.

        Returns
        -------
        inter : `DimensionGroup`
            Intersection of all the groups.

        Notes
        -----
        See also `union`.
        """
        common = set(self.names).intersection(*(group.names for group in others))
        return DimensionGroup(self.universe, names=common)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        visited: set[str] = set()
        ordered: list[str] = []

        def visit(element: DimensionElement) -> None:
            # Append ``element`` once all of its other required dimensions
            # have been placed, then recurse into what it implies.
            if element.name in visited:
                return
            prerequisites = set(element.required.names)
            prerequisites.discard(element.name)
            if not prerequisites <= visited:
                return
            ordered.append(element.name)
            visited.add(element.name)
            for implied_dimension in element.implied:
                visit(implied_dimension)

        # Keep sweeping over the required dimensions until all of them have
        # been placed; a dimension skipped on one pass gets another chance on
        # the next.
        while not visited.issuperset(self.required):
            for name in self.required:
                visit(self.universe[name])

        # Everything not reached above goes at the end, in `elements` order.
        ordered.extend(name for name in self.elements if name not in visited)
        return tuple(ordered)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this group that exist in topological spaces
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    # Position of each dimension name in the required-then-implied ordering.
    _data_coordinate_indices: dict[str, int]

    @classmethod
    def _validate(cls, data: Any, info: pydantic.ValidationInfo) -> DimensionGroup:
        """Pydantic validator (deserializer) for `DimensionGroup`.

        This satisfies the `pydantic.WithInfoPlainValidatorFunction` signature.
        """
        universe = pydantic_utils.get_universe_from_context(info.context)
        return universe.conform(data)

    def _serialize(self) -> list[str]:
        """Pydantic serializer for `DimensionGroup`.

        This satisfies the `pydantic.PlainSerializerFunction` signature.
        """
        return list(self.names)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # This is the Pydantic hook for overriding serialization, validation,
        # and JSON schema generation.
        str_list = core_schema.list_schema(core_schema.str_schema())
        # Validate as list[str] first, then convert via `_validate`.
        from_str_list = core_schema.chain_schema(
            [str_list, core_schema.with_info_plain_validator_function(cls._validate)]
        )
        # When deserializing from Python, first see if it's already a
        # DimensionGroup and then try conversion from list[str].
        from_python = core_schema.union_schema(
            [core_schema.is_instance_schema(DimensionGroup), from_str_list]
        )
        # When serializing convert it to a `list[str]`.
        to_str_list = core_schema.plain_serializer_function_ser_schema(
            cls._serialize, return_schema=str_list
        )
        return core_schema.json_or_python_schema(
            # When deserializing from JSON, expect it to look like list[str].
            json_schema=from_str_list,
            python_schema=from_python,
            serialization=to_str_list,
        )