Coverage for python/lsst/daf/butler/dimensions/_group.py: 37%

178 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:50 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionGroup"] 

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.utils.classes import cached_getter, immutable 

38 

39from .._named import NamedValueAbstractSet, NamedValueSet 

40from .._topology import TopologicalFamily, TopologicalSpace 

41 

42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

43 from ._elements import DimensionElement 

44 from ._graph import DimensionGraph 

45 from ._universe import DimensionUniverse 

46 

47 

class SortedSequenceSet(Set[str]):
    """Expose an already-ordered tuple of strings through the set interface.

    Membership tests are forwarded to the tuple, so lookups are linear in
    the number of items.  That is acceptable for the intended use (sets of
    dimension names), which are expected to be small.

    Parameters
    ----------
    seq : `tuple` [`str`, ...]
        Strings used to seed the set; their order is the iteration order.
    """

    __slots__ = ("_seq",)

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    def __len__(self) -> int:
        return len(self._seq)

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __eq__(self, other: object) -> bool:
        # Fast path: anything carrying a non-empty ``_seq`` is compared by
        # direct sequence equality.  Everything else (including an empty
        # ``_seq``) goes through the generic `collections.abc.Set` comparison.
        other_seq = getattr(other, "_seq", None)
        if not other_seq:
            return super().__eq__(other)
        return other_seq == self._seq

    def __hash__(self) -> int:
        # Defining __eq__ suppresses the inherited hash, so provide one based
        # on the underlying tuple.
        return hash(self._seq)

    def __repr__(self) -> str:
        body = ", ".join(map(str, self._seq))
        return "{" + body + "}"

    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:
        # Hook called by the collections.abc.Set mixin methods (``__and__``
        # and friends) whenever they must build a result object; results are
        # plain built-in sets rather than instances of this class.
        return {*iterable}

    def as_tuple(self) -> tuple[str, ...]:
        """Return the underlying tuple.

        Returns
        -------
        t : `tuple`
            A tuple of all the values, in iteration order.
        """
        return self._seq

    @property
    def names(self) -> Set[str]:
        """An alias to ``self``.

        Backwards-compatibility shim: it lets `DimensionGroup` stand in for
        the `DimensionGraph` it is intended to replace, so expressions such
        as ``x.required.names`` work when ``x`` is an object of either type.
        """
        return self

112 

113 

@immutable
class DimensionGroup:  # numpydoc ignore=PR02
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : iterable of `str` or `DimensionGroup`, optional
        An iterable of the names of dimensions that must be included in the
        group.  All (recursive) dependencies of these dimensions will also be
        included.  If a `DimensionGroup` that already belongs to ``universe``
        is passed, it is returned unchanged.  Defaults to the empty group.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their expanded set of
    names, so constructing a group with names that have been seen before
    returns the existing instance.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                return names
            else:
                names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies.
            to_expand = set(names)
            names = set()
            while to_expand:
                dimension = universe[to_expand.pop()]
                names.add(dimension.name)
                to_expand.update(dimension.required.names)
                to_expand.update(dimension.implied.names)
                to_expand.difference_update(names)
        else:
            # Materialize once; ``names`` is consumed more than once below.
            names = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        cache_key = frozenset(names)
        self = universe._cached_groups.get(cache_key)
        if self is not None:
            return self
        # This is apparently a new group.  Create it, and add it to the cache.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(names)))
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        self.elements = SortedSequenceSet(
            tuple(e.name for e in universe.elements if e.required.names <= self.names)
        )
        self.governors = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.governor_dimensions.names)
        )
        self.skypix = SortedSequenceSet(tuple(d for d in self.names if d in universe.skypix_dimensions.names))
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.  A dimension is implied here if any dimension in this
        # group lists it as implied; collect all such names in one pass
        # rather than re-scanning the group for every dimension.
        implied_by_any: set[str] = set()
        for name in self.names:
            implied_by_any.update(universe[name].implied.names)
        # If no dimension in the group implies a name, it's required.
        self.required = SortedSequenceSet(tuple(d for d in self.names if d not in implied_by_any))
        self.implied = SortedSequenceSet(tuple(d for d in self.names if d in implied_by_any))

        self._space_families = MappingProxyType(
            {
                space: NamedValueSet(
                    universe[e].topology[space] for e in self.elements if space in universe[e].topology
                ).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = {
            name: i for i, name in enumerate(itertools.chain(self.required, self.implied))
        }
        # Another caller may have registered an equivalent group in the
        # meantime; return whatever the cache reports for this key.
        return universe._cached_groups.set_or_get(cache_key, self)

    def __getnewargs__(self) -> tuple:
        # Names are already expanded, so reconstruction can skip conforming.
        return (self.universe, self.names._seq, False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        else:
            # Imported here because a module-level import may be circular.
            from ._elements import DimensionElement

            if isinstance(element, DimensionElement):  # type: ignore[unreachable]
                raise TypeError(
                    "DimensionGroup does not support membership tests using DimensionElement "
                    "instances; use their names instead."
                )
            return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        Returns
        -------
        group : `DimensionGroup`
            Returns itself.

        Notes
        -----
        This is a backwards-compatibility API that allows both `DimensionGraph`
        and `DimensionGroup` to be coerced to the latter.
        """
        return self

    @cached_getter
    def _as_graph(self) -> DimensionGraph:
        """Return a view of ``self`` as a `DimensionGraph`.

        Returns
        -------
        graph : `DimensionGraph`
            The deprecated form of `DimensionGroup`.

        Notes
        -----
        This is provided as a convenience for methods and properties that must
        return a `DimensionGraph` for backwards compatibility (until v27).  It
        is the only way of making a `DimensionGraph` that does not produce
        a warning.
        """
        from ._graph import DimensionGraph

        # Bypass DimensionGraph's own construction and attach this group
        # directly.
        result = object.__new__(DimensionGraph)
        result._group = self
        return result

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_disjoint : `bool`
            `True` if the two groups have no dimension names in common.
            This is always the case when either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_subset : `bool`
            `True` if every dimension name in ``self`` is also in ``other``.
            This is always the case when ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_superset : `bool`
            `True` if every dimension name in ``other`` is also in ``self``.
            This is always the case when ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        from ._graph import DimensionGraph

        # TODO: Drop DimensionGraph support here on DM-41326.
        if isinstance(other, (DimensionGroup, DimensionGraph)):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        # Hash only the tuple of required dimension names.
        return hash(self.required._seq)

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to join with.

        Returns
        -------
        union : `DimensionGroup`
            Union of all the groups.

        Notes
        -----
        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGroup(self.universe, names)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to compare with.

        Returns
        -------
        inter : `DimensionGroup`
            Intersection of all the groups.

        Notes
        -----
        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGroup(self.universe, names=names)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[str] = []

        def add_to_order(element: DimensionElement) -> None:
            # Append ``element`` once all of its other required dimensions
            # have been added, then recurse into the dimensions it implies.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                # Not ready yet; a later pass of the outer loop retries.
                return
            order.append(element.name)
            done.add(element.name)
            for other in element.implied:
                add_to_order(other)

        # Keep sweeping the required dimensions until all have been placed.
        while not done.issuperset(self.required):
            for dimension in self.required:
                add_to_order(self.universe[dimension])

        # Any remaining elements go last, in their existing order.
        order.extend(element for element in self.elements if element not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this group that have relationships in each
    topological space (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    _data_coordinate_indices: dict[str, int]