Coverage for python/lsst/daf/butler/dimensions/_group.py: 37%

179 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionGroup"] 

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.utils.classes import cached_getter, immutable 

38 

39from .._named import NamedValueAbstractSet, NamedValueSet 

40from .._topology import TopologicalFamily, TopologicalSpace 

41 

42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

43 from ._elements import DimensionElement 

44 from ._graph import DimensionGraph 

45 from ._universe import DimensionUniverse 

46 

47 

class SortedSequenceSet(Set[str]):
    """A set-like interface wrapper around a tuple.

    This delegates directly to ``tuple.__contains__``, so there is an implicit
    assumption that `len` is small and hence O(N) lookups are not a problem, as
    is the case for sets of dimension names.

    Parameters
    ----------
    seq : `tuple` [`str`, ...]
        Strings to seed the set.  The tuple is stored as-is (no copy, no
        sorting, no de-duplication), and its order defines the iteration
        order.

    Notes
    -----
    Equality and hashing follow `collections.abc.Set` semantics: an instance
    compares equal to any other set with the same members and hashes like a
    `frozenset` with those members, so the two can be used interchangeably as
    dictionary keys.
    """

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    __slots__ = ("_seq",)

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __len__(self) -> int:
        return len(self._seq)

    def __hash__(self) -> int:
        # Use the order-independent hash provided by the Set mixin (it matches
        # frozenset's algorithm).  Hashing the tuple directly would give a
        # different hash than an equal frozenset, violating the rule that
        # objects that compare equal must hash equal.
        return self._hash()

    def __eq__(self, other: object) -> bool:
        # Fast path: identical underlying tuples are always equal.  Compare
        # against None explicitly so that an empty tuple still takes this
        # path.
        other_seq = getattr(other, "_seq", None)
        if other_seq is not None and other_seq == self._seq:
            return True
        # Differing tuples may still hold the same members (in another order),
        # so fall back to the generic, order-independent set comparison.
        return super().__eq__(other)

    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:
        # This is used by collections.abc.Set mixin methods when they need
        # to return a new object (e.g. in `__and__`).  A plain `set` is
        # returned because the result has no meaningful sort order.
        return set(iterable)

    def __repr__(self) -> str:
        return f"{{{', '.join(str(k) for k in self._seq)}}}"

    def as_tuple(self) -> tuple[str, ...]:
        """Return the underlying tuple.

        Returns
        -------
        t : `tuple`
            A tuple of all the values.
        """
        return self._seq

    @property
    def names(self) -> Set[str]:
        """An alias to ``self``.

        This is a backwards-compatibility API that allows `DimensionGroup`
        to mimic the `DimensionGraph` object it is intended to replace, by
        permitting expressions like ``x.required.names`` when ``x`` can be
        an object of either type.
        """
        return self

112 

113 

@immutable
class DimensionGroup:  # numpydoc ignore=PR02
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : iterable of `str` or `DimensionGroup`, optional
        An iterable of the names of dimensions that must be included in the
        group.  All (recursive) dependencies of these dimensions will also be
        included.  If a `DimensionGroup` that already belongs to ``universe``
        is passed, it is returned unchanged; a group from a different
        universe contributes only its dimension names.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their expanded set of
    dimension names, so constructing a group with the same names and universe
    returns the previously-built instance.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                return names
            else:
                names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies.
            to_expand = set(names)
            expanded: set[str] = set()
            while to_expand:
                dimension = universe[to_expand.pop()]
                expanded.add(dimension.name)
                to_expand.update(dimension.required.names)
                to_expand.update(dimension.implied.names)
                # Never revisit a dimension that has already been processed.
                to_expand.difference_update(expanded)
            cache_key = frozenset(expanded)
        else:
            cache_key = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        self = universe._cached_groups.get(cache_key, None)
        if self is not None:
            return self
        # This is apparently a new group.  Create it; it is only added to the
        # cache once it is fully initialized (see the end of this method).
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(cache_key)))
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        self.elements = SortedSequenceSet(
            tuple(e.name for e in universe.elements if e.required.names <= self.names)
        )
        self.governors = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.governor_dimensions.names)
        )
        self.skypix = SortedSequenceSet(tuple(d for d in self.names if d in universe.skypix_dimensions.names))
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.
        required: list[str] = []
        implied: list[str] = []
        for dim1 in self.names:
            for dim2 in self.names:
                if dim1 in universe[dim2].implied.names:
                    implied.append(dim1)
                    break
            else:
                # If no other dimension implies dim1, it's required.
                required.append(dim1)
        self.required = SortedSequenceSet(tuple(required))
        self.implied = SortedSequenceSet(tuple(implied))

        self._space_families = MappingProxyType(
            {
                space: NamedValueSet(
                    universe[e].topology[space] for e in self.elements if space in universe[e].topology
                ).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = {
            name: i for i, name in enumerate(itertools.chain(self.required, self.implied))
        }
        # Register the group in the cache only now that every attribute has
        # been set: if any step above raises (e.g. an unknown dimension name),
        # no partially-initialized instance is left behind to be handed out by
        # later constructions with the same names.
        universe._cached_groups[cache_key] = self
        return self

    def __getnewargs__(self) -> tuple:
        # The names are already dependency-complete, so reconstruction can
        # skip the expansion step (``_conform=False``).
        return (self.universe, self.names.as_tuple(), False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        else:
            from ._elements import DimensionElement

            # Reject element instances explicitly rather than silently
            # reporting that they are not members.
            if isinstance(element, DimensionElement):  # type: ignore[unreachable]
                raise TypeError(
                    "DimensionGroup does not support membership tests using DimensionElement "
                    "instances; use their names instead."
                )
            return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        Returns
        -------
        group : `DimensionGroup`
            Returns itself.

        Notes
        -----
        This is a backwards-compatibility API that allows both `DimensionGraph`
        and `DimensionGroup` to be coerced to the latter.
        """
        return self

    @cached_getter
    def _as_graph(self) -> DimensionGraph:
        """Return a view of ``self`` as a `DimensionGraph`.

        Returns
        -------
        graph : `DimensionGraph`
            The deprecated form of `DimensionGroup`.

        Notes
        -----
        This is provided as a convenience for methods and properties that must
        return a `DimensionGraph` for backwards compatibility (until v27).  It
        is the only way of making a `DimensionGraph` that does not produce
        a warning.
        """
        from ._graph import DimensionGraph

        # Bypass DimensionGraph's own constructor and attach this group
        # directly.
        result = object.__new__(DimensionGraph)
        result._group = self
        return result

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_disjoint : `bool`
            `True` if the groups have no dimension names in common, which is
            always the case when either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_subset : `bool`
            `True` if every dimension name in ``self`` is also in ``other``,
            which is always the case when ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Parameters
        ----------
        other : `DimensionGroup`
            Other group to compare with.

        Returns
        -------
        is_superset : `bool`
            `True` if every dimension name in ``other`` is also in ``self``,
            which is always the case when ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        from ._graph import DimensionGraph

        # TODO: Drop DimensionGraph support here on DM-41326.
        if isinstance(other, (DimensionGroup, DimensionGraph)):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        return hash(self.required.as_tuple())

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to join with.

        Returns
        -------
        union : `DimensionGroup`
            Union of all the groups.

        Notes
        -----
        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGroup(self.universe, names)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        Parameters
        ----------
        *others : `DimensionGroup`
            Other groups to compare with.

        Returns
        -------
        inter : `DimensionGroup`
            Intersection of all the groups.

        Notes
        -----
        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGroup(self.universe, names=names)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[str] = []

        def add_to_order(element: DimensionElement) -> None:
            # Append ``element`` once all of its (other) required dimensions
            # have been appended, then recurse into what it implies.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element.name)
            done.add(element.name)
            for other in element.implied:
                add_to_order(other)

        # Keep sweeping over the required dimensions until each has been
        # placed; a dimension is skipped in a sweep while its own required
        # dimensions are still pending.
        while not done.issuperset(self.required):
            for dimension in self.required:
                add_to_order(self.universe[dimension])

        # Everything not reached above goes at the end, in `elements` order.
        order.extend(element for element in self.elements if element not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this group that exist in topological spaces
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    _data_coordinate_indices: dict[str, int]