Coverage for python/lsst/daf/butler/dimensions/_group.py: 37%

179 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-01 11:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionGroup"] 

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.utils.classes import cached_getter, immutable 

38 

39from .._named import NamedValueAbstractSet, NamedValueSet 

40from .._topology import TopologicalFamily, TopologicalSpace 

41 

42if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

43 from ._elements import DimensionElement 

44 from ._graph import DimensionGraph 

45 from ._universe import DimensionUniverse 

46 

47 

class SortedSequenceSet(Set[str]):
    """A set-like interface wrapper around a tuple.

    This delegates directly to ``tuple.__contains__``, so there is an implicit
    assumption that `len` is small and hence O(N) lookups are not a problem, as
    is the case for sets of dimension names.

    Parameters
    ----------
    seq : `tuple` [ `str`, ... ]
        The names to wrap.  The tuple is stored as-is: it is not copied,
        sorted, or de-duplicated here, so callers are expected to pass
        unique names that are already in the desired iteration order.
    """

    __slots__ = ("_seq",)

    def __init__(self, seq: tuple[str, ...]):
        self._seq = seq

    def __contains__(self, x: object) -> bool:
        return x in self._seq

    def __iter__(self) -> Iterator[str]:
        return iter(self._seq)

    def __len__(self) -> int:
        return len(self._seq)

    def __hash__(self) -> int:
        # ``Set.__eq__`` makes this object compare equal to any other
        # `~collections.abc.Set` (including `frozenset`) with the same
        # elements, so the hash has to agree with theirs.  ``Set._hash`` is
        # the mixin provided for this purpose and matches ``frozenset``.
        return self._hash()

    def __eq__(self, other: object) -> bool:
        # Fast path: compare the underlying tuples directly when the other
        # operand also wraps one.  Test against `None` explicitly so that an
        # empty tuple (which is falsy) still takes this path.
        seq = getattr(other, "_seq", None)
        if seq is not None:
            return seq == self._seq
        # Generic (order-insensitive) comparison against any other Set.
        return super().__eq__(other)

    @classmethod
    def _from_iterable(cls, iterable: Iterable[str]) -> set[str]:
        # This is used by collections.abc.Set mixin methods when they need
        # to return a new object (e.g. in `__and__`).  A plain `set` is
        # returned because the result of such an operation has no defined
        # sort order.
        return set(iterable)

    def __repr__(self) -> str:
        return f"{{{', '.join(str(k) for k in self._seq)}}}"

    def as_tuple(self) -> tuple[str, ...]:
        """Return the underlying tuple."""
        return self._seq

    @property
    def names(self) -> Set[str]:
        """An alias to ``self``.

        This is a backwards-compatibility API that allows `DimensionGroup`
        to mimic the `DimensionGraph` object it is intended to replace, by
        permitting expressions like ``x.required.names`` when ``x`` can be
        an object of either type.
        """
        return self

101 

102 

@immutable
class DimensionGroup:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGroup` behaves in many respects like a set of `str` dimension
    names that maintains several special subsets and supersets of related
    dimension elements.  It does not fully implement the `collections.abc.Set`
    interface, because it defines a few different iteration orders and does not
    privilege any one of them by implementing ``__iter__``.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object that manages all known dimensions.
    names : iterable of `str` or `DimensionGroup`, optional
        An iterable of the names of dimensions that must be included in the
        group.  All (recursive) dependencies of these dimensions will also be
        included.  If a `DimensionGroup` that already belongs to ``universe``
        is passed, it is returned unchanged.
    _conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is for internal use only.

    Notes
    -----
    `DimensionGroup` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe (keyed by the expanded set of
    names), so constructing a group with the same names twice returns the
    same object.
    """

    def __new__(
        cls,
        universe: DimensionUniverse,
        names: Iterable[str] | DimensionGroup = frozenset(),
        _conform: bool = True,
    ) -> DimensionGroup:
        if isinstance(names, DimensionGroup):
            if names.universe is universe:
                return names
            else:
                names = names.names
        if _conform:
            # Expand dimension names to include all required and implied
            # dependencies.
            to_expand = set(names)
            names = set()
            while to_expand:
                dimension = universe[to_expand.pop()]
                names.add(dimension.name)
                to_expand.update(dimension.required.names)
                to_expand.update(dimension.implied.names)
                to_expand.difference_update(names)
        else:
            names = frozenset(names)
        # Look in the cache of existing groups, with the expanded set of names.
        cache_key = frozenset(names)
        self = universe._cached_groups.get(cache_key, None)
        if self is not None:
            return self
        # This is apparently a new group.  Create it; it is only added to the
        # cache once fully constructed (see the end of this method), so that
        # an exception raised below cannot leave a half-initialized instance
        # behind for later callers to pick up.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.names = SortedSequenceSet(tuple(d.name for d in universe.sorted(names)))
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.names.
        self.elements = SortedSequenceSet(
            tuple(e.name for e in universe.elements if e.required.names <= self.names)
        )
        self.governors = SortedSequenceSet(
            tuple(d for d in self.names if d in universe.governor_dimensions.names)
        )
        self.skypix = SortedSequenceSet(tuple(d for d in self.names if d in universe.skypix_dimensions.names))
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one group and implied in
        # another.  A dimension is implied here if any member of the group
        # lists it among its implied dependencies; gather those once instead
        # of re-querying the universe for every pair of dimensions.
        implied_by_members: set[str] = set()
        for name in self.names:
            implied_by_members.update(universe[name].implied.names)
        # If no other dimension implies a dimension, it's required.
        required = [name for name in self.names if name not in implied_by_members]
        implied = [name for name in self.names if name in implied_by_members]
        self.required = SortedSequenceSet(tuple(required))
        self.implied = SortedSequenceSet(tuple(implied))

        self._space_families = MappingProxyType(
            {
                space: NamedValueSet(
                    universe[e].topology[space] for e in self.elements if space in universe[e].topology
                ).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGroup because many (many!)
        # DataCoordinates will share the same DimensionGroup, and we want them
        # to be lightweight.  The order here is what's convenient for
        # DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._data_coordinate_indices = {
            name: i for i, name in enumerate(itertools.chain(self.required, self.implied))
        }
        # Construction succeeded; publish the new group in the cache.
        universe._cached_groups[cache_key] = self
        return self

    def __getnewargs__(self) -> tuple:
        # The stored names are already expanded, so ``_conform=False`` lets
        # unpickling skip the dependency expansion.
        return (self.universe, self.names.as_tuple(), False)

    def __deepcopy__(self, memo: dict) -> DimensionGroup:
        # DimensionGroup is recursively immutable; see note in @immutable
        # decorator.
        return self

    def __len__(self) -> int:
        return len(self.names)

    def __contains__(self, element: str) -> bool:
        if element in self.elements:
            return True
        else:
            from ._elements import DimensionElement

            if isinstance(element, DimensionElement):  # type: ignore[unreachable]
                raise TypeError(
                    "DimensionGroup does not support membership tests using DimensionElement "
                    "instances; use their names instead."
                )
            return False

    def __str__(self) -> str:
        return str(self.names)

    def __repr__(self) -> str:
        return f"DimensionGroup({self.names})"

    def as_group(self) -> DimensionGroup:
        """Return ``self``.

        This is a backwards-compatibility API that allows both `DimensionGraph`
        and `DimensionGroup` to be coerced to the latter.
        """
        return self

    @cached_getter
    def _as_graph(self) -> DimensionGraph:
        """Return a view of ``self`` as a `DimensionGraph`.

        This is provided as a convenience for methods and properties that must
        return a `DimensionGraph` for backwards compatibility (until v27).  It
        is the only way of making a `DimensionGraph` that does not produce
        a warning.
        """
        from ._graph import DimensionGraph

        # Bypass DimensionGraph's own construction and attach this group
        # directly.
        result = object.__new__(DimensionGraph)
        result._group = self
        return result

    def isdisjoint(self, other: DimensionGroup) -> bool:
        """Test whether the intersection of two groups is empty.

        Returns `True` if either operand is empty.
        """
        return self.names.isdisjoint(other.names)

    def issubset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.names <= other.names

    def issuperset(self, other: DimensionGroup) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.names >= other.names

    def __eq__(self, other: Any) -> bool:
        from ._graph import DimensionGraph

        # TODO: Drop DimensionGraph support here on DM-41326.
        if isinstance(other, (DimensionGroup, DimensionGraph)):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        return hash(self.required.as_tuple())

    def __le__(self, other: DimensionGroup) -> bool:
        return self.names <= other.names

    def __ge__(self, other: DimensionGroup) -> bool:
        return self.names >= other.names

    def __lt__(self, other: DimensionGroup) -> bool:
        return self.names < other.names

    def __gt__(self, other: DimensionGroup) -> bool:
        return self.names > other.names

    def union(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with all dimensions in any of the operands.

        The elements of the returned group may exceed the naive union of their
        elements, as some dimension elements are included in groups whenever
        multiple dimensions are present, and those dependency dimensions could
        have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGroup(self.universe, names)

    def intersection(self, *others: DimensionGroup) -> DimensionGroup:
        """Construct a new group with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGroup(self.universe, names=names)

    def __or__(self, other: DimensionGroup) -> DimensionGroup:
        return self.union(other)

    def __and__(self, other: DimensionGroup) -> DimensionGroup:
        return self.intersection(other)

    @property
    def data_coordinate_keys(self) -> Set[str]:
        """A set of dimensions ordered like `DataCoordinate.mapping`.

        This order is defined as all required dimensions followed by all
        implied dimensions.
        """
        return self._data_coordinate_indices.keys()

    @property
    @cached_getter
    def lookup_order(self) -> tuple[str, ...]:
        """A tuple of all elements in the order needed to find their records.

        Unlike the table definition/topological order (which is what
        `DimensionUniverse.sorted` gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order: list[str] = []

        def add_to_order(element: DimensionElement) -> None:
            # Append ``element`` (and then, recursively, whatever it implies)
            # once everything it requires has already been placed.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                # Not ready yet; a later pass of the loop below retries it.
                return
            order.append(element.name)
            done.add(element.name)
            for other in element.implied:
                add_to_order(other)

        # Repeat passes over the required dimensions until all are placed;
        # each pass can unblock dimensions that were skipped earlier.
        while not done.issuperset(self.required):
            for dimension in self.required:
                add_to_order(self.universe[dimension])

        # Everything not reached above goes last, in `elements` order.
        order.extend(element for element in self.elements if element not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this group."""
        return self._space_families[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this group."""
        return self._space_families[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this group is a subset
    (`DimensionUniverse`).
    """

    names: SortedSequenceSet
    """A true `~collections.abc.Set` of the dimension names.

    Iteration order is consistent with `DimensionUniverse.sorted`: each
    dimension is preceded by its required and implied dependencies.
    """

    elements: SortedSequenceSet
    """A true `~collections.abc.Set` of all dimension element names in the
    group; a superset of `names`.
    """

    governors: SortedSequenceSet
    """A true `~collections.abc.Set` of all governor dimension names in the
    group.
    """

    skypix: SortedSequenceSet
    """A true `~collections.abc.Set` of all skypix dimension names in the
    group.
    """

    required: SortedSequenceSet
    """The dimensions that must be directly identified via their primary keys
    in a data ID in order to identify the rest of the elements in the group.
    """

    implied: SortedSequenceSet
    """The dimensions that need not be directly identified via their primary
    keys in a data ID.
    """

    _space_families: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this group that exist in topological spaces
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

    _data_coordinate_indices: dict[str, int]