Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 38%

163 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

31 

32import itertools 

33from collections.abc import Iterable, Iterator, Mapping, Set 

34from types import MappingProxyType 

35from typing import TYPE_CHECKING, Any, ClassVar 

36 

37from lsst.daf.butler._compat import _BaseModelCompat 

38from lsst.utils.classes import cached_getter, immutable 

39 

40from .._topology import TopologicalFamily, TopologicalSpace 

41from ..json import from_json_pydantic, to_json_pydantic 

42from ..named import NamedValueAbstractSet, NamedValueSet 

43 

44if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

45 from ...registry import Registry 

46 from ._elements import Dimension, DimensionElement 

47 from ._governor import GovernorDimension 

48 from ._universe import DimensionUniverse 

49 

50 

class SerializedDimensionGraph(_BaseModelCompat):
    """Serializable stand-in for a `DimensionGraph` that stores only names."""

    names: list[str]

    @classmethod
    def direct(cls, *, names: list[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` while skipping validation.

        Unlike the generic pydantic "construct" entry point, the keyword
        arguments accepted here are exactly the fields of the model.

        Only call this method with inputs that are already trusted, since
        no validators are run on them.

        Parameters
        ----------
        names : `list` [ `str` ]
            Names of the dimensions to store in the model.

        Returns
        -------
        model : `SerializedDimensionGraph`
            Model instance holding ``names`` as given.
        """
        model = cls.model_construct(names=names)
        return model

67 

68 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances. It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph. All (recursive) dependencies of these dimensions will also
        be included. At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies. `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available. Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph that already exists returns the
    cached instance.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Iterable[Dimension] | None = None,
        names: Iterable[str] | None = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph. Create it and populate it fully
        # before publishing it in the cache.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(
            e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
        ).freeze()
        self._finish()
        # Only add the graph to the cache once every attribute has been set,
        # so an exception raised above cannot leave a partially-initialized
        # instance behind for later lookups to return.
        universe._cache[cacheKey] = self
        return self

    def _finish(self) -> None:
        """Compute the derived attributes of a newly-created graph.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and
        ``_dataCoordinateIndices`` from ``dimensions`` and ``elements``,
        which must already be set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        # Gather the names implied by any dimension in this graph once, so
        # the classification below is a single pass.
        impliedNames: set[str] = set()
        for dimension in self.dimensions:
            impliedNames.update(dimension.implied.names)
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dimension in self.dimensions:
            if dimension.name in impliedNames:
                implied.add(dimension)
            else:
                # If no dimension in the graph implies this one, it's
                # required.
                required.add(dimension)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight. The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Arguments are passed positionally to ``__new__``: the names are
        # already expanded, so ``conform`` is `False`.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> Set[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` of `str`).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form holding the names of the dimensions.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if ``registry`` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if ``universe`` is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no universe could be obtained from the registry.
        """
        if universe is None:
            if registry is None:
                raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: str | DimensionElement) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `~typing.Any`, optional
            Fallback value forwarded to ``self.elements.get``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        # Names of the elements already placed in ``order``.
        done: set[str] = set()
        order: list[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be placed once everything it requires
            # (other than itself) has been placed.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare it against the names of the
        # required dimensions rather than the dimension objects themselves.
        # Repeat the pass until every required dimension has been placed.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Append whatever elements were not reached from the required
        # dimensions, in the order of ``self.elements``.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

486 """