Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 33%

165 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-08 05:05 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26import itertools 

27from types import MappingProxyType 

28from typing import ( 

29 TYPE_CHECKING, 

30 AbstractSet, 

31 Any, 

32 ClassVar, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Tuple, 

41 Union, 

42) 

43 

44from lsst.utils.classes import cached_getter, immutable 

45from pydantic import BaseModel 

46 

47from .._topology import TopologicalFamily, TopologicalSpace 

48from ..json import from_json_pydantic, to_json_pydantic 

49from ..named import NamedValueAbstractSet, NamedValueSet 

50 

51if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

52 from ...registry import Registry 

53 from ._elements import Dimension, DimensionElement 

54 from ._governor import GovernorDimension 

55 from ._universe import DimensionUniverse 

56 

57 

58class SerializedDimensionGraph(BaseModel): 

59 """Simplified model of a `DimensionGraph` suitable for serialization.""" 

60 

61 names: List[str] 

62 

63 @classmethod 

64 def direct(cls, *, names: List[str]) -> SerializedDimensionGraph: 

65 """Construct a `SerializedDimensionGraph` directly without validators. 

66 

67 This differs from the pydantic "construct" method in that the arguments 

68 are explicitly what the model requires, and it will recurse through 

69 members, constructing them from their corresponding `direct` methods. 

70 

71 This method should only be called when the inputs are trusted. 

72 """ 

73 node = SerializedDimensionGraph.__new__(cls) 

74 object.__setattr__(node, "names", names) 

75 object.__setattr__(node, "__fields_set__", {"names"}) 

76 return node 

77 

78 

79@immutable 

80class DimensionGraph: 

81 """An immutable, dependency-complete collection of dimensions. 

82 

83 `DimensionGraph` behaves in many respects like a set of `Dimension` 

84 instances that maintains several special subsets and supersets of 

85 related `DimensionElement` instances. It does not fully implement the 

86 `collections.abc.Set` interface, as its automatic expansion of dependencies 

87 would make set difference and XOR operations behave surprisingly. 

88 

89 It also provides dict-like lookup of `DimensionElement` instances from 

90 their names. 

91 

92 Parameters 

93 ---------- 

94 universe : `DimensionUniverse` 

95 The special graph of all known dimensions of which this graph will be 

96 a subset. 

97 dimensions : iterable of `Dimension`, optional 

98 An iterable of `Dimension` instances that must be included in the 

99 graph. All (recursive) dependencies of these dimensions will also 

100 be included. At most one of ``dimensions`` and ``names`` must be 

101 provided. 

102 names : iterable of `str`, optional 

103 An iterable of the names of dimensions that must be included in the 

104 graph. All (recursive) dependencies of these dimensions will also 

105 be included. At most one of ``dimensions`` and ``names`` must be 

106 provided. 

107 conform : `bool`, optional 

108 If `True` (default), expand to include dependencies. `False` should 

109 only be used for callers that can guarantee that other arguments are 

110 already correctly expanded, and is primarily for internal use. 

111 

112 Notes 

113 ----- 

114 `DimensionGraph` should be used instead of other collections in most 

115 contexts where a collection of dimensions is required and a 

116 `DimensionUniverse` is available. Exceptions include cases where order 

117 matters (and is different from the consistent ordering defined by the 

118 `DimensionUniverse`), or complete `~collection.abc.Set` semantics are 

119 required. 

120 """ 

121 

122 _serializedType = SerializedDimensionGraph 

123 

124 def __new__( 

125 cls, 

126 universe: DimensionUniverse, 

127 dimensions: Optional[Iterable[Dimension]] = None, 

128 names: Optional[Iterable[str]] = None, 

129 conform: bool = True, 

130 ) -> DimensionGraph: 

131 conformedNames: Set[str] 

132 if names is None: 

133 if dimensions is None: 

134 conformedNames = set() 

135 else: 

136 try: 

137 # Optimize for NamedValueSet/NamedKeyDict, though that's 

138 # not required. 

139 conformedNames = set(dimensions.names) # type: ignore 

140 except AttributeError: 

141 conformedNames = set(d.name for d in dimensions) 

142 else: 

143 if dimensions is not None: 

144 raise TypeError("Only one of 'dimensions' and 'names' may be provided.") 

145 conformedNames = set(names) 

146 if conform: 

147 universe.expandDimensionNameSet(conformedNames) 

148 # Look in the cache of existing graphs, with the expanded set of names. 

149 cacheKey = frozenset(conformedNames) 

150 self = universe._cache.get(cacheKey, None) 

151 if self is not None: 

152 return self 

153 # This is apparently a new graph. Create it, and add it to the cache. 

154 self = super().__new__(cls) 

155 universe._cache[cacheKey] = self 

156 self.universe = universe 

157 # Reorder dimensions by iterating over the universe (which is 

158 # ordered already) and extracting the ones in the set. 

159 self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze() 

160 # Make a set that includes both the dimensions and any 

161 # DimensionElements whose dependencies are in self.dimensions. 

162 self.elements = NamedValueSet( 

163 e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names 

164 ).freeze() 

165 self._finish() 

166 return self 

167 

168 def _finish(self) -> None: 

169 # Make a set containing just the governor dimensions in this graph. 

170 # Need local import to avoid cycle. 

171 from ._governor import GovernorDimension 

172 

173 self.governors = NamedValueSet( 

174 d for d in self.dimensions if isinstance(d, GovernorDimension) 

175 ).freeze() 

176 # Split dependencies up into "required" and "implied" subsets. 

177 # Note that a dimension may be required in one graph and implied in 

178 # another. 

179 required: NamedValueSet[Dimension] = NamedValueSet() 

180 implied: NamedValueSet[Dimension] = NamedValueSet() 

181 for i1, dim1 in enumerate(self.dimensions): 

182 for i2, dim2 in enumerate(self.dimensions): 

183 if dim1.name in dim2.implied.names: 

184 implied.add(dim1) 

185 break 

186 else: 

187 # If no other dimension implies dim1, it's required. 

188 required.add(dim1) 

189 self.required = required.freeze() 

190 self.implied = implied.freeze() 

191 

192 self.topology = MappingProxyType( 

193 { 

194 space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze() 

195 for space in TopologicalSpace.__members__.values() 

196 } 

197 ) 

198 

199 # Build mappings from dimension to index; this is really for 

200 # DataCoordinate, but we put it in DimensionGraph because many 

201 # (many!) DataCoordinates will share the same DimensionGraph, and 

202 # we want them to be lightweight. The order here is what's convenient 

203 # for DataCoordinate: all required dimensions before all implied 

204 # dimensions. 

205 self._dataCoordinateIndices: Dict[str, int] = { 

206 name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names)) 

207 } 

208 

209 def __getnewargs__(self) -> tuple: 

210 return (self.universe, None, tuple(self.dimensions.names), False) 

211 

212 def __deepcopy__(self, memo: dict) -> DimensionGraph: 

213 # DimensionGraph is recursively immutable; see note in @immutable 

214 # decorator. 

215 return self 

216 

217 @property 

218 def names(self) -> AbstractSet[str]: 

219 """Set of the names of all dimensions in the graph (`KeysView`).""" 

220 return self.dimensions.names 

221 

222 def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph: 

223 """Convert this class to a simple python type. 

224 

225 This type is suitable for serialization. 

226 

227 Parameters 

228 ---------- 

229 minimal : `bool`, optional 

230 Use minimal serialization. Has no effect on for this class. 

231 

232 Returns 

233 ------- 

234 names : `list` 

235 The names of the dimensions. 

236 """ 

237 # Names are all we can serialize. 

238 return SerializedDimensionGraph(names=list(self.names)) 

239 

240 @classmethod 

241 def from_simple( 

242 cls, 

243 names: SerializedDimensionGraph, 

244 universe: Optional[DimensionUniverse] = None, 

245 registry: Optional[Registry] = None, 

246 ) -> DimensionGraph: 

247 """Construct a new object from the simplified form. 

248 

249 This is assumed to support data data returned from the `to_simple` 

250 method. 

251 

252 Parameters 

253 ---------- 

254 names : `list` of `str` 

255 The names of the dimensions. 

256 universe : `DimensionUniverse` 

257 The special graph of all known dimensions of which this graph will 

258 be a subset. Can be `None` if `Registry` is provided. 

259 registry : `lsst.daf.butler.Registry`, optional 

260 Registry from which a universe can be extracted. Can be `None` 

261 if universe is provided explicitly. 

262 

263 Returns 

264 ------- 

265 graph : `DimensionGraph` 

266 Newly-constructed object. 

267 """ 

268 if universe is None and registry is None: 

269 raise ValueError("One of universe or registry is required to convert names to a DimensionGraph") 

270 if universe is None and registry is not None: 

271 universe = registry.dimensions 

272 if universe is None: 

273 # this is for mypy 

274 raise ValueError("Unable to determine a usable universe") 

275 

276 return cls(names=names.names, universe=universe) 

277 

278 to_json = to_json_pydantic 

279 from_json: ClassVar = classmethod(from_json_pydantic) 

280 

281 def __iter__(self) -> Iterator[Dimension]: 

282 """Iterate over all dimensions in the graph. 

283 

284 (and true `Dimension` instances only). 

285 """ 

286 return iter(self.dimensions) 

287 

288 def __len__(self) -> int: 

289 """Return the number of dimensions in the graph. 

290 

291 (and true `Dimension` instances only). 

292 """ 

293 return len(self.dimensions) 

294 

295 def __contains__(self, element: Union[str, DimensionElement]) -> bool: 

296 """Return `True` if the given element or element name is in the graph. 

297 

298 This test covers all `DimensionElement` instances in ``self.elements``, 

299 not just true `Dimension` instances). 

300 """ 

301 return element in self.elements 

302 

303 def __getitem__(self, name: str) -> DimensionElement: 

304 """Return the element with the given name. 

305 

306 This lookup covers all `DimensionElement` instances in 

307 ``self.elements``, not just true `Dimension` instances). 

308 """ 

309 return self.elements[name] 

310 

311 def get(self, name: str, default: Any = None) -> DimensionElement: 

312 """Return the element with the given name. 

313 

314 This lookup covers all `DimensionElement` instances in 

315 ``self.elements``, not just true `Dimension` instances). 

316 """ 

317 return self.elements.get(name, default) 

318 

319 def __str__(self) -> str: 

320 return str(self.dimensions) 

321 

322 def __repr__(self) -> str: 

323 return f"DimensionGraph({str(self)})" 

324 

325 def isdisjoint(self, other: DimensionGraph) -> bool: 

326 """Test whether the intersection of two graphs is empty. 

327 

328 Returns `True` if either operand is the empty. 

329 """ 

330 return self.dimensions.isdisjoint(other.dimensions) 

331 

332 def issubset(self, other: DimensionGraph) -> bool: 

333 """Test whether all dimensions in ``self`` are also in ``other``. 

334 

335 Returns `True` if ``self`` is empty. 

336 """ 

337 return self.dimensions <= other.dimensions 

338 

339 def issuperset(self, other: DimensionGraph) -> bool: 

340 """Test whether all dimensions in ``other`` are also in ``self``. 

341 

342 Returns `True` if ``other`` is empty. 

343 """ 

344 return self.dimensions >= other.dimensions 

345 

346 def __eq__(self, other: Any) -> bool: 

347 """Test the arguments have exactly the same dimensions & elements.""" 

348 if isinstance(other, DimensionGraph): 

349 return self.dimensions == other.dimensions 

350 else: 

351 return False 

352 

353 def __hash__(self) -> int: 

354 return hash(tuple(self.dimensions.names)) 

355 

356 def __le__(self, other: DimensionGraph) -> bool: 

357 """Test whether ``self`` is a subset of ``other``.""" 

358 return self.dimensions <= other.dimensions 

359 

360 def __ge__(self, other: DimensionGraph) -> bool: 

361 """Test whether ``self`` is a superset of ``other``.""" 

362 return self.dimensions >= other.dimensions 

363 

364 def __lt__(self, other: DimensionGraph) -> bool: 

365 """Test whether ``self`` is a strict subset of ``other``.""" 

366 return self.dimensions < other.dimensions 

367 

368 def __gt__(self, other: DimensionGraph) -> bool: 

369 """Test whether ``self`` is a strict superset of ``other``.""" 

370 return self.dimensions > other.dimensions 

371 

372 def union(self, *others: DimensionGraph) -> DimensionGraph: 

373 """Construct a new graph with all dimensions in any of the operands. 

374 

375 The elements of the returned graph may exceed the naive union of 

376 their elements, as some `DimensionElement` instances are included 

377 in graphs whenever multiple dimensions are present, and those 

378 dependency dimensions could have been provided by different operands. 

379 """ 

380 names = set(self.names).union(*[other.names for other in others]) 

381 return DimensionGraph(self.universe, names=names) 

382 

383 def intersection(self, *others: DimensionGraph) -> DimensionGraph: 

384 """Construct a new graph with only dimensions in all of the operands. 

385 

386 See also `union`. 

387 """ 

388 names = set(self.names).intersection(*[other.names for other in others]) 

389 return DimensionGraph(self.universe, names=names) 

390 

391 def __or__(self, other: DimensionGraph) -> DimensionGraph: 

392 """Construct a new graph with all dimensions in any of the operands. 

393 

394 See `union`. 

395 """ 

396 return self.union(other) 

397 

398 def __and__(self, other: DimensionGraph) -> DimensionGraph: 

399 """Construct a new graph with only dimensions in all of the operands. 

400 

401 See `intersection`. 

402 """ 

403 return self.intersection(other) 

404 

405 @property 

406 @cached_getter 

407 def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]: 

408 """Return a tuple of all elements in specific order. 

409 

410 The order allows records to be 

411 found given their primary keys, starting from only the primary keys of 

412 required dimensions (`tuple` [ `DimensionRecord` ]). 

413 

414 Unlike the table definition/topological order (which is what 

415 DimensionUniverse.sorted gives you), when dimension A implies 

416 dimension B, dimension A appears first. 

417 """ 

418 done: Set[str] = set() 

419 order = [] 

420 

421 def addToOrder(element: DimensionElement) -> None: 

422 if element.name in done: 

423 return 

424 predecessors = set(element.required.names) 

425 predecessors.discard(element.name) 

426 if not done.issuperset(predecessors): 

427 return 

428 order.append(element) 

429 done.add(element.name) 

430 for other in element.implied: 

431 addToOrder(other) 

432 

433 while not done.issuperset(self.required): 

434 for dimension in self.required: 

435 addToOrder(dimension) 

436 

437 order.extend(element for element in self.elements if element.name not in done) 

438 return tuple(order) 

439 

440 @property 

441 def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]: 

442 """Families represented by the spatial elements in this graph.""" 

443 return self.topology[TopologicalSpace.SPATIAL] 

444 

445 @property 

446 def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]: 

447 """Families represented by the temporal elements in this graph.""" 

448 return self.topology[TopologicalSpace.TEMPORAL] 

449 

450 # Class attributes below are shadowed by instance attributes, and are 

451 # present just to hold the docstrings for those instance attributes. 

452 

453 universe: DimensionUniverse 

454 """The set of all known dimensions, of which this graph is a subset 

455 (`DimensionUniverse`). 

456 """ 

457 

458 dimensions: NamedValueAbstractSet[Dimension] 

459 """A true `~collections.abc.Set` of all true `Dimension` instances in the 

460 graph (`NamedValueAbstractSet` of `Dimension`). 

461 

462 This is the set used for iteration, ``len()``, and most set-like operations 

463 on `DimensionGraph` itself. 

464 """ 

465 

466 elements: NamedValueAbstractSet[DimensionElement] 

467 """A true `~collections.abc.Set` of all `DimensionElement` instances in the 

468 graph; a superset of `dimensions` (`NamedValueAbstractSet` of 

469 `DimensionElement`). 

470 

471 This is the set used for dict-like lookups, including the ``in`` operator, 

472 on `DimensionGraph` itself. 

473 """ 

474 

475 governors: NamedValueAbstractSet[GovernorDimension] 

476 """A true `~collections.abc.Set` of all true `GovernorDimension` instances 

477 in the graph (`NamedValueAbstractSet` of `GovernorDimension`). 

478 """ 

479 

480 required: NamedValueAbstractSet[Dimension] 

481 """The subset of `dimensions` whose elements must be directly identified 

482 via their primary keys in a data ID in order to identify the rest of the 

483 elements in the graph (`NamedValueAbstractSet` of `Dimension`). 

484 """ 

485 

486 implied: NamedValueAbstractSet[Dimension] 

487 """The subset of `dimensions` whose elements need not be directly 

488 identified via their primary keys in a data ID (`NamedValueAbstractSet` of 

489 `Dimension`). 

490 """ 

491 

492 topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] 

493 """Families of elements in this graph that can participate in topological 

494 relationships (`Mapping` from `TopologicalSpace` to 

495 `NamedValueAbstractSet` of `TopologicalFamily`). 

496 """