Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 33%

170 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-23 02:06 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26import itertools 

27from types import MappingProxyType 

28from typing import ( 

29 TYPE_CHECKING, 

30 AbstractSet, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Mapping, 

37 Optional, 

38 Set, 

39 Tuple, 

40 Union, 

41) 

42 

43from lsst.utils.classes import cached_getter, immutable 

44from pydantic import BaseModel 

45 

46from .._topology import TopologicalFamily, TopologicalSpace 

47from ..json import from_json_pydantic, to_json_pydantic 

48from ..named import NamedValueAbstractSet, NamedValueSet 

49 

50if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 50 ↛ 51line 50 didn't jump to line 51, because the condition on line 50 was never true

51 from ...registry import Registry 

52 from ._elements import Dimension, DimensionElement 

53 from ._governor import GovernorDimension 

54 from ._universe import DimensionUniverse 

55 

56 

class SerializedDimensionGraph(BaseModel):
    """Pydantic model holding the serializable form of a `DimensionGraph`.

    The only state carried is the list of dimension names.
    """

    names: List[str]

    @classmethod
    def direct(cls, *, names: List[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` without running validators.

        Unlike the pydantic "construct" method, the keyword arguments here
        are exactly the fields the model requires, and nested members would
        be built through their own `direct` methods.

        Only call this with inputs that are already trusted, since nothing
        is validated.

        Parameters
        ----------
        names : `list` of `str`
            The dimension names to store on the model.

        Returns
        -------
        model : `SerializedDimensionGraph`
            The new, unvalidated model instance.
        """
        # Attributes are assigned through ``object.__setattr__`` so that the
        # model's own attribute-setting machinery is not involved.
        assign = object.__setattr__
        instance = SerializedDimensionGraph.__new__(cls)
        assign(instance, "names", names)
        assign(instance, "__fields_set__", {"names"})
        return instance

76 

77 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same names twice
    returns the same object.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands ``conformedNames`` in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(
            e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
        ).freeze()
        self._finish()
        return self

    def _finish(self) -> None:
        """Populate the derived attributes of a newly-created graph.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and the
        private ``_dataCoordinateIndices`` mapping from ``self.dimensions``
        and ``self.elements``, which must already be set.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        # Collect every name implied by some dimension in this graph once,
        # rather than rescanning all dimensions for each candidate.
        impliedNames: Set[str] = set()
        for dimension in self.dimensions:
            impliedNames.update(dimension.implied.names)
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dimension in self.dimensions:
            if dimension.name in impliedNames:
                implied.add(dimension)
            else:
                # If no other dimension implies this one, it's required.
                required.add(dimension)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Arguments for ``__new__`` on unpickling/copying: names are already
        # expanded, so ``conform`` is `False`.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph (`KeysView`)."""
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        names : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe can be determined from them.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Value passed through to ``self.elements.get`` as its default.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*(other.names for other in others))
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be added once everything it requires
            # (other than itself) has already been added.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the required names rather
        # than relying on dimensions comparing equal to their names.
        # Repeated passes pick up dimensions whose predecessors were not yet
        # done on an earlier pass.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Anything not reachable from the required dimensions goes last.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """