Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 31%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

164 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26from pydantic import BaseModel 

27import itertools 

28from types import MappingProxyType 

29from typing import ( 

30 AbstractSet, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Mapping, 

37 Optional, 

38 Set, 

39 Tuple, 

40 TYPE_CHECKING, 

41 Union, 

42) 

43 

44from lsst.utils.classes import cached_getter, immutable 

45from ..named import NamedValueAbstractSet, NamedValueSet 

46from .._topology import TopologicalSpace, TopologicalFamily 

47from ..json import from_json_pydantic, to_json_pydantic 

48 

49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from ._universe import DimensionUniverse 

51 from ._elements import DimensionElement, Dimension 

52 from ._governor import GovernorDimension 

53 from ...registry import Registry 

54 

55 

class SerializedDimensionGraph(BaseModel):
    """Serializable stand-in for a `DimensionGraph`.

    The model carries only the names of the dimensions.
    """

    names: List[str]

    @classmethod
    def direct(cls, *, names: List[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` without running validators.

        Unlike the pydantic "construct" method, the arguments accepted here
        are exactly those the model requires, and members are meant to be
        built recursively through their own `direct` methods.

        Only call this method with inputs that are already trusted.

        Parameters
        ----------
        names : `list` of `str`
            The dimension names to store on the model, used as given.

        Returns
        -------
        node : `SerializedDimensionGraph`
            The newly created, unvalidated model instance.
        """
        instance = SerializedDimensionGraph.__new__(cls)
        # Write the attributes through ``object.__setattr__`` so that nothing
        # defined on the model class intercepts the assignments.
        assign = object.__setattr__
        assign(instance, 'names', names)
        assign(instance, '__fields_set__', {'names'})
        return instance

75 

76 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same names twice
    returns the same object.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands the set in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                      if e.required.names <= self.dimensions.names).freeze()
        self._finish()
        # Register in the cache only once construction has fully succeeded,
        # so that an exception raised above never leaves a partially
        # initialized graph behind to be returned by later lookups.
        universe._cache[cacheKey] = self
        return self

    def _finish(self) -> None:
        """Compute the derived attributes of a newly constructed graph.

        Requires ``self.dimensions`` and ``self.elements`` to be set already;
        sets ``governors``, ``required``, ``implied``, ``topology``, and the
        private ``_dataCoordinateIndices`` mapping.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension
        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        # For each topological space, collect the families of the elements
        # in this graph that participate in that space.
        self.topology = MappingProxyType({
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        })

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Arguments for ``__new__`` when the object is re-created from
        # these arguments: the names are already expanded, so ``conform``
        # is passed as `False`.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph (`KeysView`)."""
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        names : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(cls, names: SerializedDimensionGraph,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe can be determined.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Value passed through to ``self.elements.get`` as the fallback.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` once all of its other required dimensions
            # have been added, then recurse into what it implies.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against the names of the required
        # dimensions rather than the dimension objects themselves.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        # Everything not reached from the required dimensions goes last, in
        # the order of ``self.elements``.
        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """