Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 31%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

158 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26from pydantic import BaseModel 

27import itertools 

28from types import MappingProxyType 

29from typing import ( 

30 AbstractSet, 

31 Any, 

32 Dict, 

33 Iterable, 

34 Iterator, 

35 List, 

36 Mapping, 

37 Optional, 

38 Set, 

39 Tuple, 

40 TYPE_CHECKING, 

41 Union, 

42) 

43 

44from lsst.utils.classes import cached_getter, immutable 

45from ..named import NamedValueAbstractSet, NamedValueSet 

46from .._topology import TopologicalSpace, TopologicalFamily 

47from ..json import from_json_pydantic, to_json_pydantic 

48 

49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from ._universe import DimensionUniverse 

51 from ._elements import DimensionElement, Dimension 

52 from ._governor import GovernorDimension 

53 from ...registry import Registry 

54 

55 

class SerializedDimensionGraph(BaseModel):
    """Serializable stand-in for a `DimensionGraph`.

    Attributes
    ----------
    names : `list` [ `str` ]
        Names of the dimensions in the graph.
    """

    names: List[str]

60 

61 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided; if neither is given the graph is built from an empty set
        of names.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Optional[Iterable[Dimension]] = None,
        names: Optional[Iterable[str]] = None,
        conform: bool = True
    ) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expands ``conformedNames`` in place.
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        # NOTE(review): the instance is cached before its attributes are
        # populated; if anything below raises, the partially-initialized
        # graph stays in the cache.  Confirm whether that can happen before
        # changing the order.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.getStaticElements()
                                      if e.required.names <= self.dimensions.names).freeze()
        self._finish()
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions`` and
        ``elements``: ``governors``, ``required``, ``implied``, ``topology``
        and the private ``_dataCoordinateIndices`` mapping.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension
        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for candidate in self.dimensions:
            if any(candidate.name in other.implied.names for other in self.dimensions):
                implied.add(candidate)
            else:
                # If no dimension in the graph implies this one, it's
                # required.
                required.add(candidate)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType({
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        })

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for ``__new__``: the dimension names are
        # passed as ``names`` with ``conform=False``.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> AbstractSet[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` [ `str` ]).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(cls, names: SerializedDimensionGraph,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Serialized form holding the names of the dimensions.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset.  Can be `None` if ``registry`` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted.  Can be `None`
            if ``universe`` is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if ``registry.dimensions`` is `None`.
        """
        if universe is None:
            if registry is None:
                raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Look up the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances); ``name`` and
        ``default`` are passed straight through to ``self.elements.get``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements.

        Only ``dimensions`` is compared; any object that is not a
        `DimensionGraph` compares unequal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property  # type: ignore
    @cached_getter
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        Raises
        ------
        RuntimeError
            Raised if a full pass over the required dimensions cannot place
            any new element, which would otherwise loop forever.
        """
        done: Set[str] = set()
        order: List[DimensionElement] = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be placed once everything it requires
            # (other than itself) has been placed.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare against names rather than
        # against the Dimension objects themselves.
        requiredNames = self.required.names
        while not done.issuperset(requiredNames):
            nDoneBefore = len(done)
            for dimension in self.required:
                addToOrder(dimension)
            if len(done) == nDoneBefore:
                # A pass that places nothing would repeat identically
                # forever; fail loudly instead of hanging.
                stuck = sorted(set(requiredNames) - done)
                raise RuntimeError(
                    f"Cannot determine traversal order; unsatisfiable required dependencies for {stuck}."
                )

        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified via
    their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """

472 """