Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 38%

163 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26import itertools 

27from collections.abc import Iterable, Iterator, Mapping, Set 

28from types import MappingProxyType 

29from typing import TYPE_CHECKING, Any, ClassVar 

30 

31from lsst.daf.butler._compat import _BaseModelCompat 

32from lsst.utils.classes import cached_getter, immutable 

33 

34from .._topology import TopologicalFamily, TopologicalSpace 

35from ..json import from_json_pydantic, to_json_pydantic 

36from ..named import NamedValueAbstractSet, NamedValueSet 

37 

38if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

39 from ...registry import Registry 

40 from ._elements import Dimension, DimensionElement 

41 from ._governor import GovernorDimension 

42 from ._universe import DimensionUniverse 

43 

44 

class SerializedDimensionGraph(_BaseModelCompat):
    """Serializable stand-in for a `DimensionGraph`.

    Only the dimension names are stored.
    """

    names: list[str]

    @classmethod
    def direct(cls, *, names: list[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` while skipping validation.

        Unlike the pydantic "construct" method, the keyword arguments here are
        exactly the fields the model requires, and members are built through
        their own `direct` methods.

        Only call this with inputs that are already trusted.

        Parameters
        ----------
        names : `list` [ `str` ]
            Names of the dimensions, stored as given.

        Returns
        -------
        model : `SerializedDimensionGraph`
            Instance created via ``model_construct``.
        """
        unvalidated = cls.model_construct(names=names)
        return unvalidated

61 

62 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` must be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` must be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in most
    contexts where a collection of dimensions is required and a
    `DimensionUniverse` is available.  Exceptions include cases where order
    matters (and is different from the consistent ordering defined by the
    `DimensionUniverse`), or complete `~collections.abc.Set` semantics are
    required.
    """

    _serializedType = SerializedDimensionGraph

    def __new__(
        cls,
        universe: DimensionUniverse,
        dimensions: Iterable[Dimension] | None = None,
        names: Iterable[str] | None = None,
        conform: bool = True,
    ) -> DimensionGraph:
        conformedNames: set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            universe.expandDimensionNameSet(conformedNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(universe.sorted(conformedNames)).freeze()
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(
            e for e in universe.getStaticElements() if e.required.names <= self.dimensions.names
        ).freeze()
        self._finish()
        # Only add the graph to the cache once it is fully built, so that an
        # exception raised above cannot leave a half-initialized instance
        # behind to be returned by later calls with the same names.
        universe._cache[cacheKey] = self
        return self

    def _finish(self) -> None:
        """Populate the attributes derived from ``dimensions`` and ``elements``.

        Sets ``governors``, ``required``, ``implied``, ``topology`` and
        ``_dataCoordinateIndices``; called once from `__new__`.
        """
        # Make a set containing just the governor dimensions in this graph.
        # Need local import to avoid cycle.
        from ._governor import GovernorDimension

        self.governors = NamedValueSet(
            d for d in self.dimensions if isinstance(d, GovernorDimension)
        ).freeze()
        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required: NamedValueSet[Dimension] = NamedValueSet()
        implied: NamedValueSet[Dimension] = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2.implied.names for dim2 in self.dimensions):
                implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                required.add(dim1)
        self.required = required.freeze()
        self.implied = implied.freeze()

        self.topology = MappingProxyType(
            {
                space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
                for space in TopologicalSpace.__members__.values()
            }
        )

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }

    def __getnewargs__(self) -> tuple:
        # Positional arguments for ``__new__``: (universe, dimensions, names,
        # conform).  The names are already expanded, so ``conform=False``.
        return (self.universe, None, tuple(self.dimensions.names), False)

    def __deepcopy__(self, memo: dict) -> DimensionGraph:
        # DimensionGraph is recursively immutable; see note in @immutable
        # decorator.
        return self

    @property
    def names(self) -> Set[str]:
        """Set of the names of all dimensions in the graph
        (`~collections.abc.Set` of `str`).
        """
        return self.dimensions.names

    def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
        """Convert this class to a simple python type.

        This type is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `SerializedDimensionGraph`
            Model holding the names of the dimensions.
        """
        # Names are all we can serialize.
        return SerializedDimensionGraph(names=list(self.names))

    @classmethod
    def from_simple(
        cls,
        names: SerializedDimensionGraph,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DimensionGraph:
        """Construct a new object from the simplified form.

        This is assumed to support data returned from the `to_simple`
        method.

        Parameters
        ----------
        names : `SerializedDimensionGraph`
            Simplified form; its ``names`` attribute gives the names of the
            dimensions.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if `Registry` is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        graph : `DimensionGraph`
            Newly-constructed object.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if no usable universe could be determined from them.
        """
        if universe is None and registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        if universe is None and registry is not None:
            universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        return cls(names=names.names, universe=universe)

    # JSON round-tripping is delegated to the shared pydantic-based helpers.
    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph.

        (and true `Dimension` instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: str | DimensionElement) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `~typing.Any`, optional
            Value forwarded to ``self.elements.get`` as its default.

        Returns
        -------
        element : `DimensionElement`
            Result of ``self.elements.get(name, default)``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions <= other.dimensions

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions >= other.dimensions

    def __eq__(self, other: Any) -> bool:
        """Test the arguments have exactly the same dimensions & elements."""
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``."""
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``."""
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``."""
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``."""
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See also `union`.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with all dimensions in any of the operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph with only dimensions in all of the operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    @cached_getter
    def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]:
        """Return a tuple of all elements in specific order.

        The order allows records to be
        found given their primary keys, starting from only the primary keys of
        required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.
        """
        done: set[str] = set()
        order = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` (and then whatever it implies) once all of
            # its other required dimensions have already been added.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds element names, so compare against the names of the
        # required dimensions rather than the dimension objects themselves.
        while not done.issuperset(self.required.names):
            for dimension in self.required:
                addToOrder(dimension)

        order.extend(element for element in self.elements if element.name not in done)
        return tuple(order)

    @property
    def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the spatial elements in this graph."""
        return self.topology[TopologicalSpace.SPATIAL]

    @property
    def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
        """Families represented by the temporal elements in this graph."""
        return self.topology[TopologicalSpace.TEMPORAL]

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueAbstractSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueAbstractSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueAbstractSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueAbstractSet` of
    `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    governors: NamedValueAbstractSet[GovernorDimension]
    """A true `~collections.abc.Set` of all true `GovernorDimension` instances
    in the graph (`NamedValueAbstractSet` of `GovernorDimension`).
    """

    required: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueAbstractSet` of `Dimension`).
    """

    implied: NamedValueAbstractSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueAbstractSet` of
    `Dimension`).
    """

    topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]]
    """Families of elements in this graph that can participate in topological
    relationships (`~collections.abc.Mapping` from `TopologicalSpace` to
    `NamedValueAbstractSet` of `TopologicalFamily`).
    """
478 relationships (`~collections.abc.Mapping` from `TopologicalSpace` to 

479 `NamedValueAbstractSet` of `TopologicalFamily`). 

480 """