Coverage for python/lsst/daf/butler/core/dimensions/_graph.py: 33%

166 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-15 09:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph", "SerializedDimensionGraph"] 

25 

26import itertools 

27from collections.abc import Iterable, Iterator, Mapping, Set 

28from types import MappingProxyType 

29from typing import TYPE_CHECKING, Any, ClassVar 

30 

31from lsst.utils.classes import cached_getter, immutable 

32from pydantic import BaseModel 

33 

34from .._topology import TopologicalFamily, TopologicalSpace 

35from ..json import from_json_pydantic, to_json_pydantic 

36from ..named import NamedValueAbstractSet, NamedValueSet 

37 

38if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 

39 from ...registry import Registry 

40 from ._elements import Dimension, DimensionElement 

41 from ._governor import GovernorDimension 

42 from ._universe import DimensionUniverse 

43 

44 

class SerializedDimensionGraph(BaseModel):
    """Pydantic model holding the serializable form of a `DimensionGraph`."""

    names: list[str]

    @classmethod
    def direct(cls, *, names: list[str]) -> SerializedDimensionGraph:
        """Build a `SerializedDimensionGraph` without running validators.

        Unlike the pydantic "construct" method, the keyword arguments are
        exactly the fields this model requires.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        names : `list` [ `str` ]
            Dimension names to store; they are used exactly as given.

        Returns
        -------
        node : `SerializedDimensionGraph`
            Instance whose attributes were assigned directly.
        """
        instance = SerializedDimensionGraph.__new__(cls)
        # Assign through ``object.__setattr__`` so the values are stored
        # as-is, without going through the model's own attribute handling.
        for attribute, value in (("names", names), ("__fields_set__", {"names"})):
            object.__setattr__(instance, attribute, value)
        return instance

64 

65 

66@immutable 

67class DimensionGraph: 

68 """An immutable, dependency-complete collection of dimensions. 

69 

70 `DimensionGraph` behaves in many respects like a set of `Dimension` 

71 instances that maintains several special subsets and supersets of 

72 related `DimensionElement` instances. It does not fully implement the 

73 `collections.abc.Set` interface, as its automatic expansion of dependencies 

74 would make set difference and XOR operations behave surprisingly. 

75 

76 It also provides dict-like lookup of `DimensionElement` instances from 

77 their names. 

78 

79 Parameters 

80 ---------- 

81 universe : `DimensionUniverse` 

82 The special graph of all known dimensions of which this graph will be 

83 a subset. 

84 dimensions : iterable of `Dimension`, optional 

85 An iterable of `Dimension` instances that must be included in the 

86 graph. All (recursive) dependencies of these dimensions will also 

87 be included. At most one of ``dimensions`` and ``names`` may be

88 provided. 

89 names : iterable of `str`, optional 

90 An iterable of the names of dimensions that must be included in the 

91 graph. All (recursive) dependencies of these dimensions will also 

92 be included. At most one of ``dimensions`` and ``names`` may be

93 provided. 

94 conform : `bool`, optional 

95 If `True` (default), expand to include dependencies. `False` should 

96 only be used for callers that can guarantee that other arguments are 

97 already correctly expanded, and is primarily for internal use. 

98 

99 Notes 

100 ----- 

101 `DimensionGraph` should be used instead of other collections in most 

102 contexts where a collection of dimensions is required and a 

103 `DimensionUniverse` is available. Exceptions include cases where order 

104 matters (and is different from the consistent ordering defined by the 

105 `DimensionUniverse`), or complete `~collections.abc.Set` semantics are

106 required. 

107 """ 

108 

109 _serializedType = SerializedDimensionGraph 

110 

def __new__(
    cls,
    universe: DimensionUniverse,
    dimensions: Iterable[Dimension] | None = None,
    names: Iterable[str] | None = None,
    conform: bool = True,
) -> DimensionGraph:
    # Reduce whichever of ``names`` / ``dimensions`` was given to a plain
    # set of dimension names.
    expanded: set[str]
    if names is not None:
        if dimensions is not None:
            raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
        expanded = set(names)
    elif dimensions is None:
        expanded = set()
    else:
        try:
            # Fast path for containers that expose ``.names`` (e.g.
            # NamedValueSet/NamedKeyDict); not required of callers.
            expanded = set(dimensions.names)  # type: ignore
        except AttributeError:
            expanded = {dim.name for dim in dimensions}
    if conform:
        universe.expandDimensionNameSet(expanded)
    # Graphs are cached on the universe, keyed by the expanded name set.
    key = frozenset(expanded)
    cached = universe._cache.get(key, None)
    if cached is not None:
        return cached
    # Not cached yet: create the instance and register it before its
    # attributes are filled in.
    graph = super().__new__(cls)
    universe._cache[key] = graph
    graph.universe = universe
    # ``universe.sorted`` supplies the universe's ordering for the names.
    graph.dimensions = NamedValueSet(universe.sorted(expanded)).freeze()
    # Every static element whose required dimensions are all present in
    # this graph belongs to ``elements``.
    graph.elements = NamedValueSet(
        candidate
        for candidate in universe.getStaticElements()
        if candidate.required.names <= graph.dimensions.names
    ).freeze()
    graph._finish()
    return graph

154 

def _finish(self) -> None:
    """Populate the attributes derived from ``dimensions`` and ``elements``.

    Sets ``governors``, ``required``, ``implied``, ``topology`` and
    ``_dataCoordinateIndices`` on this instance.
    """
    # Need local import to avoid cycle.
    from ._governor import GovernorDimension

    # The subset of dimensions that are governor dimensions.
    self.governors = NamedValueSet(
        d for d in self.dimensions if isinstance(d, GovernorDimension)
    ).freeze()
    # Split dependencies up into "required" and "implied" subsets.
    # Note that a dimension may be required in one graph and implied in
    # another.
    required: NamedValueSet[Dimension] = NamedValueSet()
    implied: NamedValueSet[Dimension] = NamedValueSet()
    for candidate in self.dimensions:
        # A dimension is implied if any dimension in this graph lists it
        # among its implied dependencies; otherwise it is required.
        if any(candidate.name in other.implied.names for other in self.dimensions):
            implied.add(candidate)
        else:
            required.add(candidate)
    self.required = required.freeze()
    self.implied = implied.freeze()

    # For each topological space, collect the families of the elements
    # that participate in that space.
    self.topology = MappingProxyType(
        {
            space: NamedValueSet(e.topology[space] for e in self.elements if space in e.topology).freeze()
            for space in TopologicalSpace.__members__.values()
        }
    )

    # Build mappings from dimension to index; this is really for
    # DataCoordinate, but we put it in DimensionGraph because many
    # (many!) DataCoordinates will share the same DimensionGraph, and
    # we want them to be lightweight.  The order here is what's convenient
    # for DataCoordinate: all required dimensions before all implied
    # dimensions.
    self._dataCoordinateIndices: dict[str, int] = {
        name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
    }

195 

def __getnewargs__(self) -> tuple:
    """Return the positional arguments ``__new__`` receives on unpickling.

    The arguments are the universe, no ``dimensions``, the dimension names
    as a tuple, and ``conform=False``.
    """
    dimension_names = tuple(self.dimensions.names)
    return (self.universe, None, dimension_names, False)

198 

def __deepcopy__(self, memo: dict) -> DimensionGraph:
    """Return ``self``; no copy is made.

    DimensionGraph is recursively immutable (see the note in the
    ``@immutable`` decorator), so the same instance is handed back.
    """
    return self

203 

@property
def names(self) -> Set[str]:
    """Names of all dimensions in the graph
    (`~collections.abc.Set` [ `str` ]).
    """
    dimension_set = self.dimensions
    return dimension_set.names

208 

def to_simple(self, minimal: bool = False) -> SerializedDimensionGraph:
    """Convert this graph to a simple form suitable for serialization.

    Parameters
    ----------
    minimal : `bool`, optional
        Use minimal serialization. Has no effect for this class.

    Returns
    -------
    simple : `SerializedDimensionGraph`
        Model holding the names of the dimensions.
    """
    # Names are all we can serialize.
    dimension_names = list(self.names)
    return SerializedDimensionGraph(names=dimension_names)

226 

@classmethod
def from_simple(
    cls,
    names: SerializedDimensionGraph,
    universe: DimensionUniverse | None = None,
    registry: Registry | None = None,
) -> DimensionGraph:
    """Construct a new object from the simplified form.

    This is assumed to support data returned from the `to_simple`
    method.

    Parameters
    ----------
    names : `SerializedDimensionGraph`
        Simplified form; its ``names`` attribute supplies the dimension
        names.
    universe : `DimensionUniverse`
        The universe of all known dimensions of which this graph will
        be a subset.  Can be `None` if `Registry` is provided.
    registry : `lsst.daf.butler.Registry`, optional
        Registry from which a universe can be extracted.  Can be `None`
        if universe is provided explicitly.

    Returns
    -------
    graph : `DimensionGraph`
        Newly-constructed object.

    Raises
    ------
    ValueError
        Raised if neither ``universe`` nor ``registry`` is given, or if
        the registry does not yield a universe.
    """
    if universe is None:
        if registry is None:
            raise ValueError("One of universe or registry is required to convert names to a DimensionGraph")
        universe = registry.dimensions
        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

    return cls(names=names.names, universe=universe)

264 

265 to_json = to_json_pydantic 

266 from_json: ClassVar = classmethod(from_json_pydantic) 

267 

def __iter__(self) -> Iterator[Dimension]:
    """Iterate over the members of ``self.dimensions``.

    Only true `Dimension` instances are yielded.
    """
    dimension_set = self.dimensions
    return iter(dimension_set)

274 

def __len__(self) -> int:
    """Return the size of ``self.dimensions``.

    Only true `Dimension` instances are counted.
    """
    dimension_set = self.dimensions
    return len(dimension_set)

281 

def __contains__(self, element: str | DimensionElement) -> bool:
    """Return `True` if the given element or element name is in the graph.

    The test is made against ``self.elements``, so it covers all
    `DimensionElement` instances, not just true `Dimension` instances.
    """
    known_elements = self.elements
    return element in known_elements

289 

def __getitem__(self, name: str) -> DimensionElement:
    """Return the element with the given name.

    The lookup is made in ``self.elements``, so it covers all
    `DimensionElement` instances, not just true `Dimension` instances.
    """
    known_elements = self.elements
    return known_elements[name]

297 

def get(self, name: str, default: Any = None) -> DimensionElement:
    """Return the element with the given name, or a default.

    The lookup is delegated to ``self.elements.get``, so it covers all
    `DimensionElement` instances, not just true `Dimension` instances.

    Parameters
    ----------
    name : `str`
        Name of the element to look up.
    default : `~typing.Any`, optional
        Fallback value forwarded to ``self.elements.get``.
    """
    lookup = self.elements.get
    return lookup(name, default)

305 

def __str__(self) -> str:
    """Return the string form of ``self.dimensions``."""
    dimension_set = self.dimensions
    return str(dimension_set)

308 

def __repr__(self) -> str:
    """Return ``str(self)`` wrapped in ``DimensionGraph(...)``."""
    text = str(self)
    return f"DimensionGraph({text})"

311 

def isdisjoint(self, other: DimensionGraph) -> bool:
    """Test whether the intersection of two graphs is empty.

    The comparison is made between the ``dimensions`` sets of the two
    operands.
    """
    own_dimensions = self.dimensions
    return own_dimensions.isdisjoint(other.dimensions)

318 

def issubset(self, other: DimensionGraph) -> bool:
    """Test whether all dimensions in ``self`` are also in ``other``.

    The comparison is ``self.dimensions <= other.dimensions``.
    """
    mine, theirs = self.dimensions, other.dimensions
    return mine <= theirs

325 

def issuperset(self, other: DimensionGraph) -> bool:
    """Test whether all dimensions in ``other`` are also in ``self``.

    The comparison is ``self.dimensions >= other.dimensions``.
    """
    mine, theirs = self.dimensions, other.dimensions
    return mine >= theirs

332 

def __eq__(self, other: Any) -> bool:
    """Test whether ``other`` is a graph with the same dimensions.

    Anything that is not a `DimensionGraph` compares unequal; otherwise
    the ``dimensions`` sets of the two graphs are compared.
    """
    if not isinstance(other, DimensionGraph):
        return False
    return self.dimensions == other.dimensions

339 

def __hash__(self) -> int:
    """Hash the tuple of dimension names."""
    ordered_names = tuple(self.dimensions.names)
    return hash(ordered_names)

342 

def __le__(self, other: DimensionGraph) -> bool:
    """Test whether ``self`` is a subset of ``other``."""
    mine, theirs = self.dimensions, other.dimensions
    return mine <= theirs

346 

def __ge__(self, other: DimensionGraph) -> bool:
    """Test whether ``self`` is a superset of ``other``."""
    mine, theirs = self.dimensions, other.dimensions
    return mine >= theirs

350 

def __lt__(self, other: DimensionGraph) -> bool:
    """Test whether ``self`` is a strict subset of ``other``."""
    mine, theirs = self.dimensions, other.dimensions
    return mine < theirs

354 

def __gt__(self, other: DimensionGraph) -> bool:
    """Test whether ``self`` is a strict superset of ``other``."""
    mine, theirs = self.dimensions, other.dimensions
    return mine > theirs

358 

def union(self, *others: DimensionGraph) -> DimensionGraph:
    """Construct a new graph with all dimensions in any of the operands.

    The elements of the returned graph may exceed the naive union of
    their elements, as some `DimensionElement` instances are included
    in graphs whenever multiple dimensions are present, and those
    dependency dimensions could have been provided by different operands.
    """
    # Gather every operand's names first, then merge them into a copy of
    # this graph's names.
    operand_names = [graph.names for graph in others]
    combined = set(self.names)
    combined.update(*operand_names)
    return DimensionGraph(self.universe, names=combined)

369 

def intersection(self, *others: DimensionGraph) -> DimensionGraph:
    """Construct a new graph with only dimensions in all of the operands.

    The new graph is built from the intersection of the operands' name
    sets.  See also `union`.
    """
    # Gather every operand's names first, then narrow a copy of this
    # graph's names down to those shared by all of them.
    operand_names = [graph.names for graph in others]
    shared = set(self.names)
    shared.intersection_update(*operand_names)
    return DimensionGraph(self.universe, names=shared)

377 

def __or__(self, other: DimensionGraph) -> DimensionGraph:
    """Construct a new graph with all dimensions in either operand.

    Operator form of `union` with a single other graph.
    """
    merged = self.union(other)
    return merged

384 

def __and__(self, other: DimensionGraph) -> DimensionGraph:
    """Construct a new graph with only dimensions in both operands.

    Operator form of `intersection` with a single other graph.
    """
    common = self.intersection(other)
    return common

391 

@property
@cached_getter
def primaryKeyTraversalOrder(self) -> tuple[DimensionElement, ...]:
    """Return a tuple of all elements in a specific order.

    The order allows records to be found given their primary keys,
    starting from only the primary keys of required dimensions
    (`tuple` [ `DimensionElement` ]).

    Unlike the table definition/topological order (which is what
    DimensionUniverse.sorted gives you), when dimension A implies
    dimension B, dimension A appears first.
    """
    done: set[str] = set()
    order: list[DimensionElement] = []

    def addToOrder(element: DimensionElement) -> None:
        # Append ``element`` once all of its other required dimensions
        # have been added, then recurse into what it implies.
        if element.name in done:
            return
        predecessors = set(element.required.names)
        predecessors.discard(element.name)
        if not done.issuperset(predecessors):
            # Not ready yet; a later pass of the outer loop retries it.
            return
        order.append(element)
        done.add(element.name)
        for other in element.implied:
            addToOrder(other)

    # ``done`` holds names, so compare it against the required dimensions'
    # names rather than against the dimension objects themselves.
    required_names = self.required.names
    while not done.issuperset(required_names):
        for dimension in self.required:
            addToOrder(dimension)

    # Append the remaining elements that were not reached above.
    order.extend(element for element in self.elements if element.name not in done)
    return tuple(order)

426 

@property
def spatial(self) -> NamedValueAbstractSet[TopologicalFamily]:
    """Families represented by the spatial elements in this graph.

    This is the ``TopologicalSpace.SPATIAL`` entry of ``self.topology``.
    """
    space = TopologicalSpace.SPATIAL
    return self.topology[space]

431 

@property
def temporal(self) -> NamedValueAbstractSet[TopologicalFamily]:
    """Families represented by the temporal elements in this graph.

    This is the ``TopologicalSpace.TEMPORAL`` entry of ``self.topology``.
    """
    space = TopologicalSpace.TEMPORAL
    return self.topology[space]

436 

437 # Class attributes below are shadowed by instance attributes, and are 

438 # present just to hold the docstrings for those instance attributes. 

439 

440 universe: DimensionUniverse 

441 """The set of all known dimensions, of which this graph is a subset 

442 (`DimensionUniverse`). 

443 """ 

444 

445 dimensions: NamedValueAbstractSet[Dimension] 

446 """A true `~collections.abc.Set` of all true `Dimension` instances in the 

447 graph (`NamedValueAbstractSet` of `Dimension`). 

448 

449 This is the set used for iteration, ``len()``, and most set-like operations 

450 on `DimensionGraph` itself. 

451 """ 

452 

453 elements: NamedValueAbstractSet[DimensionElement] 

454 """A true `~collections.abc.Set` of all `DimensionElement` instances in the 

455 graph; a superset of `dimensions` (`NamedValueAbstractSet` of 

456 `DimensionElement`). 

457 

458 This is the set used for dict-like lookups, including the ``in`` operator, 

459 on `DimensionGraph` itself. 

460 """ 

461 

462 governors: NamedValueAbstractSet[GovernorDimension] 

463 """A true `~collections.abc.Set` of all true `GovernorDimension` instances 

464 in the graph (`NamedValueAbstractSet` of `GovernorDimension`). 

465 """ 

466 

467 required: NamedValueAbstractSet[Dimension] 

468 """The subset of `dimensions` whose elements must be directly identified 

469 via their primary keys in a data ID in order to identify the rest of the 

470 elements in the graph (`NamedValueAbstractSet` of `Dimension`). 

471 """ 

472 

473 implied: NamedValueAbstractSet[Dimension] 

474 """The subset of `dimensions` whose elements need not be directly 

475 identified via their primary keys in a data ID (`NamedValueAbstractSet` of 

476 `Dimension`). 

477 """ 

478 

479 topology: Mapping[TopologicalSpace, NamedValueAbstractSet[TopologicalFamily]] 

480 """Families of elements in this graph that can participate in topological 

481 relationships (`~collections.abc.Mapping` from `TopologicalSpace` to 

482 `NamedValueAbstractSet` of `TopologicalFamily`). 

483 """