Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph"] 

25 

26from typing import ( 

27 Any, 

28 Iterable, 

29 Iterator, 

30 KeysView, 

31 Optional, 

32 Set, 

33 Tuple, 

34 TYPE_CHECKING, 

35 Union, 

36) 

37 

38from ..named import NamedValueSet, NamedKeyDict 

39from ..utils import immutable 

40 

41if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.

42 from .universe import DimensionUniverse 

43 from .elements import DimensionElement, Dimension 

44 

45 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        """Return the graph for the given dimensions, reusing a cached one.

        Graphs are looked up in (and, when new, stored in)
        ``universe._cache`` under the `frozenset` of their dimension names,
        so requesting the same set of names from the same universe returns
        the same instance.
        """
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.
            # Iterate over a temporary copy so we can modify the original.
            for name in tuple(conformedNames):
                conformedNames.update(universe[name]._related.dependencies)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in conformedNames)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self) -> None:
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.

        It expects ``self.dimensions`` and ``self.elements`` to be populated
        already; it freezes them and derives ``required``, ``implied``,
        ``spatial``, ``temporal`` and the private index mappings from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for dim1 in self.dimensions:
            if any(dim1.name in dim2._related.implied for dim2 in self.dimensions):
                self.implied.add(dim1)
            else:
                # If no other dimension implies dim1, it's required.
                self.required.add(dim1)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # These contain the values of the `.spatial` and `.temporal` attributes
        # of all elements, unless those attributes are not in the graph.
        # In that case, the element whose attribute is not in the graph is
        # added instead.  This ensures that these sets contain the
        # most-specific spatial and temporal elements, not the summary elements
        # that aggregate them, unless the summaries are all that we have.
        self.spatial = NamedValueSet()
        self.temporal = NamedValueSet()
        for element in self.elements:
            if element.spatial is not None:
                if element.spatial in self.elements:
                    self.spatial.add(element.spatial)
                else:
                    self.spatial.add(element)
            if element.temporal is not None:
                if element.temporal in self.elements:
                    self.temporal.add(element.temporal)
                else:
                    self.temporal.add(element)
        self.spatial.freeze()
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices: NamedKeyDict[Dimension, int] = NamedKeyDict(
            {dimension: i for i, dimension in enumerate(self.required)}
        )
        self._dimensionIndices: NamedKeyDict[Dimension, int] = NamedKeyDict(
            {dimension: i for i, dimension in enumerate(self.dimensions)}
        )
        self._elementIndices: NamedKeyDict[DimensionElement, int] = NamedKeyDict(
            {element: i for i, element in enumerate(self.elements)}
        )

    def __getnewargs__(self) -> tuple:
        # Positional arguments for __new__: (universe, dimensions, names,
        # conform).  The stored names are passed with conform=False, so no
        # dependency expansion is repeated.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.

        Parameters
        ----------
        name : `str`
            Name of the element to look up.
        default : `object`, optional
            Fallback value forwarded to ``self.elements.get``; defaults to
            `None`.

        Returns
        -------
        element : `DimensionElement`
            The result of ``self.elements.get(name, default)``.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    @classmethod
    def decode(cls, encoded: bytes, *, universe: DimensionUniverse) -> DimensionGraph:
        """Construct a `DimensionGraph` from its encoded representation.

        Parameters
        ----------
        encoded : `bytes`
            Byte string produced by `DimensionGraph.encode`.
        universe : `DimensionUniverse`
            Universe the new graph is a part of.  Must have the same dimensions
            as the original universe.

        Returns
        -------
        graph : `DimensionGraph`
            A new (or possibly cached) `DimensionGraph` instance matching the
            given encoding.
        """
        # The encoding is a big-endian bitmask; bit ``i`` is set when the
        # dimension with universe index ``i`` is part of the graph.
        mask = int.from_bytes(encoded, "big")
        indices = universe._dimensionIndices
        dimensions = [dimension for dimension in universe.dimensions
                      if mask & (1 << indices[dimension])]
        return cls(universe, dimensions=dimensions, conform=False)

    def encode(self) -> bytes:
        """Encode a `DimensionGraph` into a byte string.

        Returns
        -------
        encoded : `bytes`
            Encoded representation of the graph.  Length is guaranteed to be
            equal to `DimensionUniverse.getEncodeLength`.
        """
        # Set one bit per dimension, at the dimension's index in the universe.
        mask = 0
        for dimension in self.dimensions:
            index = self.universe._dimensionIndices[dimension]
            mask |= (1 << index)
        return mask.to_bytes(self.universe.getEncodeLength(), byteorder="big")

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same dimensions
        and elements.

        Only ``dimensions`` is compared; objects that are not
        `DimensionGraph` instances never compare equal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        else:
            return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.

        The result is built in ``self.universe``.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        The result is built in ``self.universe``.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.
        """
        cached = getattr(self, "_primaryKeyTraversalOrder", None)
        if cached is not None:
            return cached

        done: Set[str] = set()
        order = []

        def addToOrder(element: DimensionElement) -> None:
            if element.name in done:
                return
            # An element can only be added once everything it requires
            # (other than itself) has already been added.
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            order.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # ``done`` holds names, so compare it against the names of the
        # required dimensions rather than the Dimension objects themselves.
        while not done.issuperset(self.required.names):
            nDoneBefore = len(done)
            for dimension in self.required:
                addToOrder(dimension)
            if len(done) == nDoneBefore:
                # addToOrder depends only on ``done``; a pass that adds
                # nothing would repeat identically forever.
                raise RuntimeError(
                    f"Could not determine a primary key traversal order for {self}."
                )

        order.extend(element for element in self.elements if element.name not in done)
        result = tuple(order)
        self._primaryKeyTraversalOrder = result
        return result

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent temporal information
    (`NamedValueSet` of `DimensionElement`).
    """