Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph"] 

25 

26import itertools 

27from typing import ( 

28 Any, 

29 Dict, 

30 Iterable, 

31 Iterator, 

32 KeysView, 

33 Optional, 

34 Set, 

35 Tuple, 

36 TYPE_CHECKING, 

37 Union, 

38) 

39 

40from ..named import NamedValueSet 

41from ..utils import immutable 

42 

43if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true

44 from .universe import DimensionUniverse 

45 from .elements import DimensionElement, Dimension 

46 

47 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        conformedNames: Set[str]
        if names is None:
            if dimensions is None:
                conformedNames = set()
            else:
                try:
                    # Optimize for NamedValueSet/NamedKeyDict, though that's
                    # not required.
                    conformedNames = set(dimensions.names)  # type: ignore
                except AttributeError:
                    conformedNames = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            conformedNames = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.  Iterate
            # over a temporary copy so we can modify the original.
            for name in tuple(conformedNames):
                conformedNames.update(universe[name]._related.dependencies)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(conformedNames)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it.
        self = super().__new__(cls)
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in conformedNames)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        # Only publish the graph in the cache once it is fully constructed, so
        # that a failure above cannot leave a half-built instance behind for
        # later callers to pick up.
        universe._cache[cacheKey] = self
        return self

    def _finish(self) -> None:
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives all
        other attributes from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for candidate in self.dimensions:
            if any(candidate.name in other._related.implied for other in self.dimensions):
                self.implied.add(candidate)
            else:
                # If no other dimension implies this one, it's required.
                self.required.add(candidate)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # These contain the values of the `.spatial` and `.temporal` attributes
        # of all elements, unless those attributes are not in the graph.
        # In that case, the element whose attribute is not in the graph is
        # added instead.  This ensures that these sets contain the
        # most-specific spatial and temporal elements, not the summary elements
        # that aggregate them, unless the summaries are all that we have.
        self.spatial = NamedValueSet()
        self.temporal = NamedValueSet()
        for element in self.elements:
            if element.spatial is not None:
                if element.spatial in self.elements:
                    self.spatial.add(element.spatial)
                else:
                    self.spatial.add(element)
            if element.temporal is not None:
                if element.temporal in self.elements:
                    self.temporal.add(element.temporal)
                else:
                    self.temporal.add(element)
        self.spatial.freeze()
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.  The order here is what's convenient
        # for DataCoordinate: all required dimensions before all implied
        # dimensions.
        self._dataCoordinateIndices: Dict[str, int] = {
            name: i for i, name in enumerate(itertools.chain(self.required.names, self.implied.names))
        }
        # Same for element to index.  These are used for topological-sort
        # comparison operators in DimensionElement itself.
        self._elementIndices: Dict[str, int] = {
            name: i for i, name in enumerate(self.elements.names)
        }
        # Same for dimension to index, sorted topologically across required
        # and implied.  This is used for encode/decode.
        self._dimensionIndices: Dict[str, int] = {
            name: i for i, name in enumerate(self.dimensions.names)
        }

    def __getnewargs__(self) -> tuple:
        # Arguments for `__new__` when unpickling: the names are passed
        # already expanded, so ``conform`` is `False`.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or a default.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.  The lookup
        is delegated to ``self.elements.get``, with ``default`` as the
        fallback value.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self})"

    @classmethod
    def decode(cls, encoded: bytes, *, universe: DimensionUniverse) -> DimensionGraph:
        """Construct a `DimensionGraph` from its encoded representation.

        Parameters
        ----------
        encoded : `bytes`
            Byte string produced by `DimensionGraph.encode`.
        universe : `DimensionUniverse`
            Universe the new graph is a part of.  Must have the same dimensions
            as the original universe.

        Returns
        -------
        graph : `DimensionGraph`
            A new (or possibly cached) `DimensionGraph` instance matching the
            given encoding.
        """
        # The encoding is a big-endian bit mask with one bit per dimension,
        # at the position given by the universe's dimension index.
        mask = int.from_bytes(encoded, "big")
        dimensions = [
            dimension for dimension in universe.dimensions
            if mask & (1 << universe._dimensionIndices[dimension.name])
        ]
        return cls(universe, dimensions=dimensions, conform=False)

    def encode(self) -> bytes:
        """Encode a `DimensionGraph` into a byte string.

        Returns
        -------
        encoded : `bytes`
            Encoded representation of the graph.  Length is guaranteed to be
            equal to `DimensionUniverse.getEncodeLength`.
        """
        # Set one bit per dimension, at the position given by the universe's
        # dimension index.
        mask = 0
        for dimension in self.dimensions:
            mask |= 1 << self.universe._dimensionIndices[dimension.name]
        return mask.to_bytes(self.universe.getEncodeLength(), byteorder="big")

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Any object that is not a `DimensionGraph` compares unequal.
        """
        if isinstance(other, DimensionGraph):
            return self.dimensions == other.dimensions
        return False

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.

        Raises
        ------
        RuntimeError
            Raised if a full pass over the required dimensions adds nothing
            to the order while some required dimensions are still missing
            (previously this case would loop forever).
        """
        cached = getattr(self, "_primaryKeyTraversalOrder", None)
        if cached is not None:
            return cached

        done: Set[str] = set()
        ordered: list = []

        def addToOrder(element: DimensionElement) -> None:
            # Append ``element`` if all of its other required dimensions have
            # already been added, then try the elements it implies.
            if element.name in done:
                return
            predecessors = set(element.required.names)
            predecessors.discard(element.name)
            if not done.issuperset(predecessors):
                return
            ordered.append(element)
            done.add(element.name)
            for other in element.implied:
                addToOrder(other)

        # Compare names against names; ``done`` only ever holds `str`.
        requiredNames = self.required.names
        while not done.issuperset(requiredNames):
            numDoneBefore = len(done)
            for dimension in self.required:
                addToOrder(dimension)
            if len(done) == numDoneBefore:
                # Another pass would be identical to this one.
                raise RuntimeError(
                    f"Could not determine primary key traversal order for {self}: "
                    f"no progress made with {sorted(done)} already ordered."
                )

        # Everything not reachable from the required dimensions goes last, in
        # the order it appears in ``self.elements``.
        ordered.extend(element for element in self.elements if element.name not in done)
        result = tuple(ordered)
        self._primaryKeyTraversalOrder = result
        return result

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent temporal regions
    (`NamedValueSet` of `DimensionElement`).
    """

460 """