Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph"] 

25 

26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, Tuple, TYPE_CHECKING 

27 

28from ..utils import NamedValueSet, NamedKeyDict, immutable 

29 

30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from .universe import DimensionUniverse 

32 from .elements import DimensionElement, Dimension 

33 

34 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe (keyed by the expanded set of
    dimension names), so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        if names is not None:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        elif dimensions is not None:
            try:
                # Containers that expose a ``names`` attribute (as
                # ``self.dimensions`` does below) let us skip per-item access.
                names = set(dimensions.names)
            except AttributeError:
                names = {d.name for d in dimensions}
        else:
            names = set()
        if conform:
            # Expand given dimensions to include all dependencies.
            # Iterate over a temporary copy so we can modify the original.
            for name in tuple(names):
                names.update(universe[name]._related.dependencies)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self):
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives every
        other attribute from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for candidate in self.dimensions:
            if any(candidate.name in other._related.implied for other in self.dimensions):
                self.implied.add(candidate)
            else:
                # If no dimension in the graph implies it, it's required.
                self.required.add(candidate)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # These contain the values of the `.spatial` and `.temporal`
        # attributes of all elements, unless those attributes are not in the
        # graph.  In that case, the element whose attribute is not in the
        # graph is added instead.  This ensures that these sets contain the
        # most-specific spatial and temporal elements, not the summary elements
        # that aggregate them, unless the summaries are all that we have.
        self.spatial = NamedValueSet()
        self.temporal = NamedValueSet()
        for element in self.elements:
            if element.spatial is not None:
                if element.spatial in self.elements:
                    self.spatial.add(element.spatial)
                else:
                    self.spatial.add(element)
            if element.temporal is not None:
                if element.temporal in self.elements:
                    self.temporal.add(element.temporal)
                else:
                    self.temporal.add(element)
        self.spatial.freeze()
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

    def __getnewargs__(self) -> tuple:
        # Arguments handed back to ``__new__`` when unpickling.  The names are
        # taken from ``self.dimensions``, which ``__new__`` already populated
        # from the expanded set, so ``conform=False`` skips re-expansion.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``,
        not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default`` if there is
        no such element.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({self!s})"

    @classmethod
    def decode(cls, encoded: bytes, *, universe: DimensionUniverse) -> DimensionGraph:
        """Construct a `DimensionGraph` from its encoded representation.

        Parameters
        ----------
        encoded : `bytes`
            Byte string produced by `DimensionGraph.encode`.
        universe : `DimensionUniverse`
            Universe the new graph is a part of.  Must have the same dimensions
            as the original universe.

        Returns
        -------
        graph : `DimensionGraph`
            A new (or possibly cached) `DimensionGraph` instance matching the
            given encoding.
        """
        # The encoding is a big-endian bitmask with one bit per dimension,
        # positioned by that dimension's index in the universe.
        mask = int.from_bytes(encoded, "big")
        dimensions = [dimension for dimension in universe.dimensions
                      if mask & (1 << universe._dimensionIndices[dimension])]
        return cls(universe, dimensions=dimensions, conform=False)

    def encode(self) -> bytes:
        """Encode a `DimensionGraph` into a byte string.

        Returns
        -------
        encoded : `bytes`
            Encoded representation of the graph.  Length is guaranteed to be
            equal to `DimensionUniverse.getEncodeLength`.
        """
        # Set one bit per dimension, positioned by the dimension's index in
        # the universe (not in this graph) so encodings are comparable.
        mask = 0
        for dimension in self.dimensions:
            mask |= 1 << self.universe._dimensionIndices[dimension]
        return mask.to_bytes(self.universe.getEncodeLength(), byteorder="big")

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Only ``dimensions`` is compared.  Objects that are not
        `DimensionGraph` instances never compare equal.
        """
        if not isinstance(other, DimensionGraph):
            # Let Python try the reflected comparison (and ultimately fall
            # back to `False`) instead of failing on a missing attribute.
            return NotImplemented
        return self.dimensions == other.dimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.

        Raises
        ------
        RuntimeError
            Raised if the required dimensions cannot all be ordered (a full
            pass over them makes no progress), instead of looping forever.
        """
        order = getattr(self, "_primaryKeyTraversalOrder", None)
        if order is None:
            # Names (not element objects) of everything already placed, so
            # membership tests against names are type-consistent.
            done = set()
            order = []

            def addToOrder(element) -> None:
                if element.name in done:
                    return
                # An element can only be placed once everything it requires
                # (other than itself) has been placed.
                predecessors = set(element.required.names)
                predecessors.discard(element.name)
                if not done.issuperset(predecessors):
                    return
                order.append(element)
                done.add(element.name)
                # Implied elements are reachable from this one, so they come
                # immediately after it.
                for other in element.implied:
                    addToOrder(other)

            requiredNames = set(self.required.names)
            while not done.issuperset(requiredNames):
                placedBefore = len(done)
                for dimension in self.required:
                    addToOrder(dimension)
                if len(done) == placedBefore:
                    # Another pass would behave identically; fail loudly
                    # rather than spin.
                    raise RuntimeError(
                        f"Could not determine a primary key traversal order for {self}; "
                        f"unresolved required dimensions: {sorted(requiredNames - done)}."
                    )

            # Anything not reached above goes last, in graph order.
            order.extend(element for element in self.elements if element.name not in done)
            order = tuple(order)
            self._primaryKeyTraversalOrder = order
        return order

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent temporal regions
    (`NamedValueSet` of `DimensionElement`).
    """