Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph"] 

25 

26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, Tuple, TYPE_CHECKING 

27 

28from ..utils import NamedValueSet, NamedKeyDict, immutable 

29 

30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.

31 from .universe import DimensionUniverse 

32 from .elements import DimensionElement, Dimension 

33 

34 

def _filterDependentElements(elements: NamedValueSet[DimensionElement],
                             prefer: NamedValueSet[DimensionElement]
                             ) -> NamedValueSet[DimensionElement]:
    """Reduce a set of elements to ones with no dependency relationships.

    Parameters
    ----------
    elements : `NamedValueSet` of `DimensionElement`
        The elements to be filtered.
    prefer : `NamedValueSet` of `DimensionElement`
        Elements that win over the elements that depend on them.  For a
        related pair in which neither is preferred, the dependent is kept
        and its dependency is dropped.

    Returns
    -------
    filtered : `NamedValueSet` of `DimensionElement`
        The filtered set of elements.  Order is unspecified.

    Notes
    -----
    Only dependencies that were visited (and kept) before their dependent
    are considered, so this presumably relies on ``elements`` iterating
    dependencies ahead of the elements that depend on them -- TODO confirm
    against the callers' ordering guarantees.
    """
    keptNames = set()
    for candidate in elements:
        # Dependencies of this candidate that earlier iterations kept.
        keptDependencies = frozenset(candidate._recursiveDependencyNames & keptNames)
        if not keptDependencies.isdisjoint(prefer.names):
            # A preferred dependency is already present; it wins, so the
            # candidate is skipped and nothing is removed.
            continue
        # The candidate replaces every (non-preferred) dependency kept so far.
        keptNames -= keptDependencies
        keptNames.add(candidate.name)
    return NamedValueSet(elements[keptName] for keptName in keptNames)

62 

63 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.

    Instances are cached on the universe, keyed by the (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        if names is None:
            if dimensions is None:
                names = set()
            else:
                try:
                    # Containers that already expose their names.
                    names = set(dimensions.names)
                except AttributeError:
                    # Plain iterables of objects with a ``name`` attribute.
                    names = {d.name for d in dimensions}
        else:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        if conform:
            # Expand given dimensions to include all dependencies.  Iterate
            # over a temporary copy so we can modify the original.
            for name in tuple(names):
                names.update(universe[name]._recursiveDependencyNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self):
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to have been populated already, and derives every
        other attribute from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        self.required = NamedValueSet()
        self.implied = NamedValueSet()
        for dimension in self.dimensions:
            if any(dimension.name in other._impliedDependencyNames for other in self.dimensions):
                self.implied.add(dimension)
            else:
                # If no dimension in the graph implies this one, it's required.
                self.required.add(dimension)
        self.required.freeze()
        self.implied.freeze()

        # Compute sets of spatial and temporal elements.
        # We keep both the sets with no redundancy resolution and those with
        # KEEP_CHILD redundancy resolution for all elements.  The latter is
        # what is usually wanted (by e.g. ExpandedDataCoordinate), but the
        # former is what we need to compute any other redundancy resolution
        # on the fly.
        self._allSpatial = NamedValueSet(element for element in self.elements if element.spatial)
        self._allSpatial.freeze()
        self._allTemporal = NamedValueSet(element for element in self.elements if element.temporal)
        self._allTemporal.freeze()
        self.spatial = _filterDependentElements(self._allSpatial, prefer=NamedValueSet())
        self.spatial.freeze()
        self.temporal = _filterDependentElements(self._allTemporal, prefer=NamedValueSet())
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

    def __getnewargs__(self) -> tuple:
        # Positional arguments for ``__new__``: (universe, dimensions, names,
        # conform).  The names stored here are already expanded, so
        # ``conform=False`` skips the dependency expansion.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default`` if there is
        no such element.

        This lookup covers all `DimensionElement` instances in
        ``self.elements``, not just true `Dimension` instances.
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Only ``dimensions`` is compared.  Objects without a ``dimensions``
        attribute are never equal to a graph.
        """
        try:
            otherDimensions = other.dimensions
        except AttributeError:
            # Not graph-like: defer to Python's fallback (reflected
            # comparison, then identity) instead of raising.
            return NotImplemented
        return self.dimensions == otherDimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        The result is built with the default ``conform=True``, so the
        dependencies of the shared dimensions are (re-)expanded.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other):
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other):
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    def getSpatial(self, *, independent: bool = True,
                   prefer: Optional[Iterable[DimensionElement]] = None
                   ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with spatial regions,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            spatial elements in favor of the dependent one (which is the one
            with the smaller, more precise region).  A graph that includes both
            "tract" and "patch", for example, would have only "patch" returned
            here if ``independent`` is `True`.  If `False`, all spatial
            elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).
            For example, passing ``prefer=[tract]`` to a graph with both
            "tract" and "patch" would result in only "tract" being returned.

        Returns
        -------
        spatial : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.spatial` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allSpatial
        elif prefer is None:
            # The default filtering is precomputed at construction.
            return self.spatial
        else:
            return _filterDependentElements(self._allSpatial,
                                            prefer=NamedValueSet(self.elements[p] for p in prefer))

    def getTemporal(self, *, independent: bool = True,
                    prefer: Optional[Iterable[DimensionElement]] = None
                    ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with a timespan,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            temporal elements in favor of the dependent one (which is the one
            with the smaller, more precise timespans).  If `False`, all
            temporal elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).

        Returns
        -------
        temporal : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.temporal` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allTemporal
        elif prefer is None:
            # The default filtering is precomputed at construction.
            return self.temporal
        else:
            return _filterDependentElements(self._allTemporal,
                                            prefer=NamedValueSet(self.elements[p] for p in prefer))

    @property
    def primaryKeyTraversalOrder(self) -> Tuple[DimensionElement, ...]:
        """Return a tuple of all elements in an order that allows records to
        be found given their primary keys, starting from only the primary keys
        of required dimensions (`tuple` [ `DimensionElement` ]).

        Unlike the table definition/topological order (which is what
        DimensionUniverse.sorted gives you), when dimension A implies
        dimension B, dimension A appears first.

        The result is computed on first access and stored on the instance.
        """
        order = getattr(self, "_primaryKeyTraversalOrder", None)
        if order is None:
            # Names (not element objects) of everything already placed in
            # ``order``; every membership test below is made with names.
            done = set()
            order = []

            def addToOrder(element) -> None:
                if element.name in done:
                    return
                # An element can only be placed after all of the *other*
                # required dimensions of its own graph have been placed.
                predecessors = set(element.graph.required.names)
                predecessors.discard(element.name)
                if not done.issuperset(predecessors):
                    return
                order.append(element)
                done.add(element.name)
                # Anything this element implies can now be attempted.
                for other in element.implied:
                    addToOrder(other)

            # Repeat passes over the required dimensions until each has been
            # placed; a dimension skipped in one pass may become placeable
            # once its predecessors were added later in that same pass.
            while not done.issuperset(self.required.names):
                for dimension in self.required:
                    addToOrder(dimension)

            # Elements not reached through the traversal above go last.
            order.extend(element for element in self.elements if element.name not in done)
            order = tuple(order)
            self._primaryKeyTraversalOrder = order
        return order

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified
    via their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getSpatial` is applied.
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent timespans
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getTemporal` is applied.
    """