Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DimensionGraph"] 

25 

26from typing import Optional, Iterable, Iterator, KeysView, Union, Any, TYPE_CHECKING 

27 

28from ..utils import NamedValueSet, NamedKeyDict, immutable 

29 

30if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from .universe import DimensionUniverse 

32 from .elements import DimensionElement, Dimension 

33 

34 

def _filterDependentElements(elements: NamedValueSet[DimensionElement],
                             prefer: NamedValueSet[DimensionElement]
                             ) -> NamedValueSet[DimensionElement]:
    """Return a subset of the given set with only independent elements.

    Parameters
    ----------
    elements : `NamedValueSet` of `DimensionElement`
        The elements to be filtered.
    prefer : `NamedValueSet` of `DimensionElement`
        Elements to be included in the result in preference to others with
        which they have a dependency relationship.  When no preferred element
        is given for a pair of related elements, the dependent is included
        rather than the dependency.

    Returns
    -------
    filtered : `NamedValueSet` of `DimensionElement`
        The filtered set of elements.  Callers should still treat the order
        as unspecified, but it now follows the iteration order of
        ``elements`` so that it is reproducible from run to run.
    """
    keptNames = set()
    for element in elements:
        # Dependencies of this element that have already been accepted.
        # Snapshot them in a frozenset because ``keptNames`` is modified
        # below.
        acceptedDependencies = frozenset(element._recursiveDependencyNames & keptNames)
        if not acceptedDependencies.isdisjoint(prefer.names):
            # A preferred dependency is already present; it wins over this
            # (dependent) element, so leave the result untouched.
            continue
        # No preferred dependency is present: this element replaces any of
        # its dependencies that were accepted earlier.
        keptNames.difference_update(acceptedDependencies)
        keptNames.add(element.name)
    # Walk ``elements`` rather than the (hash-ordered) set of names so the
    # result does not depend on string hash randomization.
    return NamedValueSet(element for element in elements if element.name in keptNames)

62 

63 

@immutable
class DimensionGraph:
    """An immutable, dependency-complete collection of dimensions.

    `DimensionGraph` behaves in many respects like a set of `Dimension`
    instances that maintains several special subsets and supersets of
    related `DimensionElement` instances.  It does not fully implement the
    `collections.abc.Set` interface, as its automatic expansion of dependencies
    would make set difference and XOR operations behave surprisingly.

    It also provides dict-like lookup of `DimensionElement` instances from
    their names.

    Parameters
    ----------
    universe : `DimensionUniverse`
        The special graph of all known dimensions of which this graph will be
        a subset.
    dimensions : iterable of `Dimension`, optional
        An iterable of `Dimension` instances that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    names : iterable of `str`, optional
        An iterable of the names of dimensions that must be included in the
        graph.  All (recursive) dependencies of these dimensions will also
        be included.  At most one of ``dimensions`` and ``names`` may be
        provided.
    conform : `bool`, optional
        If `True` (default), expand to include dependencies.  `False` should
        only be used for callers that can guarantee that other arguments are
        already correctly expanded, and is primarily for internal use.

    Raises
    ------
    TypeError
        Raised if both ``dimensions`` and ``names`` are provided.

    Notes
    -----
    `DimensionGraph` should be used instead of other collections in any context
    where a collection of dimensions is required and a `DimensionUniverse` is
    available.

    Instances are cached on the universe, keyed by their (expanded) set of
    dimension names, so constructing a graph with the same dimensions twice
    returns the same object.

    While `DimensionUniverse` inherits from `DimensionGraph`, it should
    otherwise not be used as a base class.
    """

    def __new__(cls, universe: DimensionUniverse,
                dimensions: Optional[Iterable[Dimension]] = None,
                names: Optional[Iterable[str]] = None,
                conform: bool = True) -> DimensionGraph:
        if names is not None:
            if dimensions is not None:
                raise TypeError("Only one of 'dimensions' and 'names' may be provided.")
            names = set(names)
        elif dimensions is None:
            names = set()
        else:
            try:
                # Fast path for containers that already expose their names.
                names = set(dimensions.names)
            except AttributeError:
                names = {d.name for d in dimensions}
        if conform:
            # Expand given dimensions to include all dependencies.  Iterate
            # over a temporary copy so we can modify the original.
            for name in tuple(names):
                names.update(universe[name]._recursiveDependencyNames)
        # Look in the cache of existing graphs, with the expanded set of names.
        cacheKey = frozenset(names)
        self = universe._cache.get(cacheKey, None)
        if self is not None:
            return self
        # This is apparently a new graph.  Create it, and add it to the cache.
        self = super().__new__(cls)
        universe._cache[cacheKey] = self
        self.universe = universe
        # Reorder dimensions by iterating over the universe (which is
        # ordered already) and extracting the ones in the set.
        self.dimensions = NamedValueSet(d for d in universe.dimensions if d.name in names)
        # Make a set that includes both the dimensions and any
        # DimensionElements whose dependencies are in self.dimensions.
        self.elements = NamedValueSet(e for e in universe.elements
                                      if e._shouldBeInGraph(self.dimensions.names))
        self._finish()
        return self

    def _finish(self) -> None:
        """Complete construction of the graph.

        This is intended for internal use by `DimensionGraph` and
        `DimensionUniverse` only.  It expects ``self.dimensions`` and
        ``self.elements`` to be populated already, and derives every other
        attribute from them.
        """
        # Freeze the sets the constructor is responsible for populating.
        self.dimensions.freeze()
        self.elements.freeze()

        # Split dependencies up into "required" and "implied" subsets.
        # Note that a dimension may be required in one graph and implied in
        # another.
        required = NamedValueSet()
        implied = NamedValueSet()
        for candidate in self.dimensions:
            if any(candidate.name in other._impliedDependencyNames for other in self.dimensions):
                implied.add(candidate)
            else:
                # If no other dimension implies the candidate, it's required.
                required.add(candidate)
        required.freeze()
        implied.freeze()
        self.required = required
        self.implied = implied

        # Compute sets of spatial and temporal elements.
        # We keep both the sets with no redundancy resolution and those with
        # KEEP_CHILD redundancy resolution for all elements.  The latter is
        # what is usually wanted (by e.g. ExpandedDataCoordinate), but the
        # former is what we need to compute any other redundancy resolution
        # on the fly.
        self._allSpatial = NamedValueSet(element for element in self.elements if element.spatial)
        self._allSpatial.freeze()
        self._allTemporal = NamedValueSet(element for element in self.elements if element.temporal)
        self._allTemporal.freeze()
        self.spatial = _filterDependentElements(self._allSpatial, prefer=NamedValueSet())
        self.spatial.freeze()
        self.temporal = _filterDependentElements(self._allTemporal, prefer=NamedValueSet())
        self.temporal.freeze()

        # Build mappings from dimension to index; this is really for
        # DataCoordinate, but we put it in DimensionGraph because many
        # (many!) DataCoordinates will share the same DimensionGraph, and
        # we want them to be lightweight.
        self._requiredIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.required)})
        self._dimensionIndices = NamedKeyDict({dimension: i for i, dimension in enumerate(self.dimensions)})
        self._elementIndices = NamedKeyDict({element: i for i, element in enumerate(self.elements)})

        # Compute an element traversal order that allows element records to be
        # found given their primary keys, starting from only the primary keys
        # of required dimensions.  Unlike the table definition/topological
        # order (which is what DimensionUniverse.sorted gives you), when
        # dimension A implies dimension B, dimension A appears first.
        # This is really for DimensionDatabase/ExpandedDataCoordinate, but
        # is stored here so we don't have to recompute it for every coordinate.
        todo = set(self.elements)
        order = []

        def addToPrimaryKeyTraversalOrder(element):
            # Depth-first: an element is emitted before anything it implies.
            if element in todo:
                order.append(element)
                todo.remove(element)
                for other in element.implied:
                    addToPrimaryKeyTraversalOrder(other)

        for dimension in self.required:
            addToPrimaryKeyTraversalOrder(dimension)

        # Anything not reachable from a required dimension goes last.  Walk
        # self.elements instead of the ``todo`` set so the resulting order is
        # reproducible rather than dependent on hash ordering.
        order.extend(element for element in self.elements if element in todo)
        self._primaryKeyTraversalOrder = order

    def __getnewargs__(self) -> tuple:
        # Arguments for ``__new__`` when unpickling/copying: the names are
        # already expanded, so ``conform=False`` skips re-expansion.
        return (self.universe, None, tuple(self.dimensions.names), False)

    @property
    def names(self) -> KeysView[str]:
        """A set of the names of all dimensions in the graph (`KeysView`).
        """
        return self.dimensions.names

    def __iter__(self) -> Iterator[Dimension]:
        """Iterate over all dimensions in the graph (and true `Dimension`
        instances only).
        """
        return iter(self.dimensions)

    def __len__(self) -> int:
        """Return the number of dimensions in the graph (and true `Dimension`
        instances only).
        """
        return len(self.dimensions)

    def __contains__(self, element: Union[str, DimensionElement]) -> bool:
        """Return `True` if the given element or element name is in the graph.

        This test covers all `DimensionElement` instances in ``self.elements``
        (not just true `Dimension` instances).
        """
        return element in self.elements

    def __getitem__(self, name: str) -> DimensionElement:
        """Return the element with the given name.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements[name]

    def get(self, name: str, default: Any = None) -> DimensionElement:
        """Return the element with the given name, or ``default`` if there is
        no such element.

        This lookup covers all `DimensionElement` instances in
        ``self.elements`` (not just true `Dimension` instances).
        """
        return self.elements.get(name, default)

    def __str__(self) -> str:
        return str(self.dimensions)

    def __repr__(self) -> str:
        return f"DimensionGraph({str(self)})"

    def isdisjoint(self, other: DimensionGraph) -> bool:
        """Test whether the intersection of two graphs is empty.

        Returns `True` if either operand is empty.
        """
        return self.dimensions.isdisjoint(other.dimensions)

    def issubset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``self`` are also in ``other``.

        Returns `True` if ``self`` is empty.
        """
        return self.dimensions.issubset(other.dimensions)

    def issuperset(self, other: DimensionGraph) -> bool:
        """Test whether all dimensions in ``other`` are also in ``self``.

        Returns `True` if ``other`` is empty.
        """
        return self.dimensions.issuperset(other.dimensions)

    def __eq__(self, other: Any) -> bool:
        """Test whether ``self`` and ``other`` have exactly the same
        dimensions.

        Objects that are not `DimensionGraph` instances never compare equal;
        `NotImplemented` is returned for them so Python can fall back to its
        default handling instead of raising `AttributeError`.
        """
        if not isinstance(other, DimensionGraph):
            return NotImplemented
        return self.dimensions == other.dimensions

    def __hash__(self) -> int:
        return hash(tuple(self.dimensions.names))

    def __le__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a subset of ``other``.
        """
        return self.dimensions <= other.dimensions

    def __ge__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a superset of ``other``.
        """
        return self.dimensions >= other.dimensions

    def __lt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict subset of ``other``.
        """
        return self.dimensions < other.dimensions

    def __gt__(self, other: DimensionGraph) -> bool:
        """Test whether ``self`` is a strict superset of ``other``.
        """
        return self.dimensions > other.dimensions

    def union(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        The elements of the returned graph may exceed the naive union of
        their elements, as some `DimensionElement` instances are included
        in graphs whenever multiple dimensions are present, and those
        dependency dimensions could have been provided by different operands.
        """
        names = set(self.names).union(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def intersection(self, *others: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.
        """
        names = set(self.names).intersection(*[other.names for other in others])
        return DimensionGraph(self.universe, names=names)

    def __or__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing all dimensions in any of the
        operands.

        See `union`.
        """
        return self.union(other)

    def __and__(self, other: DimensionGraph) -> DimensionGraph:
        """Construct a new graph containing only dimensions in all of the
        operands.

        See `intersection`.
        """
        return self.intersection(other)

    def getSpatial(self, *, independent: bool = True,
                   prefer: Optional[Iterable[DimensionElement]] = None
                   ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with spatial regions,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            spatial elements in favor of the dependent one (which is the one
            with the smaller, more precise region).  A graph that includes both
            "tract" and "patch", for example, would have only "patch" returned
            here if ``independent`` is `True`.  If `False`, all spatial
            elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).
            For example, passing ``prefer=[tract]`` to a graph with both
            "tract" and "patch" would result in only "tract" being returned.

        Returns
        -------
        spatial : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.spatial` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allSpatial
        if prefer is None:
            # Default filtering was precomputed when the graph was built.
            return self.spatial
        return _filterDependentElements(self._allSpatial,
                                        prefer=NamedValueSet(self.elements[p] for p in prefer))

    def getTemporal(self, *, independent: bool = True,
                    prefer: Optional[Iterable[DimensionElement]] = None
                    ) -> NamedValueSet[DimensionElement]:
        """Return the elements that are associated with a timespan,
        possibly with some filtering.

        Parameters
        ----------
        independent : `bool`
            If `True` (default) ensure that all returned elements are
            independent of each other, by resolving any dependencies between
            temporal elements in favor of the dependent one (which is the one
            with the smaller, more precise timespan).  If `False`, all
            temporal elements are returned.
        prefer : iterable of `DimensionElement`
            Elements that should be returned instead of their dependents when
            ``independent`` is `True` (ignored if ``independent`` is `False`).

        Returns
        -------
        temporal : `NamedValueSet` of `DimensionElement`
            Elements that have `DimensionElement.temporal` `True`, filtered
            as specified by the arguments.
        """
        if not independent:
            return self._allTemporal
        if prefer is None:
            # Default filtering was precomputed when the graph was built.
            return self.temporal
        return _filterDependentElements(self._allTemporal,
                                        prefer=NamedValueSet(self.elements[p] for p in prefer))

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    universe: DimensionUniverse
    """The set of all known dimensions, of which this graph is a subset
    (`DimensionUniverse`).
    """

    dimensions: NamedValueSet[Dimension]
    """A true `~collections.abc.Set` of all true `Dimension` instances in the
    graph (`NamedValueSet` of `Dimension`).

    This is the set used for iteration, ``len()``, and most set-like operations
    on `DimensionGraph` itself.
    """

    elements: NamedValueSet[DimensionElement]
    """A true `~collections.abc.Set` of all `DimensionElement` instances in the
    graph; a superset of `dimensions` (`NamedValueSet` of `DimensionElement`).

    This is the set used for dict-like lookups, including the ``in`` operator,
    on `DimensionGraph` itself.
    """

    required: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements must be directly identified via
    their primary keys in a data ID in order to identify the rest of the
    elements in the graph (`NamedValueSet` of `Dimension`).
    """

    implied: NamedValueSet[Dimension]
    """The subset of `dimensions` whose elements need not be directly
    identified via their primary keys in a data ID (`NamedValueSet` of
    `Dimension`).
    """

    spatial: NamedValueSet[DimensionElement]
    """Elements that are associated with independent spatial regions
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getSpatial` is applied.
    """

    temporal: NamedValueSet[DimensionElement]
    """Elements that are associated with independent timespans
    (`NamedValueSet` of `DimensionElement`).

    The default filtering described in `getTemporal` is applied.
    """

466 """