
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")

from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING

from lsst.sphgeom import Region
from ..utils import IndexedTupleDict, immutable
from ..timespan import Timespan
from .graph import DimensionGraph

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from .elements import DimensionElement, Dimension
    from .universe import DimensionUniverse
    from .records import DimensionRecord


@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value
    pairs identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like
    `NamedKeyDict`, both `Dimension` instances and `str` names thereof may
    be used as keys in lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items`
    views to just the ``self.graph.required`` subset in order to guarantee
    true (i.e. Liskov) substitutability.
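
    Examples
    --------
    An illustrative sketch only: the dimension names used here and the
    ``universe`` object (a `DimensionUniverse`) are assumptions, not
    guaranteed to exist in any particular configuration.

    >>> dataId = DataCoordinate.standardize(
    ...     {"instrument": "HSC", "detector": 50}, universe=universe
    ... )
    >>> dataId["instrument"]  # lookup by `str` name or `Dimension` instance
    'HSC'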

    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a
        true `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary
            key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`, optional
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already
            a `DataCoordinate`.
        universe : `DimensionUniverse`, optional
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        **kwds
            Additional keyword arguments are treated like additional
            key-value pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass
            instance if and only if ``mapping`` is a subclass instance and
            ``graph`` is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not
            supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
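
        Examples
        --------
        A sketch of typical use; the dimension names and the ``universe``
        object are assumptions:

        >>> dataId = DataCoordinate.standardize(
        ...     {"instrument": "HSC"}, universe=universe, detector=50
        ... )
        >>> DataCoordinate.standardize(dataId) is dataId  # already standardized
        True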

        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            raise KeyError(f"No value in data ID for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: DataCoordinate) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # Also support comparison with informal data ID dictionaries that
            # map dimension name to value.
            return self.byName() == other

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.
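
        Examples
        --------
        A sketch using `hashlib`, assuming an existing data ID ``dataId``:

        >>> import hashlib
        >>> md = hashlib.sha256()
        >>> dataId.fingerprint(md.update)
        >>> digest = md.hexdigest()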

        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, int):
                update(v.to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def matches(self, other: DataCoordinate) -> bool:
        """Test whether the values of all keys in both coordinates are equal.

        Parameters
        ----------
        other : `DataCoordinate`
            The other coordinate to compare to.

        Returns
        -------
        consistent : `bool`
            `True` if all keys that are in both ``other`` and ``self``
            are associated with the same values, and `False` otherwise.
            `True` if there are no keys in common.
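
        Examples
        --------
        Illustrative only; the dimension names and ``universe`` are
        assumptions:

        >>> a = DataCoordinate.standardize({"instrument": "HSC"},
        ...                                universe=universe)
        >>> b = DataCoordinate.standardize({"instrument": "HSC", "detector": 50},
        ...                                universe=universe)
        >>> a.matches(b)  # the only common key ("instrument") agrees
        True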

        """
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
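
        Examples
        --------
        A sketch, assuming ``dataId`` identifies a superset of the
        dimensions in ``graph``:

        >>> smaller = dataId.subset(graph)
        >>> smaller.graph == graph
        True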

        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """


DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
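
For example, a signature that accepts either form (an illustrative
function, not an actual API) might look like::

    def findSomething(dataId: DataId, *, universe: DimensionUniverse) -> None:
        dataId = DataCoordinate.standardize(dataId, universe=universe)
        ...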

"""


def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    If no regions are provided, returns `None`.

    This is currently a placeholder; it actually returns `NotImplemented`
    (it does *not* raise an exception) when multiple regions are given,
    which propagates to `ExpandedDataCoordinate`.  This reflects the fact
    that we don't want to fail to construct an `ExpandedDataCoordinate`
    entirely when we can't compute its region, and at present we don't have
    a high-level use case for the regions of these particular data IDs.
    """
    if len(args) == 0:
        return None
    elif len(args) == 1:
        return args[0]
    else:
        return NotImplemented


@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special
        value `NotImplemented` if multiple spatial dimensions are
        identified; in the future this will be replaced with the
        intersection.  Ignored unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.  Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs
        are present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions: its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in``
    operator.
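
    Examples
    --------
    A sketch only; the ``registry`` object and the dimension names are
    assumptions:

    >>> expanded = registry.expandDataId({"instrument": "HSC", "visit": 12345})
    >>> set(expanded.keys()) == set(expanded.graph.required)  # required only
    True
    >>> len(expanded.full) >= len(expanded)  # implied dimensions via ``full``
    True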

    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                self.region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                self.timespan = Timespan.intersection(*timespans)
        else:
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned
            unless ``returnMaxBits`` is `True`.
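
        Examples
        --------
        A sketch; the packer name ``"visit_detector"`` is hypothetical and
        depends on the dimension configuration:

        >>> packed = dataId.pack("visit_detector")
        >>> packed, maxBits = dataId.pack("visit_detector", returnMaxBits=True)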

        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def matches(self, other) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str`
    name as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str`
    name as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if multiple
    spatial dimensions are identified; in the future this will be replaced
    with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """