Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId") 

25 

26import numbers 

27from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING 

28 

29from lsst.sphgeom import Region 

30from ..utils import IndexedTupleDict, immutable 

31from ..timespan import Timespan 

32from .graph import DimensionGraph 

33 

34if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true

35 from .elements import DimensionElement, Dimension 

36 from .universe import DimensionUniverse 

37 from .records import DimensionRecord 

38 

39 

@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        # Only the graph's required dimensions are indexed here; ``values`` is
        # expected to line up with ``graph._requiredIndices`` (not validated).
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            only if ``mapping`` is a subclass instance that is returned as-is
            or via its `subset` method (see Notes).

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.

        When ``mapping`` is already a `DataCoordinate` whose graph is a
        superset of ``graph``, keyword arguments that do not name a required
        dimension of ``graph`` are ignored and ``mapping.subset(graph)`` is
        returned.  Keyword arguments that do name one take precedence over
        the values in ``mapping``, and the result is then a new base-class
        `DataCoordinate` built from the combined key-value pairs.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif (mapping.graph.issuperset(graph)
                  and kwds.keys().isdisjoint(graph.required.names)):
                # Already standardized and no keyword argument overrides a
                # value we need; just return the relevant subset.  Without
                # the disjointness check, overriding keyword arguments would
                # be silently discarded.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Merge ``mapping`` and ``kwds`` into one name-keyed dict; keyword
        # arguments win on conflict.
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    # Not a DataCoordinate; assume it is already name-keyed.
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
            # some backends cannot handle numpy.int64 type which is
            # a subclass of numbers.Integral, convert that to int.
            values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        except KeyError as err:
            raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # Also support comparison with informal data ID dictionaries that
            # map dimension name to value.
            return self.byName() == other

    def __str__(self) -> str:
        return f"{self.byName()}"

    def __repr__(self) -> str:
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if a value is neither an integer nor a `str`.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width unsigned big-endian encoding (64 bytes).
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def matches(self, other: DataCoordinate) -> bool:
        """Test whether the values of all keys in both coordinates are equal.

        Parameters
        ----------
        other : `DataCoordinate`
            The other coordinate to compare to.

        Returns
        -------
        consistent : `bool`
            `True` if all keys that are in both ``other`` and ``self``
            are associated with the same values, and `False` otherwise.
            `True` if there are no keys in common.
        """
        # Prefer the other object's ``full`` mapping when it has one, so that
        # more than just its required dimensions can take part.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """

272 

273 

# Alias used only in type annotations: either a validated `DataCoordinate` or
# an informal mapping from dimension name to value.
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""

278 

279 

def _intersectRegions(*args: Region) -> Optional[Region]:
    """Combine zero or more regions into their intersection.

    Internal helper for `ExpandedDataCoordinate`.

    With no arguments the result is `None`; with exactly one argument that
    region is returned unchanged.

    Computing a real intersection is not implemented yet.  When two or more
    regions are passed, the `NotImplemented` singleton is *returned* (no
    exception is raised) and ends up stored on the `ExpandedDataCoordinate`.
    This is deliberate: being unable to compute the region should not prevent
    an `ExpandedDataCoordinate` from being constructed, and there is currently
    no high-level use case for the regions of such data IDs.
    """
    if not args:
        return None
    if len(args) > 1:
        # Placeholder for a true intersection of several regions.
        return NotImplemented
    (onlyRegion,) = args
    return onlyRegion

300 

301 

@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            # Rebuild ``records`` so it holds exactly one entry per element of
            # ``graph``; a missing element raises KeyError here.
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            # Derive every dimension's primary key value from its record; the
            # getattr default yields None when the attribute is unavailable
            # (including when the record itself is None).
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    # One spatial element without a region means no overall
                    # region can be reported.
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                # Reached only when the loop finished without ``break``.
                self.region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    # Same rule as for regions, applied to timespans.
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                # Reached only when the loop finished without ``break``.
                self.timespan = Timespan.intersection(*timespans)
        else:
            # Internal fast path: trust the caller's arguments as given.
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        # Membership covers implied dimensions too, via ``full``.
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        # Lookup covers implied dimensions too, via ``full``.
        return self.full[key]

    def __repr__(self) -> str:
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[int, Tuple[int, int]]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def matches(self, other) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        # Unlike the base class, compare over all of ``self.full``'s keys,
        # not just the required ones.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        # ``conform=True`` makes the constructor re-derive records, full,
        # region and timespan for the smaller graph.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        # Pickle support: pass the stored state back through ``__new__`` with
        # ``conform=False`` so it is reused as-is instead of being re-derived.
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there
    are multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """