# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")

import numbers
from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING

from lsst.sphgeom import Region
from ..utils import IndexedTupleDict, immutable
from ..timespan import Timespan
from .graph import DimensionGraph

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from .elements import DimensionElement, Dimension
    from .universe import DimensionUniverse
    from .records import DimensionRecord


@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value
    pairs identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`. Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`. Both the base class and subclasses
    restrict comparisons, iteration, and the `keys`, `values`, and `items`
    views to just the ``self.graph.required`` subset in order to guarantee
    true (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self
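
    # Illustrative sketch (not part of the class definition): lookups accept
    # both `Dimension` instances and their `str` names, so for any
    # DataCoordinate ``dataId`` the following is expected to hold:
    #
    #     for dimension in dataId.graph.required:
    #         assert dataId[dimension] == dataId[dimension.name]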

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary
            key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`, optional
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`, optional
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional
            key-value pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance. May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not
            supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result. This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
            # Some backends cannot handle the numpy.int64 type, which is a
            # subclass of numbers.Integral; convert those values to plain int.
            values = tuple(int(val) if isinstance(val, numbers.Integral) else val
                           for val in values)
        except KeyError as err:
            raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        return DataCoordinate(graph, values)
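
    # Usage sketch for `standardize` (illustrative only; the dimension names
    # and the ``universe``/``graph`` objects below are assumptions, not
    # defined in this module):
    #
    #     # From an informal dict, inferring the graph from the keys:
    #     dataId = DataCoordinate.standardize({"instrument": "HSC", "detector": 50},
    #                                         universe=universe)
    #     # Equivalently, via keyword arguments:
    #     dataId = DataCoordinate.standardize(instrument="HSC", detector=50,
    #                                         universe=universe)
    #     # Restricting an existing data ID to a known (sub)graph:
    #     dataId = DataCoordinate.standardize(dataId, graph=graph)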

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: DataCoordinate) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries:
            # we don't know whether any extra keys they might have are
            # consistent with an `ExpandedDataCoordinate` version of ``self``
            # (which should compare as equal) or something else (which should
            # compare as not equal).  We don't even want to return
            # `NotImplemented` and tell Python to delegate to
            # ``other.__eq__``, because that could also be misleading.
            # We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))
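
    # Sketch of `subset` (illustrative; assumes ``dataId.graph`` includes an
    # ``instrument`` dimension):
    #
    #     subGraph = DimensionGraph(dataId.universe, names=["instrument"])
    #     instrumentDataId = dataId.subset(subGraph)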

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """


DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""


def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    If no regions are provided, returns `None`.

    This is currently a placeholder; it actually returns `NotImplemented`
    (it does *not* raise an exception) when multiple regions are given, which
    propagates to `ExpandedDataCoordinate`. This reflects the fact that we
    don't want to fail to construct an `ExpandedDataCoordinate` entirely when
    we can't compute its region, and at present we don't have a high-level
    use case for the regions of these particular data IDs.
    """
    if len(args) == 0:
        return None
    elif len(args) == 1:
        return args[0]
    else:
        return NotImplemented
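
# Behavior sketch for `_intersectRegions` (mirrors the placeholder logic
# above; ``r1`` and ``r2`` stand for arbitrary `lsst.sphgeom.Region` objects):
#
#     _intersectRegions()        # -> None
#     _intersectRegions(r1)      # -> r1
#     _intersectRegions(r1, r2)  # -> NotImplemented (no exception raised)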


@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones. Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions. At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection. Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions. Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs
        are present. `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions: its length, iteration, and
    keys/values/items views reflect only required dimensions. Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in``
    operator.
    """

    __slots__ = ("records", "full", "region", "timespan")

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *,
                records: Mapping[DimensionElement, DimensionRecord],
                full: Optional[Mapping[Dimension, Any]] = None,
                region: Optional[Region] = None,
                timespan: Optional[Timespan] = None,
                conform: bool = True):
        self = super().__new__(cls, graph, values)
        if conform:
            self.records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element] for element in graph.elements)
            )
            self.full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                if record is None or record.region is None:
                    self.region = None
                    break
                else:
                    regions.append(record.region)
            else:
                self.region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                if record is None or record.timespan is None:
                    self.timespan = None
                    break
                else:
                    timespans.append(record.timespan)
            else:
                self.timespan = Timespan.intersection(*timespans)
        else:
            self.records = records
            self.full = full
            self.region = region
            self.timespan = timespan
        return self

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]
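
    # Illustrative sketch (dimension names are assumptions about the
    # configured universe, not guarantees made by this module): implied
    # dimensions and their records are reachable even though only required
    # dimensions appear in keys()/values()/items():
    #
    #     band = expanded["abstract_filter"]       # delegated to ``self.full``
    #     band = expanded.full["abstract_filter"]  # equivalent lookup
    #     record = expanded.records["physical_filter"]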

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), also return the maximum number
            of nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID. This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``. Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)
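
    # Usage sketch for `pack` (the packer name "visit_detector" is an
    # assumption; available names come from the dimension configuration):
    #
    #     packedId = expanded.pack("visit_detector")
    #     packedId, maxBits = expanded.pack("visit_detector", returnMaxBits=True)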

    def matches(self, other) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there are
    multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """