Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

from __future__ import annotations

__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")

import numbers
from typing import (
    Any,
    Callable,
    Mapping,
    Optional,
    Tuple,
    TYPE_CHECKING,
    Union,
)

from lsst.sphgeom import Region
from ..named import IndexedTupleDict, NamedKeyMapping
from ..timespan import Timespan
from .elements import Dimension
from .graph import DimensionGraph

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from .elements import DimensionElement
    from .universe import DimensionUniverse
    from .records import DimensionRecord

47 

48 

class DataCoordinate(IndexedTupleDict[Dimension, Any]):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("_graph",)

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...]):
        super().__init__(graph._requiredIndices, values)
        self._graph = graph

    @staticmethod
    def standardize(mapping: Optional[Union[Mapping[str, Any], NamedKeyMapping[Dimension, Any]]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwargs: Any) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        d: Mapping[str, Any]
        if kwargs:
            if mapping:
                if isinstance(mapping, NamedKeyMapping):
                    d = dict(mapping.byName(), **kwargs)
                else:
                    d = dict(mapping, **kwargs)
            else:
                d = kwargs
        elif mapping:
            if isinstance(mapping, NamedKeyMapping):
                d = mapping.byName()
            else:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
            # some backends cannot handle numpy.int64 type which is
            # a subclass of numbers.Integral, convert that to int.
            values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        except KeyError as err:
            raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        return DataCoordinate(graph, values)

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries:
            # we don't know if any extra keys they might have are consistent
            # with an `ExpandedDataCoordinate` version of ``self`` (which
            # should compare as equal) or something else (which should
            # compare as not equal).
            # We don't even want to return `NotImplemented` and tell Python
            # to delegate to ``other.__eq__``, because that could also be
            # misleading.  We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __str__(self) -> str:
        return f"{self.byName()}"

    def __repr__(self) -> str:
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update: Callable[[bytes], None]) -> None:
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    def graph(self) -> DimensionGraph:
        """The dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        return self._graph

257 

258 

DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""

263 

264 

265def _intersectRegions(*args: Region) -> Optional[Region]: 

266 """Return the intersection of several regions. 

267 

268 For internal use by `ExpandedDataCoordinate` only. 

269 

270 If no regions are provided, returns `None`. 

271 

272 This is currently a placeholder; it actually returns `NotImplemented` 

273 (it does *not* raise an exception) when multiple regions are given, which 

274 propagates to `ExpandedDataCoordinate`. This reflects the fact that we 

275 don't want to fail to construct an `ExpandedDataCoordinate` entirely when 

276 we can't compute its region, and at present we don't have a high-level use 

277 case for the regions of these particular data IDs. 

278 """ 

279 if len(args) == 0: 

280 return None 

281 elif len(args) == 1: 

282 return args[0] 

283 else: 

284 return NotImplemented 

285 

286 

class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("_records", "_full", "_region", "_timespan")

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...], *,
                 records: NamedKeyMapping[DimensionElement, Optional[DimensionRecord]],
                 full: Optional[NamedKeyMapping[Dimension, Any]] = None,
                 region: Optional[Region] = None,
                 timespan: Optional[Timespan] = None,
                 conform: bool = True):
        super().__init__(graph, values)
        if conform:
            self._records = IndexedTupleDict(
                indices=graph._elementIndices,
                values=tuple(records[element.name] for element in graph.elements)
            )
            self._full = IndexedTupleDict(
                indices=graph._dimensionIndices,
                values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None)
                             for dimension in graph.dimensions)
            )
            regions = []
            for element in self.graph.spatial:
                record = self.records[element.name]
                # DimensionRecord subclasses for spatial elements always have a
                # .region, but they're dynamic so this can't be type-checked.
                if record is None or record.region is None:  # type: ignore
                    self._region = None
                    break
                else:
                    regions.append(record.region)  # type: ignore
            else:
                self._region = _intersectRegions(*regions)
            timespans = []
            for element in self.graph.temporal:
                record = self.records[element.name]
                # DimensionRecord subclasses for temporal elements always have
                # .timespan, but they're dynamic so this can't be type-checked.
                if record is None or record.timespan is None:  # type: ignore
                    self._timespan = None
                    break
                else:
                    timespans.append(record.timespan)  # type: ignore
            else:
                self._timespan = Timespan.intersection(*timespans)
        else:
            # User has declared that the types are correct; ignore them.
            self._records = records  # type: ignore
            self._full = full  # type: ignore
            self._region = region
            self._timespan = timespan

    def __contains__(self, key: Any) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[Dimension, str]) -> Any:
        return self.full[key]

    def __repr__(self) -> str:
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    @property
    def full(self) -> NamedKeyMapping[Dimension, Any]:
        """Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `Dimension` instance.
        """
        return self._full

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Dictionary mapping `DimensionElement` to the associated
        `DimensionRecord` (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `DimensionElement` instance.
        """
        return self._records

    @property
    def region(self) -> Optional[Region]:
        """Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions (`sphgeom.Region`).

        At present, this may be the special value `NotImplemented` if there
        are multiple spatial dimensions identified; in the future this will be
        replaced with the intersection.
        """
        return self._region

    @property
    def timespan(self) -> Optional[Timespan]:
        """Timespan associated with this data ID, or `None` if there are no
        temporal dimensions (`Timespan`).
        """
        return self._timespan