Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId") 

25 

26import numbers 

27from typing import ( 

28 Any, 

29 Callable, 

30 Mapping, 

31 Optional, 

32 Tuple, 

33 TYPE_CHECKING, 

34 Union, 

35) 

36 

37from lsst.sphgeom import Region 

38from ..named import IndexedTupleDict, NamedKeyMapping 

39from ..timespan import Timespan 

40from .elements import Dimension 

41from .graph import DimensionGraph 

42 

43if TYPE_CHECKING: # Imports needed only for type annotations; may be circular. 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true

44 from .elements import DimensionElement 

45 from .universe import DimensionUniverse 

46 from .records import DimensionRecord 

47 

48 

class DataCoordinate(IndexedTupleDict[Dimension, Any]):
    """An immutable data ID dictionary that guarantees that its key-value pairs
    identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`.  Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`.  Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("_graph",)

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...]):
        # Keys are the graph's required dimensions; ``values`` must be given
        # in the order of ``graph._requiredIndices``.
        super().__init__(graph._requiredIndices, values)
        self._graph = graph

    @staticmethod
    def standardize(mapping: Optional[Union[Mapping[str, Any], NamedKeyMapping[Dimension, Any]]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwargs: Any) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.  May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result.  This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            # Fall through: the coordinate has to be rebuilt (extra kwargs, or
            # a graph it does not cover), using the universe it already has.
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        # Merge ``mapping`` and ``kwargs`` into one name-keyed dictionary.
        d: Mapping[str, Any]
        if mapping:
            byName = mapping.byName() if isinstance(mapping, NamedKeyMapping) else mapping
            d = dict(byName, **kwargs) if kwargs else byName
        else:
            # No (or an empty) mapping: the keyword arguments are all we have.
            d = kwargs
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            # Report the merged dictionary that was actually searched, so that
            # keys passed via ``kwargs`` show up in the message too.
            raise KeyError(f"No value in data ID ({d}) for required dimension {err}.") from err
        # Some backends cannot handle numpy.int64 type which is
        # a subclass of numbers.Integral, convert that to int.
        values = tuple(int(val) if isinstance(val, numbers.Integral) else val for val in values)
        return DataCoordinate(graph, values)

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: Any) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # We can't reliably compare to informal data ID dictionaries:
            # we don't know if any extra keys they might have are consistent
            # with an `ExpandedDataCoordinate` version of ``self`` (which
            # should compare as equal) or something else (which should
            # compare as not equal).
            # We don't even want to return `NotImplemented` and tell Python
            # to delegate to ``other.__eq__``, because that could also be
            # misleading.  We raise TypeError instead.
            raise TypeError("Cannot compare DataCoordinate instances to other objects without potentially "
                            "misleading results.") from None

    def __repr__(self) -> str:
        # We can't make repr yield something that could be exec'd here without
        # printing out the whole DimensionUniverse the graph is derived from.
        # So we print something that mostly looks like a dict, but doesn't
        # quote its keys: that's both more compact and something that can't
        # be mistaken for an actual dict or something that could be exec'd.
        return "{{{}}}".format(', '.join(f"{k.name}: {v!r}" for k, v in self.items()))

    def fingerprint(self, update: Callable[[bytes], None]) -> None:
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.

        Raises
        ------
        TypeError
            Raised if a value is neither an integer nor a `str`.
        OverflowError
            Raised by `int.to_bytes` if an integer value is negative or does
            not fit in 64 bytes.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, numbers.Integral):
                # Fixed-width, unsigned, big-endian encoding of the integer.
                update(int(v).to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    @property
    def graph(self) -> DimensionGraph:
        """The dimensions identified by this data ID (`DimensionGraph`).

        Note that values are only required to be present for dimensions in
        ``self.graph.required``; all others may be retrieved (from a
        `Registry`) given these.
        """
        return self._graph

259 

260 

# Union alias used in annotations: either a `DataCoordinate` or any mapping
# with `str` keys is accepted wherever ``DataId`` appears.
DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""

265 

266 

267def _intersectRegions(*args: Region) -> Optional[Region]: 

268 """Return the intersection of several regions. 

269 

270 For internal use by `ExpandedDataCoordinate` only. 

271 

272 If no regions are provided, returns `None`. 

273 

274 This is currently a placeholder; it actually returns `NotImplemented` 

275 (it does *not* raise an exception) when multiple regions are given, which 

276 propagates to `ExpandedDataCoordinate`. This reflects the fact that we 

277 don't want to fail to construct an `ExpandedDataCoordinate` entirely when 

278 we can't compute its region, and at present we don't have a high-level use 

279 case for the regions of these particular data IDs. 

280 """ 

281 if len(args) == 0: 

282 return None 

283 elif len(args) == 1: 

284 return args[0] 

285 else: 

286 return NotImplemented 

287 

288 

class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones.  Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions.  At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection.  Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.  Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs are
        present.  `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions - its length, iteration, and
    keys/values/items views reflect only required dimensions.  Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

    __slots__ = ("_records", "_full", "_region", "_timespan")

    def __init__(self, graph: DimensionGraph, values: Tuple[Any, ...], *,
                 records: NamedKeyMapping[DimensionElement, Optional[DimensionRecord]],
                 full: Optional[NamedKeyMapping[Dimension, Any]] = None,
                 region: Optional[Region] = None,
                 timespan: Optional[Timespan] = None,
                 conform: bool = True):
        super().__init__(graph, values)
        if not conform:
            # The caller vouches for the argument types; store them untouched.
            self._records = records  # type: ignore
            self._full = full  # type: ignore
            self._region = region
            self._timespan = timespan
            return

        # One record per element of the graph, looked up by element name.
        elementRecords = tuple(records[element.name] for element in graph.elements)
        self._records = IndexedTupleDict(indices=graph._elementIndices, values=elementRecords)

        # Primary key value of every dimension, read off its record; `None`
        # when the record is `None` or lacks the attribute.
        primaryKeys = tuple(
            getattr(self.records[dimension], dimension.primaryKey.name, None)
            for dimension in graph.dimensions
        )
        self._full = IndexedTupleDict(indices=graph._dimensionIndices, values=primaryKeys)

        # Region: `None` as soon as any spatial element has no record or no
        # region; otherwise the combination of all of them.
        spatialRegions = []
        for element in self.graph.spatial:
            spatialRecord = self.records[element.name]
            # DimensionRecord subclasses for spatial elements always have a
            # .region, but they're dynamic so this can't be type-checked.
            found = None if spatialRecord is None else spatialRecord.region  # type: ignore
            if found is None:
                self._region = None
                break
            spatialRegions.append(found)
        else:
            self._region = _intersectRegions(*spatialRegions)

        # Timespan: same scheme as for the region, over temporal elements.
        temporalSpans = []
        for element in self.graph.temporal:
            temporalRecord = self.records[element.name]
            # DimensionRecord subclasses for temporal elements always have
            # .timespan, but they're dynamic so this can't be type-checked.
            span = None if temporalRecord is None else temporalRecord.timespan  # type: ignore
            if span is None:
                self._timespan = None
                break
            temporalSpans.append(span)
        else:
            self._timespan = Timespan.intersection(*temporalSpans)

    def __contains__(self, key: Any) -> bool:
        # Membership covers every dimension in ``full``, not only required.
        return key in self.full

    def __getitem__(self, key: Union[Dimension, str]) -> Any:
        # Lookups are served from ``full``, so implied dimensions work too.
        return self.full[key]

    def __repr__(self) -> str:
        # Same dict-like, unquoted-key form as DataCoordinate.__repr__, but
        # built from ``full`` so that implied dimensions are shown as well.
        body = ', '.join(f"{k.name}: {v!r}" for k, v in self.full.items())
        return "{{{}}}".format(body)

    def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID.  This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``.  Not returned unless
            ``returnMaxBits`` is `True`.
        """
        packer = self.universe.makePacker(name, self)
        return packer.pack(self, returnMaxBits=returnMaxBits)

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        requiredValues = tuple(self[dimension] for dimension in graph.required)
        return ExpandedDataCoordinate(graph, requiredValues, records=self.records, conform=True)

    @property
    def full(self) -> NamedKeyMapping[Dimension, Any]:
        """Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `Dimension` instance.
        """
        return self._full

    @property
    def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]:
        """Dictionary mapping `DimensionElement` to the associated
        `DimensionRecord` (`NamedKeyMapping`).

        Like `DataCoordinate` itself, this dictionary can be indexed by `str`
        name as well as `DimensionElement` instance.
        """
        return self._records

    @property
    def region(self) -> Optional[Region]:
        """Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions (`sphgeom.Region`).

        At present, this may be the special value `NotImplemented` if there
        are multiple spatial dimensions identified; in the future this will
        be replaced with the intersection.
        """
        return self._region

    @property
    def timespan(self) -> Optional[Timespan]:
        """Timespan associated with this data ID, or `None` if there are no
        temporal dimensions (`Timespan`).
        """
        return self._timespan