Coverage for python/lsst/daf/butler/core/dimensions/_coordinate.py: 26%

336 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-02 14:18 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22# 

23# Design notes for this module are in 

24# doc/lsst.daf.butler/dev/dataCoordinate.py. 

25# 

26 

27from __future__ import annotations 

28 

29__all__ = ("DataCoordinate", "DataId", "DataIdKey", "DataIdValue", "SerializedDataCoordinate") 

30 

31import numbers 

32from abc import abstractmethod 

33from typing import ( 

34 TYPE_CHECKING, 

35 AbstractSet, 

36 Any, 

37 Dict, 

38 Iterator, 

39 Literal, 

40 Mapping, 

41 Optional, 

42 Tuple, 

43 Union, 

44 overload, 

45) 

46 

47from lsst.sphgeom import IntersectionRegion, Region 

48from pydantic import BaseModel 

49 

50from ..json import from_json_pydantic, to_json_pydantic 

51from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping 

52from ..timespan import Timespan 

53from ._elements import Dimension, DimensionElement 

54from ._graph import DimensionGraph 

55from ._records import DimensionRecord, SerializedDimensionRecord 

56 

57if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.

58 from ...registry import Registry 

59 from ._universe import DimensionUniverse 

60 

61DataIdKey = Union[str, Dimension] 

62"""Type annotation alias for the keys that can be used to index a 

63DataCoordinate. 

64""" 

65 

66# Pydantic will cast int to str if str is first in the Union. 

67DataIdValue = Union[int, str, None] 

68"""Type annotation alias for the values that can be present in a 

69DataCoordinate or other data ID. 

70""" 

71 

72 

class SerializedDataCoordinate(BaseModel):
    """Simplified model for serializing a `DataCoordinate`."""

    dataId: Dict[str, DataIdValue]
    records: Optional[Dict[str, SerializedDimensionRecord]] = None

    @classmethod
    def direct(
        cls, *, dataId: Dict[str, DataIdValue], records: Optional[Dict[str, Dict]]
    ) -> SerializedDataCoordinate:
        """Construct a `SerializedDataCoordinate` directly without validators.

        This differs from the pydantic "construct" method in that the arguments
        are explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        dataId : `dict` [ `str`, `int` or `str` or `None` ]
            Mapping from dimension name to primary key value.
        records : `dict` [ `str`, `dict` ] or `None`
            Per-element serialized records, or `None` if no records are
            attached.  (The body already handled `None`; the annotation now
            reflects that.)

        Returns
        -------
        node : `SerializedDataCoordinate`
            A new model instance, bypassing pydantic validation.
        """
        # Use cls so subclasses get instances of their own type.
        node = cls.__new__(cls)
        # Bypass pydantic's __setattr__, which would trigger validation.
        setter = object.__setattr__
        setter(node, "dataId", dataId)
        setter(
            node,
            "records",
            records
            if records is None
            else {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()},
        )
        # Mark both fields as explicitly set, as pydantic would.
        setter(node, "__fields_set__", {"dataId", "records"})
        return node

101 

102 

103def _intersectRegions(*args: Region) -> Optional[Region]: 

104 """Return the intersection of several regions. 

105 

106 For internal use by `ExpandedDataCoordinate` only. 

107 

108 If no regions are provided, returns `None`. 

109 """ 

110 if len(args) == 0: 

111 return None 

112 else: 

113 result = args[0] 

114 for n in range(1, len(args)): 

115 result = IntersectionRegion(result, args[n]) 

116 return result 

117 

118 

119class DataCoordinate(NamedKeyMapping[Dimension, DataIdValue]): 

120 """Data ID dictionary. 

121 

122 An immutable data ID dictionary that guarantees that its key-value pairs 

123 identify at least all required dimensions in a `DimensionGraph`. 

124 

125 `DataCoordinate` itself is an ABC, but provides `staticmethod` factory 

126 functions for private concrete implementations that should be sufficient 

127 for most purposes. `standardize` is the most flexible and safe of these; 

128 the others (`makeEmpty`, `fromRequiredValues`, and `fromFullValues`) are 

129 more specialized and perform little or no checking of inputs. 

130 

131 Notes 

132 ----- 

133 Like any data ID class, `DataCoordinate` behaves like a dictionary, but 

134 with some subtleties: 

135 

136 - Both `Dimension` instances and `str` names thereof may be used as keys 

137 in lookup operations, but iteration (and `keys`) will yield `Dimension` 

138 instances. The `names` property can be used to obtain the corresponding 

139 `str` names. 

140 

141 - Lookups for implied dimensions (those in ``self.graph.implied``) are 

142 supported if and only if `hasFull` returns `True`, and are never 

143 included in iteration or `keys`. The `full` property may be used to 

144 obtain a mapping whose keys do include implied dimensions. 

145 

146 - Equality comparison with other mappings is supported, but it always 

147 considers only required dimensions (as well as requiring both operands 

148 to identify the same dimensions). This is not quite consistent with the 

149 way mappings usually work - normally differing keys imply unequal 

150 mappings - but it makes sense in this context because data IDs with the 

151 same values for required dimensions but different values for implied 

152 dimensions represent a serious problem with the data that 

153 `DataCoordinate` cannot generally recognize on its own, and a data ID 

154 that knows implied dimension values should still be able to compare as 

155 equal to one that does not. This is of course not the way comparisons 

156 between simple `dict` data IDs work, and hence using a `DataCoordinate` 

157 instance for at least one operand in any data ID comparison is strongly 

158 recommended. 

159 """ 

160 

161 __slots__ = () 

162 

163 _serializedType = SerializedDataCoordinate 

164 

    @staticmethod
    def standardize(
        mapping: Optional[NameLookupMapping[Dimension, DataIdValue]] = None,
        *,
        graph: Optional[DimensionGraph] = None,
        universe: Optional[DimensionUniverse] = None,
        defaults: Optional[DataCoordinate] = None,
        **kwargs: Any,
    ) -> DataCoordinate:
        """Standardize the supplied dataId.

        Adapts an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimensions or dimension names to
            their primary key values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping`` and
            ``**kwargs``, and ``universe`` must be provided unless ``mapping``
            is already a `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        defaults : `DataCoordinate`, optional
            Default dimension key-value pairs to use when needed. These are
            never used to infer ``graph``, and are ignored if a different
            value is provided for the same key in ``mapping`` or
            ``**kwargs``.
        **kwargs
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.
        """
        # Accumulate key-value pairs (keyed by string dimension name) here
        # before deciding which of them the output actually needs.
        d: Dict[str, DataIdValue] = {}
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwargs:
                    # Already standardized to exactly what we want.
                    return mapping
            elif kwargs.keys().isdisjoint(graph.dimensions.names):
                # User provided kwargs, but told us not to use them by
                # passing in dimensions that are disjoint from those kwargs.
                # This is not necessarily user error - it's a useful pattern
                # to pass in all of the key-value pairs you have and let the
                # code here pull out only what it needs.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
            d.update((name, mapping[name]) for name in mapping.graph.required.names)
            if mapping.hasFull():
                d.update((name, mapping[name]) for name in mapping.graph.implied.names)
        elif isinstance(mapping, NamedKeyMapping):
            d.update(mapping.byName())
        elif mapping is not None:
            d.update(mapping)
        d.update(kwargs)
        if graph is None:
            # Infer the universe (needed to construct a graph) from whichever
            # argument can supply one.
            if defaults is not None:
                universe = defaults.universe
            elif universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        if not graph.dimensions:
            return DataCoordinate.makeEmpty(graph.universe)
        if defaults is not None:
            # setdefault ensures explicitly-provided values always win over
            # defaults.
            if defaults.hasFull():
                for k, v in defaults.full.items():
                    d.setdefault(k.name, v)
            else:
                for k, v in defaults.items():
                    d.setdefault(k.name, v)
        if d.keys() >= graph.dimensions.names:
            # We have values for every dimension (required and implied).
            values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
        else:
            try:
                values = tuple(d[name] for name in graph.required.names)
            except KeyError as err:
                raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
        # Some backends cannot handle numpy.int64 type which is a subclass of
        # numbers.Integral; convert that to int.
        values = tuple(
            int(val) if isinstance(val, numbers.Integral) else val for val in values  # type: ignore
        )
        return _BasicTupleDataCoordinate(graph, values)

263 

264 @staticmethod 

265 def makeEmpty(universe: DimensionUniverse) -> DataCoordinate: 

266 """Return an empty `DataCoordinate`. 

267 

268 It identifies the null set of dimensions. 

269 

270 Parameters 

271 ---------- 

272 universe : `DimensionUniverse` 

273 Universe to which this null dimension set belongs. 

274 

275 Returns 

276 ------- 

277 dataId : `DataCoordinate` 

278 A data ID object that identifies no dimensions. `hasFull` and 

279 `hasRecords` are guaranteed to return `True`, because both `full` 

280 and `records` are just empty mappings. 

281 """ 

282 return _ExpandedTupleDataCoordinate(universe.empty, (), {}) 

283 

284 @staticmethod 

285 def fromRequiredValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate: 

286 """Construct a `DataCoordinate` from required dimension values. 

287 

288 This is a low-level interface with at most assertion-level checking of 

289 inputs. Most callers should use `standardize` instead. 

290 

291 Parameters 

292 ---------- 

293 graph : `DimensionGraph` 

294 Dimensions this data ID will identify. 

295 values : `tuple` [ `int` or `str` ] 

296 Tuple of primary key values corresponding to ``graph.required``, 

297 in that order. 

298 

299 Returns 

300 ------- 

301 dataId : `DataCoordinate` 

302 A data ID object that identifies the given dimensions. 

303 ``dataId.hasFull()`` will return `True` if and only if 

304 ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never 

305 return `True`. 

306 """ 

307 assert len(graph.required) == len( 

308 values 

309 ), f"Inconsistency between dimensions {graph.required} and required values {values}." 

310 return _BasicTupleDataCoordinate(graph, values) 

311 

312 @staticmethod 

313 def fromFullValues(graph: DimensionGraph, values: Tuple[DataIdValue, ...]) -> DataCoordinate: 

314 """Construct a `DataCoordinate` from all dimension values. 

315 

316 This is a low-level interface with at most assertion-level checking of 

317 inputs. Most callers should use `standardize` instead. 

318 

319 Parameters 

320 ---------- 

321 graph : `DimensionGraph` 

322 Dimensions this data ID will identify. 

323 values : `tuple` [ `int` or `str` ] 

324 Tuple of primary key values corresponding to 

325 ``itertools.chain(graph.required, graph.implied)``, in that order. 

326 Note that this is _not_ the same order as ``graph.dimensions``, 

327 though these contain the same elements. 

328 

329 Returns 

330 ------- 

331 dataId : `DataCoordinate` 

332 A data ID object that identifies the given dimensions. 

333 ``dataId.hasFull()`` will return `True` if and only if 

334 ``graph.implied`` is empty, and ``dataId.hasRecords()`` will never 

335 return `True`. 

336 """ 

337 assert len(graph.dimensions) == len( 

338 values 

339 ), f"Inconsistency between dimensions {graph.dimensions} and full values {values}." 

340 return _BasicTupleDataCoordinate(graph, values) 

341 

342 def __hash__(self) -> int: 

343 return hash((self.graph,) + tuple(self[d.name] for d in self.graph.required)) 

344 

345 def __eq__(self, other: Any) -> bool: 

346 if not isinstance(other, DataCoordinate): 

347 other = DataCoordinate.standardize(other, universe=self.universe) 

348 return self.graph == other.graph and all(self[d.name] == other[d.name] for d in self.graph.required) 

349 

350 def __repr__(self) -> str: 

351 # We can't make repr yield something that could be exec'd here without 

352 # printing out the whole DimensionUniverse the graph is derived from. 

353 # So we print something that mostly looks like a dict, but doesn't 

354 # quote its keys: that's both more compact and something that can't 

355 # be mistaken for an actual dict or something that could be exec'd. 

356 terms = [f"{d}: {self[d]!r}" for d in self.graph.required.names] 

357 if self.hasFull() and self.graph.required != self.graph.dimensions: 

358 terms.append("...") 

359 return "{{{}}}".format(", ".join(terms)) 

360 

361 def __lt__(self, other: Any) -> bool: 

362 # Allow DataCoordinate to be sorted 

363 if not isinstance(other, type(self)): 

364 return NotImplemented 

365 # Form tuple of tuples for each DataCoordinate: 

366 # Unlike repr() we only use required keys here to ensure that 

367 # __eq__ can not be true simultaneously with __lt__ being true. 

368 self_kv = tuple(self.items()) 

369 other_kv = tuple(other.items()) 

370 

371 return self_kv < other_kv 

372 

373 def __iter__(self) -> Iterator[Dimension]: 

374 return iter(self.keys()) 

375 

376 def __len__(self) -> int: 

377 return len(self.keys()) 

378 

379 def keys(self) -> NamedValueAbstractSet[Dimension]: # type: ignore 

380 return self.graph.required 

381 

382 @property 

383 def names(self) -> AbstractSet[str]: 

384 """Names of the required dimensions identified by this data ID. 

385 

386 They are returned in the same order as `keys` 

387 (`collections.abc.Set` [ `str` ]). 

388 """ 

389 return self.keys().names 

390 

391 @abstractmethod 

392 def subset(self, graph: DimensionGraph) -> DataCoordinate: 

393 """Return a `DataCoordinate` whose graph is a subset of ``self.graph``. 

394 

395 Parameters 

396 ---------- 

397 graph : `DimensionGraph` 

398 The dimensions identified by the returned `DataCoordinate`. 

399 

400 Returns 

401 ------- 

402 coordinate : `DataCoordinate` 

403 A `DataCoordinate` instance that identifies only the given 

404 dimensions. May be ``self`` if ``graph == self.graph``. 

405 

406 Raises 

407 ------ 

408 KeyError 

409 Raised if the primary key value for one or more required dimensions 

410 is unknown. This may happen if ``graph.issubset(self.graph)`` is 

411 `False`, or even if ``graph.issubset(self.graph)`` is `True`, if 

412 ``self.hasFull()`` is `False` and 

413 ``graph.required.issubset(self.graph.required)`` is `False`. As 

414 an example of the latter case, consider trying to go from a data ID 

415 with dimensions {instrument, physical_filter, band} to 

416 just {instrument, band}; band is implied by 

417 physical_filter and hence would have no value in the original data 

418 ID if ``self.hasFull()`` is `False`. 

419 

420 Notes 

421 ----- 

422 If `hasFull` and `hasRecords` return `True` on ``self``, they will 

423 return `True` (respectively) on the returned `DataCoordinate` as well. 

424 The converse does not hold. 

425 """ 

426 raise NotImplementedError() 

427 

428 @abstractmethod 

429 def union(self, other: DataCoordinate) -> DataCoordinate: 

430 """Combine two data IDs. 

431 

432 Yields a new one that identifies all dimensions that either of them 

433 identify. 

434 

435 Parameters 

436 ---------- 

437 other : `DataCoordinate` 

438 Data ID to combine with ``self``. 

439 

440 Returns 

441 ------- 

442 unioned : `DataCoordinate` 

443 A `DataCoordinate` instance that satisfies 

444 ``unioned.graph == self.graph.union(other.graph)``. Will preserve 

445 ``hasFull`` and ``hasRecords`` whenever possible. 

446 

447 Notes 

448 ----- 

449 No checking for consistency is performed on values for keys that 

450 ``self`` and ``other`` have in common, and which value is included in 

451 the returned data ID is not specified. 

452 """ 

453 raise NotImplementedError() 

454 

455 @abstractmethod 

456 def expanded( 

457 self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]] 

458 ) -> DataCoordinate: 

459 """Return a `DataCoordinate` that holds the given records. 

460 

461 Guarantees that `hasRecords` returns `True`. 

462 

463 This is a low-level interface with at most assertion-level checking of 

464 inputs. Most callers should use `Registry.expandDataId` instead. 

465 

466 Parameters 

467 ---------- 

468 records : `Mapping` [ `str`, `DimensionRecord` or `None` ] 

469 A `NamedKeyMapping` with `DimensionElement` keys or a regular 

470 `Mapping` with `str` (`DimensionElement` name) keys and 

471 `DimensionRecord` values. Keys must cover all elements in 

472 ``self.graph.elements``. Values may be `None`, but only to reflect 

473 actual NULL values in the database, not just records that have not 

474 been fetched. 

475 """ 

476 raise NotImplementedError() 

477 

478 @property 

479 def universe(self) -> DimensionUniverse: 

480 """Universe that defines all known compatible dimensions. 

481 

482 The univers will be compatible with this coordinate 

483 (`DimensionUniverse`). 

484 """ 

485 return self.graph.universe 

486 

487 @property 

488 @abstractmethod 

489 def graph(self) -> DimensionGraph: 

490 """Dimensions identified by this data ID (`DimensionGraph`). 

491 

492 Note that values are only required to be present for dimensions in 

493 ``self.graph.required``; all others may be retrieved (from a 

494 `Registry`) given these. 

495 """ 

496 raise NotImplementedError() 

497 

498 @abstractmethod 

499 def hasFull(self) -> bool: 

500 """Whether this data ID contains implied and required values. 

501 

502 Returns 

503 ------- 

504 state : `bool` 

505 If `True`, `__getitem__`, `get`, and `__contains__` (but not 

506 `keys`!) will act as though the mapping includes key-value pairs 

507 for implied dimensions, and the `full` property may be used. If 

508 `False`, these operations only include key-value pairs for required 

509 dimensions, and accessing `full` is an error. Always `True` if 

510 there are no implied dimensions. 

511 """ 

512 raise NotImplementedError() 

513 

514 @property 

515 def full(self) -> NamedKeyMapping[Dimension, DataIdValue]: 

516 """Return mapping for all dimensions in ``self.graph``. 

517 

518 The mapping includes key-value pairs for all dimensions in 

519 ``self.graph``, including implied (`NamedKeyMapping`). 

520 

521 Accessing this attribute if `hasFull` returns `False` is a logic error 

522 that may raise an exception of unspecified type either immediately or 

523 when implied keys are accessed via the returned mapping, depending on 

524 the implementation and whether assertions are enabled. 

525 """ 

526 assert self.hasFull(), "full may only be accessed if hasFull() returns True." 

527 return _DataCoordinateFullView(self) 

528 

529 @abstractmethod 

530 def hasRecords(self) -> bool: 

531 """Whether this data ID contains records. 

532 

533 These are the records for all of the dimension elements it identifies. 

534 

535 Returns 

536 ------- 

537 state : `bool` 

538 If `True`, the following attributes may be accessed: 

539 

540 - `records` 

541 - `region` 

542 - `timespan` 

543 - `pack` 

544 

545 If `False`, accessing any of these is considered a logic error. 

546 """ 

547 raise NotImplementedError() 

548 

549 @property 

550 def records(self) -> NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]: 

551 """Return the records. 

552 

553 Returns a mapping that contains `DimensionRecord` objects for all 

554 elements identified by this data ID (`NamedKeyMapping`). 

555 

556 The values of this mapping may be `None` if and only if there is no 

557 record for that element with these dimensions in the database (which 

558 means some foreign key field must have a NULL value). 

559 

560 Accessing this attribute if `hasRecords` returns `False` is a logic 

561 error that may raise an exception of unspecified type either 

562 immediately or when the returned mapping is used, depending on the 

563 implementation and whether assertions are enabled. 

564 """ 

565 assert self.hasRecords(), "records may only be accessed if hasRecords() returns True." 

566 return _DataCoordinateRecordsView(self) 

567 

568 @abstractmethod 

569 def _record(self, name: str) -> Optional[DimensionRecord]: 

570 """Protected implementation hook that backs the ``records`` attribute. 

571 

572 Parameters 

573 ---------- 

574 name : `str` 

575 The name of a `DimensionElement`, guaranteed to be in 

576 ``self.graph.elements.names``. 

577 

578 Returns 

579 ------- 

580 record : `DimensionRecord` or `None` 

581 The dimension record for the given element identified by this 

582 data ID, or `None` if there is no such record. 

583 """ 

584 raise NotImplementedError() 

585 

586 @property 

587 def region(self) -> Optional[Region]: 

588 """Spatial region associated with this data ID. 

589 

590 (`lsst.sphgeom.Region` or `None`). 

591 

592 This is `None` if and only if ``self.graph.spatial`` is empty. 

593 

594 Accessing this attribute if `hasRecords` returns `False` is a logic 

595 error that may or may not raise an exception, depending on the 

596 implementation and whether assertions are enabled. 

597 """ 

598 assert self.hasRecords(), "region may only be accessed if hasRecords() returns True." 

599 regions = [] 

600 for family in self.graph.spatial: 

601 element = family.choose(self.graph.elements) 

602 record = self._record(element.name) 

603 if record is None or record.region is None: 

604 return None 

605 else: 

606 regions.append(record.region) 

607 return _intersectRegions(*regions) 

608 

609 @property 

610 def timespan(self) -> Optional[Timespan]: 

611 """Temporal interval associated with this data ID. 

612 

613 (`Timespan` or `None`). 

614 

615 This is `None` if and only if ``self.graph.timespan`` is empty. 

616 

617 Accessing this attribute if `hasRecords` returns `False` is a logic 

618 error that may or may not raise an exception, depending on the 

619 implementation and whether assertions are enabled. 

620 """ 

621 assert self.hasRecords(), "timespan may only be accessed if hasRecords() returns True." 

622 timespans = [] 

623 for family in self.graph.temporal: 

624 element = family.choose(self.graph.elements) 

625 record = self._record(element.name) 

626 # DimensionRecord subclasses for temporal elements always have 

627 # .timespan, but they're dynamic so this can't be type-checked. 

628 if record is None or record.timespan is None: 

629 return None 

630 else: 

631 timespans.append(record.timespan) 

632 if not timespans: 

633 return None 

634 elif len(timespans) == 1: 

635 return timespans[0] 

636 else: 

637 return Timespan.intersection(*timespans) 

638 

639 @overload 

640 def pack(self, name: str, *, returnMaxBits: Literal[True]) -> Tuple[int, int]: 

641 ... 

642 

643 @overload 

644 def pack(self, name: str, *, returnMaxBits: Literal[False]) -> int: 

645 ... 

646 

647 def pack(self, name: str, *, returnMaxBits: bool = False) -> Union[Tuple[int, int], int]: 

648 """Pack this data ID into an integer. 

649 

650 Parameters 

651 ---------- 

652 name : `str` 

653 Name of the `DimensionPacker` algorithm (as defined in the 

654 dimension configuration). 

655 returnMaxBits : `bool`, optional 

656 If `True` (`False` is default), return the maximum number of 

657 nonzero bits in the returned integer across all data IDs. 

658 

659 Returns 

660 ------- 

661 packed : `int` 

662 Integer ID. This ID is unique only across data IDs that have 

663 the same values for the packer's "fixed" dimensions. 

664 maxBits : `int`, optional 

665 Maximum number of nonzero bits in ``packed``. Not returned unless 

666 ``returnMaxBits`` is `True`. 

667 

668 Notes 

669 ----- 

670 Accessing this attribute if `hasRecords` returns `False` is a logic 

671 error that may or may not raise an exception, depending on the 

672 implementation and whether assertions are enabled. 

673 """ 

674 assert self.hasRecords(), "pack() may only be called if hasRecords() returns True." 

675 return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits) 

676 

677 def to_simple(self, minimal: bool = False) -> SerializedDataCoordinate: 

678 """Convert this class to a simple python type. 

679 

680 This is suitable for serialization. 

681 

682 Parameters 

683 ---------- 

684 minimal : `bool`, optional 

685 Use minimal serialization. If set the records will not be attached. 

686 

687 Returns 

688 ------- 

689 simple : `SerializedDataCoordinate` 

690 The object converted to simple form. 

691 """ 

692 # Convert to a dict form 

693 if self.hasFull(): 

694 dataId = self.full.byName() 

695 else: 

696 dataId = self.byName() 

697 records: Optional[Dict[str, SerializedDimensionRecord]] 

698 if not minimal and self.hasRecords(): 

699 records = {k: v.to_simple() for k, v in self.records.byName().items() if v is not None} 

700 else: 

701 records = None 

702 

703 return SerializedDataCoordinate(dataId=dataId, records=records) 

704 

705 @classmethod 

706 def from_simple( 

707 cls, 

708 simple: SerializedDataCoordinate, 

709 universe: Optional[DimensionUniverse] = None, 

710 registry: Optional[Registry] = None, 

711 ) -> DataCoordinate: 

712 """Construct a new object from the simplified form. 

713 

714 The data is assumed to be of the form returned from the `to_simple` 

715 method. 

716 

717 Parameters 

718 ---------- 

719 simple : `dict` of [`str`, `Any`] 

720 The `dict` returned by `to_simple()`. 

721 universe : `DimensionUniverse` 

722 The special graph of all known dimensions. 

723 registry : `lsst.daf.butler.Registry`, optional 

724 Registry from which a universe can be extracted. Can be `None` 

725 if universe is provided explicitly. 

726 

727 Returns 

728 ------- 

729 dataId : `DataCoordinate` 

730 Newly-constructed object. 

731 """ 

732 if universe is None and registry is None: 

733 raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate") 

734 if universe is None and registry is not None: 

735 universe = registry.dimensions 

736 if universe is None: 

737 # this is for mypy 

738 raise ValueError("Unable to determine a usable universe") 

739 

740 dataId = cls.standardize(simple.dataId, universe=universe) 

741 if simple.records: 

742 dataId = dataId.expanded( 

743 {k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()} 

744 ) 

745 return dataId 

746 

747 to_json = to_json_pydantic 

748 from_json = classmethod(from_json_pydantic) 

749 

750 

751DataId = Union[DataCoordinate, Mapping[str, Any]] 

752"""A type-annotation alias for signatures that accept both informal data ID 

753dictionaries and validated `DataCoordinate` instances. 

754""" 

755 

756 

class _DataCoordinateFullView(NamedKeyMapping[Dimension, DataIdValue]):
    """View class backing the default implementation of
    `DataCoordinate.full`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        parts = [f"{name}: {self[name]!r}" for name in self._target.graph.dimensions.names]
        return "{" + ", ".join(parts) + "}"

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Delegate to the target, which supports implied keys when full.
        return self._target[key]

    def __iter__(self) -> Iterator[Dimension]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[Dimension]:  # type: ignore
        # Unlike the target itself, this view includes implied dimensions.
        return self._target.graph.dimensions

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names

794 

795 

class _DataCoordinateRecordsView(NamedKeyMapping[DimensionElement, Optional[DimensionRecord]]):
    """View class backing the default implementation of
    `DataCoordinate.records`.

    Parameters
    ----------
    target : `DataCoordinate`
        The `DataCoordinate` instance this object provides a view of.
    """

    def __init__(self, target: DataCoordinate):
        self._target = target

    __slots__ = ("_target",)

    def __repr__(self) -> str:
        parts = [f"{name}: {self[name]!r}" for name in self._target.graph.elements.names]
        return "{" + ", ".join(parts) + "}"

    def __str__(self) -> str:
        return "\n".join(str(record) for record in self.values())

    def __getitem__(self, key: Union[DimensionElement, str]) -> Optional[DimensionRecord]:
        # Normalize DimensionElement keys to string names before delegating
        # to the target's record-lookup hook.
        name = key.name if isinstance(key, DimensionElement) else key
        return self._target._record(name)

    def __iter__(self) -> Iterator[DimensionElement]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self.keys())

    def keys(self) -> NamedValueAbstractSet[DimensionElement]:  # type: ignore
        return self._target.graph.elements

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited from `NamedKeyMapping`.
        return self.keys().names

838 

839 

class _BasicTupleDataCoordinate(DataCoordinate):
    """Standard implementation of `DataCoordinate`.

    Backed by a tuple of values.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via the static
    methods there.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``. May
        include values for just required dimensions (which always come first)
        or all dimensions.
    """

    def __init__(self, graph: DimensionGraph, values: Tuple[DataIdValue, ...]):
        self._graph = graph
        self._values = values

    __slots__ = ("_graph", "_values")

    @property
    def graph(self) -> DimensionGraph:
        # Docstring inherited from DataCoordinate.
        return self._graph

    def __getitem__(self, key: DataIdKey) -> DataIdValue:
        # Docstring inherited from DataCoordinate.
        if isinstance(key, Dimension):
            key = key.name
        index = self._graph._dataCoordinateIndices[key]
        try:
            return self._values[index]
        except IndexError:
            # Caller asked for an implied dimension, but this object only has
            # values for the required ones; report a missing key, not a bad
            # index.
            raise KeyError(key) from None

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if self._graph == graph:
            return self
        elif self.hasFull() or self._graph.required >= graph.dimensions:
            # We can look up values for all of the new graph's dimensions
            # (required and implied) directly on this data ID.
            return _BasicTupleDataCoordinate(
                graph,
                tuple(self[k] for k in graph._dataCoordinateIndices.keys()),
            )
        else:
            # Only the new graph's required dimensions can be provided.
            return _BasicTupleDataCoordinate(graph, tuple(self[k] for k in graph.required.names))

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        graph = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to
        # return; if so, return the most complete one we have.
        if other.graph == graph:
            if self.graph == graph:
                # Input data IDs have the same graph (which is also the
                # result graph), but may not have the same content.
                # other might have records; self does not, so try other
                # first.  If it at least has full values, it's no worse than
                # self.
                if other.hasFull():
                    return other
                else:
                    return self
            elif other.hasFull():
                return other
            # There's some chance that neither self nor other has full
            # values, but together they provide enough for the union to.
            # Let the general case below handle that.
        elif self.graph == graph:
            # No chance at returning records.  If self has full values, it's
            # the best we can do.
            if self.hasFull():
                return self
        # General case with actual merging of dictionaries.
        values = self.full.byName() if self.hasFull() else self.byName()
        values.update(other.full.byName() if other.hasFull() else other.byName())
        return DataCoordinate.standardize(values, graph=graph)

    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate
        values = self._values
        if not self.hasFull():
            # Extract a complete values tuple from the attributes of the
            # given records.  It's possible for these to be inconsistent with
            # self._values (which is a serious problem, of course), but we've
            # documented this as a no-checking API.
            values += tuple(getattr(records[d.name], d.primaryKey.name) for d in self._graph.implied)
        return _ExpandedTupleDataCoordinate(self._graph, values, records)

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # The values tuple holds either just the required dimensions or all
        # of them; it is full iff its length matches the complete index map.
        return len(self._values) == len(self._graph._dataCoordinateIndices)

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return False

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        # hasRecords() is always False for this class, so record lookup
        # should never be delegated here.  Raise explicitly rather than
        # using a bare ``assert False``, which is stripped under
        # ``python -O`` and would silently return None instead.
        raise AssertionError("_record should never be called on a data ID without records.")

948 

949 

class _ExpandedTupleDataCoordinate(_BasicTupleDataCoordinate):
    """A `DataCoordinate` implementation that can hold `DimensionRecord`.

    This class should only be accessed outside this module via the
    `DataCoordinate` interface, and should only be constructed via calls to
    `DataCoordinate.expanded`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions to be identified.
    values : `tuple` [ `int` or `str` ]
        Data ID values, ordered to match ``graph._dataCoordinateIndices``.
        May include values for just required dimensions (which always come
        first) or all dimensions.
    records : `Mapping` [ `str`, `DimensionRecord` or `None` ]
        A `NamedKeyMapping` with `DimensionElement` keys or a regular
        `Mapping` with `str` (`DimensionElement` name) keys and
        `DimensionRecord` values.  Keys must cover all elements in
        ``self.graph.elements``.  Values may be `None`, but only to reflect
        actual NULL values in the database, not just records that have not
        been fetched.
    """

    __slots__ = ("_records",)

    def __init__(
        self,
        graph: DimensionGraph,
        values: Tuple[DataIdValue, ...],
        records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]],
    ):
        super().__init__(graph, values)
        assert super().hasFull(), "This implementation requires full dimension records."
        self._records = records

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        if graph == self._graph:
            return self
        narrowed = tuple(self[name] for name in graph._dataCoordinateIndices.keys())
        # The record mapping is passed through unchanged.
        return _ExpandedTupleDataCoordinate(graph, narrowed, records=self._records)

    def expanded(
        self, records: NameLookupMapping[DimensionElement, Optional[DimensionRecord]]
    ) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        # Already expanded; the given records are ignored.
        return self

    def union(self, other: DataCoordinate) -> DataCoordinate:
        # Docstring inherited from DataCoordinate.
        combined = self.graph.union(other.graph)
        # See if one or both input data IDs is already what we want to
        # return; if so, return the most complete one we have.
        if self.graph == combined:
            # self has records, so even if other is also a valid result,
            # it's no better.
            return self
        if other.graph == combined and other.hasFull():
            # other has full values and self does not identify some of
            # those, so it's the best we can do.  It may have records, too.
            return other
        # If other covers the union graph but lacks full values, self may
        # still provide the values needed to complete it.  For example, self
        # could be {band} while other could be
        # {instrument, physical_filter, band}, with band unknown.
        # General case with actual merging of dictionaries.
        merged = self.full.byName()
        merged.update(other.full.byName() if other.hasFull() else other.byName())
        basic = DataCoordinate.standardize(merged, graph=combined)
        # See if we can add records.
        if self.hasRecords() and other.hasRecords():
            # Sometimes the elements of a union of graphs can contain
            # elements that weren't in either input graph (because graph
            # unions are only on dimensions).  e.g. {visit} | {detector}
            # brings along visit_detector_region.
            uncovered = set(combined.elements.names)
            uncovered -= self.graph.elements.names
            uncovered -= other.graph.elements.names
            if not uncovered:
                all_records = NamedKeyDict[DimensionElement, Optional[DimensionRecord]](self.records)
                all_records.update(other.records)
                return basic.expanded(all_records.freeze())
        return basic

    def hasFull(self) -> bool:
        # Docstring inherited from DataCoordinate.
        # Guaranteed by the assertion in __init__.
        return True

    def hasRecords(self) -> bool:
        # Docstring inherited from DataCoordinate.
        return True

    def _record(self, name: str) -> Optional[DimensionRecord]:
        # Docstring inherited from DataCoordinate.
        return self._records[name]