Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 51%

136 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-01 02:05 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Names exported by ``from ... import *``; kept in alphabetical order.
__all__ = ["ChainedCollectionRecord", "CollectionManager", "CollectionRecord", "RunRecord"]

29 

30from abc import abstractmethod 

31from collections import defaultdict 

32from collections.abc import Iterator, Set 

33from typing import TYPE_CHECKING, Any 

34 

35from ...core import DimensionUniverse, Timespan, ddl 

36from .._collectionType import CollectionType 

37from ..wildcards import CollectionWildcard 

38from ._versioning import VersionedExtension 

39 

40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true

41 from ._database import Database, StaticTablesContext 

42 from ._dimensions import DimensionRecordStorageManager 

43 

44 

class CollectionRecord:
    """Lightweight struct describing one collection inside `Registry` code.

    Code exposed to users should identify collections by `str` name only;
    this type is for internal APIs.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may simply be ``name`` when
        names are used for identification; typically an integer or a string,
        but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value giving the kind of collection.

    Notes
    -----
    ``name``, ``key`` and ``type`` are assigned by this base class and must be
    treated as immutable by users and subclasses alike: equality is defined in
    terms of all three and the hash in terms of ``name``.  Subclasses may add
    mutable attributes provided those never take part in a subclass
    definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Compare name, then type, then key, stopping at the first mismatch.
        # An operand that lacks one of the attributes reached is "not
        # comparable", which lets Python try the reflected operation.
        try:
            if not self.name == other.name:
                return False
            if not self.type == other.type:
                return False
            return self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Equal records always share a name, so hashing the name alone is
        # consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The user-facing representation of a collection is just its name.
        return self.name

103 

104 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization for runs.

    Adds read access to execution information (host and timespan) and an
    interface for updating it.
    """

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Write new execution information for this run to the database.

        Arguments that are not provided are set to ``NULL`` in the database;
        they are not left unchanged.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  Its
            detailed form is set by higher-level convention; to `Registry`
            it is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        # The type is omitted from the representation, unlike the base class.
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

149 

150 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally carries the ordered list of child collections.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may simply be ``name`` when
        names are used for identification; typically an integer or a string,
        but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted for interface compatibility; this base class does not use
        it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Starts empty; populated by `update` or `refresh`.
        self._children: tuple[str, ...] = ()

    @property
    def children(self) -> tuple[str, ...]:
        """The ordered search path of child collections that define this chain
        (`tuple` [ `str` ]).
        """
        return self._children

    def update(self, manager: CollectionManager, children: tuple[str, ...], flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        wildcard = CollectionWildcard.from_names(children)
        # Cycle check: look at every CHAINED collection reachable from the
        # requested children and make sure this record is not among them.
        reachable_chains = manager.resolve_wildcard(
            wildcard,
            flatten_chains=True,
            include_chains=True,
            collection_types={CollectionType.CHAINED},
        )
        if any(chain == self for chain in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flattened = manager.resolve_wildcard(wildcard, flatten_chains=True)
            children = tuple(member.name for member in flattened)
        # Let the concrete subclass write the new definition to the database.
        self._update(manager, children)
        # Bring the manager's child -> parents mapping up to date: drop the
        # links for the previous children, then add links for the new ones.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        for current in children:
            manager._parents_by_child[manager.find(current).key].add(self.key)
        # Only now replace this instance's own sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This is a separate step so that every collection that may appear in a
        chain is already known to the manager before any chain asks the
        manager for those records.  `ChainedCollectionRecord` subclasses can
        rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Remove the stale child -> parents links before reloading.
        for stale in self._children:
            manager._parents_by_child[manager.find(stale).key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for loaded in self._children:
            manager._parents_by_child[manager.find(loaded).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: tuple[str, ...]) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should call it; external code
        should always go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> tuple[str, ...]:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to read the chain's child collections from
        the database and return them.  Nothing other than `refresh` should
        call it; external code should always go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `tuple` [ `str` ]
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

292 

293 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  It is maintained by
        # `ChainedCollectionRecord.update` and `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a CHAINED collection that has the given collection as a
            direct child.  Nothing is yielded if no parent is registered for
            ``key``.
        """
        # Use ``get`` instead of indexing: indexing a `defaultdict` would
        # insert an empty set for every key that is merely queried.  The
        # tuple snapshot keeps iteration valid if the caller modifies chain
        # membership while consuming this generator.
        parent_keys = tuple(self._parents_by_child.get(key, ()))
        for parent_key in parent_keys:
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result