Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 53%

134 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-22 02:05 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "RunRecord", 

28] 

29 

30from abc import abstractmethod 

31from collections import defaultdict 

32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple 

33 

34from ...core import DimensionUniverse, Timespan, ddl 

35from .._collectionType import CollectionType 

36from ..wildcards import CollectionSearch 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ._database import Database, StaticTablesContext 

41 from ._dimensions import DimensionRecordStorageManager 

42 

43 

class CollectionRecord:
    """Internal `Registry` representation of a single collection.

    Code outside of the registry internals is expected to refer to
    collections by their `str` name only.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification; it is usually an integer
        or a string, but other database-specific types are allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Equality compares ``name``, ``type`` and ``key`` and the hash is computed
    from ``name``, so these three attributes must be treated as immutable by
    all users and subclasses.  Subclasses may add mutable attributes as long
    as those do not take part in any subclass definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Sanity check on the stored value; see the immutability note in the
        # class docstring.
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Objects lacking any of the compared attributes are not comparable;
        # defer to the other operand in that case.
        try:
            same = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Only the name is hashed; equal records always share a name.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The plain string form is the collection name.
        return self.name

102 

103 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that also carries execution
    information for a run, together with an interface for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information to the database record of this
        run.

        Arguments that are not provided are not ignored: the corresponding
        values will be set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  Its
            detailed form is left to higher-level convention; as far as
            `Registry` is concerned the value is entirely opaque.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

148 

149 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally tracks the ordered child collections of the chain.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification; it is usually an integer
        or a string, but other database-specific types are allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted by the constructor but not used by this base class.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A freshly constructed chain starts with an empty search path; the
        # real children are set later by `update` or `refresh`.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # This chain showing up among the (recursively expanded) CHAINED
        # collections of the new children means the definition is cyclic.
        nested_chains = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(chain == self for chain in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flattened_names = tuple(child.name for child in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flattened_names)
        # Let the derived class perform the database updates.
        self._update(manager, children)
        # Bring the manager's reverse mapping (child -> parents) up to date:
        # drop the relationships of the old children, then register the new
        # ones.
        for old_name in self._children:
            old_key = manager.find(old_name).key
            manager._parents_by_child[old_key].discard(self.key)
        for new_name in children:
            new_key = manager.find(new_name).key
            manager._parents_by_child[new_key].add(self.key)
        # Finally remember the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to
        resolve collection primary key values into records.

        This method exists so that every collection that may appear in a
        chain is known to the manager before any particular chain tries to
        retrieve its records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Forget the reverse-mapping entries (child -> parents) that belong
        # to the current children.
        for stale_name in self._children:
            stale_key = manager.find(stale_name).key
            manager._parents_by_child[stale_key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for loaded_name in self._children:
            loaded_key = manager.find(loaded_name).key
            manager._parents_by_child[loaded_key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  It should never be called by anything other than
        `update`, which is what all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  It should never be called by
        anything other than `refresh`, which is what all external code
        should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the
            chained collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

287 

288 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from the key of a child collection to the keys of
        # the CHAINED collections that list it as a child.  It is maintained
        # by `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        parent : `ChainedCollectionRecord`
            Record of a chain registered as a parent of the given collection.

        Notes
        -----
        The parent keys are copied when iteration starts, so the reverse
        mapping may be modified (for example by redefining one of the yielded
        chains) while the returned iterator is being consumed.
        """
        # ``get`` is used instead of indexing because indexing a
        # `defaultdict` inserts an empty entry for every key that is merely
        # queried.  The ``tuple`` snapshot protects against "set changed size
        # during iteration" if the mapping is updated between yields.
        for parent_key in tuple(self._parents_by_child.get(key, ())):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result