Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 53%

135 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-30 02:19 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "RunRecord", 

28] 

29 

30from abc import abstractmethod 

31from collections import defaultdict 

32from collections.abc import Iterator 

33from typing import TYPE_CHECKING, Any 

34 

35from ...core import DimensionUniverse, Timespan, ddl 

36from .._collectionType import CollectionType 

37from ..wildcards import CollectionSearch 

38from ._versioning import VersionedExtension 

39 

40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true

41 from ._database import Database, StaticTablesContext 

42 from ._dimensions import DimensionRecordStorageManager 

43 

44 

class CollectionRecord:
    """Lightweight record describing one collection for internal `Registry`
    APIs.

    Code that faces users should refer to collections by their `str` names
    only; this type is meant for internal interfaces.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or a string,
        but may be some other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    The ``name``, ``key``, and ``type`` attributes set by this class must be
    treated as immutable by all users and subclasses: equality compares all
    three and instances are hashable.  Attributes added by subclasses may be
    mutable, provided they do not take part in any subclass definition of
    equality.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __eq__(self, other: Any) -> bool:
        # Duck-typed comparison: anything exposing ``name``, ``type`` and
        # ``key`` can be compared; objects lacking one of them are deferred
        # to the other operand via ``NotImplemented``.
        try:
            same = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Only the name feeds the hash; records that compare equal always
        # share a name, so this stays consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The string form is just the collection name.
        return self.name

103 

104 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` subclass that adds execution information and an
    interface for updating it.
    """

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values that are not provided will be set to ``NULL`` in the
        database; they are not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is left to higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

149 

150 

class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` subclass that adds the list of child collections
    of a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or a string,
        but may be some other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted by the constructor; this base class does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts out with an empty search path.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Reject the new definition if this chain shows up among the chained
        # collections reachable from the proposed children.
        chained_descendants = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(descendant == self for descendant in chained_descendants):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flattened_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flattened_names)
        # Let the derived class perform the database updates.
        self._update(manager, children)
        # Bring the manager's reverse mapping (child -> parents) up to date:
        # first drop the relationships of the old definition, then record
        # those of the new one.
        for previous_name in self._children:
            previous_key = manager.find(previous_name).key
            manager._parents_by_child[previous_key].discard(self.key)
        for current_name in children:
            current_key = manager.find(current_name).key
            manager._parents_by_child[current_key].add(self.key)
        # Finally adopt the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists so that every collection that may appear in a
        chain is known to the manager before any particular chain tries to
        retrieve those records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Drop this chain from the reverse mapping of its current children.
        for stale_name in self._children:
            stale_key = manager.find(stale_name).key
            manager._parents_by_child[stale_key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for loaded_name in self._children:
            loaded_key = manager.find(loaded_name).key
            manager._parents_by_child[loaded_key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  It should never be called by anything other than `update`,
        which is the entry point for all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  It should never be called by
        anything other than `refresh`, which is the entry point for all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

288 

289 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from the key of a child collection to the keys of
        # the CHAINED collections that directly contain it.  It is maintained
        # by `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a chain that has the given collection as a direct
            child, according to this manager's reverse mapping.
        """
        # ``_parents_by_child`` is a `defaultdict`, so indexing it with an
        # unknown key would insert an empty set as a side effect of what
        # should be a read-only query; ``get`` avoids that.  Iterating over a
        # snapshot lets callers modify chains (which mutates the underlying
        # set) while they consume this generator.
        for parent_key in tuple(self._parents_by_child.get(key, ())):
            result = self[parent_key]
            # Narrow the type for static checkers; parents are chains.
            assert isinstance(result, ChainedCollectionRecord)
            yield result

638 assert isinstance(result, ChainedCollectionRecord) 

639 yield result