Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 52%

119 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-02 18:18 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "RunRecord", 

28] 

29 

30from abc import abstractmethod 

31from collections import defaultdict 

32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple 

33 

34from ...core import DimensionUniverse, Timespan, ddl 

35from .._collectionType import CollectionType 

36from ..wildcards import CollectionSearch 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ._database import Database, StaticTablesContext 

41 from ._dimensions import DimensionRecordStorageManager 

42 

43 

class CollectionRecord:
    """Lightweight description of one collection for internal `Registry` APIs.

    Code that faces users should refer to collections by their `str` names
    only; instances of this class are meant for `Registry` internals.

    Parameters
    ----------
    key
        Unique collection ID.  It may be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or a string,
        but other database-specific types are possible.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check: the stored type must be a real enumeration
        # value, not e.g. its name or integer code.
        assert isinstance(self.type, CollectionType)

79 

80 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization for runs.

    On top of the base record it exposes execution information (``host`` and
    ``timespan``) together with an `update` method for changing it.  All of
    these are declared abstract here.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information to the database record for this
        run.

        Arguments that are not provided are not ignored: the corresponding
        values will be set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

122 

123 

class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``CHAINED`` collections that
    additionally tracks the ordered list of child collections.

    Parameters
    ----------
    key
        Unique collection ID.  It may be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or a string,
        but other database-specific types are possible.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base-class
        constructor does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts out empty; `update` or `refresh` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the new children; if
        # this record is among them, the new definition would be cyclic.
        nested_chains = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(chain == self for chain in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Let the derived class perform the database updates.
        self._update(manager, children)
        # Keep the manager's child -> parents mapping in sync: first drop the
        # links for the previous children, then add links for the new ones.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        for current in children:
            manager._parents_by_child[manager.find(current).key].add(self.key)
        # Finally record the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists so that every collection that may appear in a
        chain is known to the manager before any particular chain tries to
        retrieve its records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Remove this chain from the child -> parents mapping for the
        # children it had before the reload.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for current in self._children:
            manager._parents_by_child[manager.find(current).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should ever call it; external
        code should always go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  Nothing other than `refresh`
        should ever call it; external code should always go through
        `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

258 

259 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  It is kept up to
        # date by `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`, and read by `getParentChains`.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a CHAINED collection that has the given collection as
            a direct child.  Nothing is yielded if no parents are recorded
            for ``key``.
        """
        # Use ``get`` rather than indexing: indexing a defaultdict would
        # insert an empty set for every key that is merely queried.  The
        # tuple snapshot lets callers relink chains (which mutates the
        # underlying set) while they are still consuming this generator.
        for parent_key in tuple(self._parents_by_child.get(key, ())):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result