Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 57%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

113 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "RunRecord", 

28] 

29 

30from abc import abstractmethod 

31from collections import defaultdict 

32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple 

33 

34from ...core import DimensionUniverse, Timespan, ddl 

35from .._collectionType import CollectionType 

36from ..wildcards import CollectionSearch 

37from ._versioning import VersionedExtension 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from ._database import Database, StaticTablesContext 

41 from ._dimensions import DimensionRecordStorageManager 

42 

43 

class CollectionRecord:
    """Lightweight record describing one collection inside `Registry`.

    This type is for internal `Registry` APIs only; user-facing code should
    identify collections by their `str` name.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Typically an integer or a
        string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """Primary/foreign key value identifying this collection.
    """

    type: CollectionType
    """Enumeration value giving the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check only: it disappears when Python runs with -O.
        assert isinstance(self.type, CollectionType)

79 

80 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization for runs.

    Adds execution information (host and timespan) together with an
    interface for updating it.

    Notes
    -----
    `CollectionRecord` is declared as a plain class, so the
    ``abstractmethod`` markers below document intent but are not enforced
    when an instance is created; the `NotImplementedError` bodies are the
    runtime guard.
    """

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information to this run's database record.

        Arguments that are not provided are set to ``NULL`` in the
        database; they are not left unchanged.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is set by higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

122 

123 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections.

    In addition to the base record it holds the ordered list of child
    collections that make up the chain.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Typically an integer or a
        string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base-class
        constructor does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with no children; `refresh` or `update` fills it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record and every record that may
            appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve into the new child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children;
        # finding this record among them means the chain would contain itself.
        reachable_chains = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(candidate == self for candidate in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flattened_names = [member.name for member in children.iter(manager, flattenChains=True)]
            children = CollectionSearch.fromExpression(tuple(flattened_names))
        # Subclasses persist the new definition to the database.
        self._update(manager, children)
        # Keep the manager's child -> parents reverse map in step: first drop
        # the links for the previous children, then add links for the new ones.
        for previous_name in self._children:
            manager._parents_by_child[manager.find(previous_name).key].discard(self.key)
        for current_name in children:
            manager._parents_by_child[manager.find(current_name).key].add(self.key)
        # Only now replace this instance's own sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database.

        The given manager is used to resolve collection primary key values
        into records.  This is a separate step so that every collection that
        may appear in a chain is known to the manager before any chain asks
        it for records.  `ChainedCollectionRecord` subclasses can rely on
        this being called some time after their own ``__init__`` to finish
        construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record and every record that may
            appear among its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should call it; external code
        must go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record and every record that may
            appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve into the child collections of
            this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to fetch the chain's child collections
        from the database and return them.  Nothing other than `refresh`
        should call it; external code must go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record and every record that may
            appear among its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the
            chained collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

252 

253 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  It is maintained by
        # `ChainedCollectionRecord.update` and read by `getParentChains`.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a chain recorded as a direct parent of the collection
            identified by ``key``.  Nothing is yielded when no parents are
            recorded for that key.

        Notes
        -----
        The set of parent keys is copied when iteration starts, so the
        chains yielded here may be redefined (which edits the reverse
        mapping) while the caller is still iterating.
        """
        # ``get`` instead of indexing: indexing a defaultdict would insert an
        # empty set for every key that is merely queried.  The tuple snapshot
        # prevents "Set changed size during iteration" if the mapping is
        # modified between yields.
        parent_keys = tuple(self._parents_by_child.get(key, ()))
        for parent_key in parent_keys:
            result = self[parent_key]
            # Narrow the type for static checkers: only chains are stored as
            # parents in the reverse mapping.
            assert isinstance(result, ChainedCollectionRecord)
            yield result