Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 63%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

101 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Names exported by ``from ... import *``; these are the public classes
# defined in this module.
__all__ = ["ChainedCollectionRecord", "CollectionManager", "CollectionRecord", "RunRecord"]

29 

30from abc import abstractmethod 

31from typing import TYPE_CHECKING, Any, Iterator, Optional, Tuple 

32 

33from ...core import DimensionUniverse, Timespan, ddl 

34from .._collectionType import CollectionType 

35from ..wildcards import CollectionSearch 

36from ._versioning import VersionedExtension 

37 

38if TYPE_CHECKING: 38 ↛ 39line 38 didn't jump to line 39, because the condition on line 38 was never true

39 from ._database import Database, StaticTablesContext 

40 from ._dimensions import DimensionRecordStorageManager 

41 

42 

class CollectionRecord:
    """Lightweight record describing one collection in internal `Registry`
    interfaces.

    Code facing users should identify collections by plain `str` names
    rather than by instances of this class.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Usually
        an integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value giving the type of the collection.
    """

    key: Any
    """Primary/foreign key value identifying this collection.
    """

    name: str
    """The collection's name (`str`).
    """

    type: CollectionType
    """The type of the collection, as an enumeration value
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        # ``type`` shadows the builtin, but it is part of the public
        # signature (callers may pass it by keyword), so the name stays.
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

78 

79 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that carries execution information
    for a run, together with the interface used to update it.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan] = None,
    ) -> None:
        """Write new execution information for this run to its database
        record.

        Omitted values are not ignored: they are set to ``NULL`` in the
        database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; as far as
            `Registry` is concerned the value is entirely opaque.
        timespan : `Timespan`, optional
            Begin and end timestamps of the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps of the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

121 

122 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization that also holds the child
    collections of a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Usually
        an integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base class does
        not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with an empty search path; `refresh` or
        # `update` replaces it later.
        empty_chain = CollectionSearch.fromExpression([])
        self._children = empty_chain

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that make up this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Replace the child collections searched by this chain.

        All external code should set children through this method.  The
        subclass-specific work is delegated to `_update`, which is the
        method subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve in order to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, first recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children``.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Inspect the CHAINED collections yielded for the proposed children
        # (nested chains included); meeting this very record among them
        # means the new definition would loop back on itself.
        nested_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(chain == self for chain in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(child.name for child in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Call the subclass hook first so the in-memory search path is only
        # replaced when the hook did not raise.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database, resolving collection
        primary key values into records through the given manager.

        The purpose of this method is to make sure every collection that
        may appear in a chain is known to the manager before any particular
        chain attempts to retrieve their records from it.  Subclasses of
        `ChainedCollectionRecord` may rely on it being called some time
        after their own ``__init__`` in order to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook behind `update`.

        Subclasses implement this method to make the database reflect the
        given children.  Nothing other than `update` should ever call it;
        all external code should go through `update` instead.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve in order to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook behind `refresh`.

        Subclasses implement this method to fetch the chain's child
        collections from the database and return them.  Nothing other than
        `refresh` should ever call it; all external code should go through
        `refresh` instead.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.

        Returns
        -------
        children : `CollectionSearch`
            Ordered sequence of collection names defining the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

244 

245 

class CollectionManager(VersionedExtension):
    """Abstract interface to the collections (runs included) of a
    `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection in which it holds datasets (or quanta).  Because different
    layers may use different IDs for the same collection, IDs obtained
    through the `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
    ) -> CollectionManager:
        """Build a manager instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            Instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the collection
            table.  Modified in place.
        prefix : `str`, optional
            Name used as the prefix of the new field; the full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what should happen to
            the referencing row when the collection row is deleted.  `None`
            means that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (the default is `True`), add a field that can be
            joined to the collection primary key without adding a foreign
            key constraint.
        **kwargs
            Further keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the run table.
            Modified in place.
        prefix : `str`, optional
            Name used as the prefix of the new field; the full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what should happen to
            the referencing row when the collection row is deleted.  `None`
            means that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (the default is `True`), add a field that can be
            joined to the run primary key without adding a foreign key
            constraint.
        **kwargs
            Further keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name used as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name used as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since the manager was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self,
        name: str,
        type: CollectionType,
        doc: Optional[str] = None,
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value giving the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Registering the same collection concurrently should be safe:
        nothing should happen when the types are consistent, and integrity
        errors caused by inconsistent types should occur before any
        database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove a collection completely.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If tables managed by other objects reference this collection
        through foreign keys, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also make this operation
        fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Look up the collection record for the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections that another client of the same layer registered after
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Look up the collection record for the given primary/foreign key
        value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections that another client of the same layer registered after
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over every managed collection.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Fetch the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Store the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()