Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 60%

101 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public names exported by ``from ... import *``; kept in alphabetical order.
__all__ = ["ChainedCollectionRecord", "CollectionManager", "CollectionRecord", "RunRecord"]

29 

30from abc import abstractmethod 

31from typing import ( 

32 Any, 

33 Iterator, 

34 Optional, 

35 Tuple, 

36 TYPE_CHECKING, 

37) 

38 

39from ...core import ddl, DimensionUniverse, Timespan 

40from ..wildcards import CollectionSearch 

41from .._collectionType import CollectionType 

42from ._versioning import VersionedExtension 

43 

44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true

45 from ._database import Database, StaticTablesContext 

46 from ._dimensions import DimensionRecordStorageManager 

47 

48 

class CollectionRecord:
    """Lightweight struct describing one collection inside `Registry`
    internals.

    Code outside the registry is expected to identify collections by their
    `str` names only; this record type is for internal APIs.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Typically
        an integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value giving the kind of collection.
    """

    key: Any
    """Primary/foreign key value identifying this collection.
    """

    name: str
    """The collection's name (`str`).
    """

    type: CollectionType
    """Enumeration value giving the kind of collection (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check only: callers must pass a real enum member.
        assert isinstance(self.type, CollectionType)

83 

84 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization for runs.

    In addition to the base record it exposes execution information (host
    and timespan) together with a hook for updating that information.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan] = None) -> None:
        """Store new execution information for this run in the database.

        Arguments that are left out are written as ``NULL``; they are *not*
        treated as "leave the existing value alone".

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system that produced this run.  Its detailed
            form is defined by higher-level convention; `Registry` treats it
            as an opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps of the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system that produced this run
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps of the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

127 

128 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally carries the ordered list of child collections.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Typically
        an integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base-class
        constructor does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start out with an empty search path; `refresh` or `update` fills
        # it in later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that make up this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Replace the child collections searched by this chain.

        External code should always go through this method to set the
        children.  The database work is delegated to `_update`, which is the
        hook subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            Object managing this record and every record that may appear
            among its children.
        children : `CollectionSearch`
            Search path to resolve into the new child collections of this
            chain.
        flatten : `bool`
            If `True`, any nested `~CollectionType.CHAINED` collections in
            ``children`` are recursively expanded before being stored.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed
        # children; finding this record among them means a cycle.
        nested_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(candidate == self for candidate in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")

        resolved = children
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            resolved = CollectionSearch.fromExpression(flat_names)

        # Let the subclass update the database first, then remember the new
        # children on this instance.
        self._update(manager, resolved)
        self._children = resolved

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database, resolving collection
        primary key values into records through the given manager.

        The method exists so that every collection that may appear in a
        chain is already known to the manager before any chain asks it for
        records.  Subclasses may rely on it being called at some point after
        their own ``__init__`` to complete construction.

        Parameters
        ----------
        manager : `CollectionManager`
            Object managing this record and every record that may appear
            among its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected hook that persists a new set of children.

        Subclasses implement this to make the database reflect the given
        children.  It is meant to be invoked only from `update`, which is
        the entry point all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            Object managing this record and every record that may appear
            among its children.
        children : `CollectionSearch`
            Search path to resolve into the child collections of this
            chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected hook backing `refresh`.

        Subclasses implement this to read the chain's child collections
        from the database and return them.  It is meant to be invoked only
        from `refresh`, which is the entry point all external code should
        use.

        Parameters
        ----------
        manager : `CollectionManager`
            Object managing this record and every record that may appear
            among its children.

        Returns
        -------
        children : `CollectionSearch`
            Ordered sequence of collection names defining the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

249 

250 

class CollectionManager(VersionedExtension):
    """Interface for objects that manage the collections (runs included) of
    a `Registry`.

    Notes
    -----
    In a multi-layer `Registry`, every layer keeps its own record for each
    collection for which it holds datasets (or quanta).  The same collection
    may have different IDs in different layers, so IDs obtained through the
    `CollectionManager` APIs are strictly for use internal to `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   dimensions: DimensionRecordStorageManager) -> CollectionManager:
        """Build a manager instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare the tables that must always exist in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field plus constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the collection
            table.  Modified in place.
        prefix : `str`, optional
            Prefix for the name of the new field; the full name may carry a
            suffix and is reported in the returned `ddl.FieldSpec`.
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what happens to the
            referencing row when the collection row is deleted.  `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (the default is `True`), add a field that can be
            joined to the collection primary key without adding a foreign
            key constraint.
        **kwargs
            Extra keyword arguments forwarded to the `ddl.FieldSpec`
            constructor (only ``name`` and ``dtype`` are otherwise
            provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field plus constraint) that references the
        run table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the run table.
            Modified in place.
        prefix : `str`, optional
            Prefix for the name of the new field; the full name may carry a
            suffix and is reported in the returned `ddl.FieldSpec`.
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what happens to the
            referencing row when the collection row is deleted.  `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (the default is `True`), add a field that can be
            joined to the run primary key without adding a foreign key
            constraint.
        **kwargs
            Extra keyword arguments forwarded to the `ddl.FieldSpec`
            constructor (only ``name`` and ``dtype`` are otherwise
            provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey`
        adds when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Prefix for the name of the field; the full name may carry a
            suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Prefix for the name of the field; the full name may carry a
            suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since the manager was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType,
                 doc: Optional[str] = None) -> Tuple[CollectionRecord, bool]:
        """Make sure a collection with the given name and type exists in
        the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored when the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            When ``type is CollectionType.RUN`` this is a `RunRecord`
            instance; when ``type is CollectionType.CHAINED`` this is a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe:
        nothing should happen if the types are consistent, and integrity
        errors caused by inconsistent types should occur before any
        database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove a collection completely.

        Any existing `CollectionRecord` objects corresponding to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If the collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables are
        invoked.  That frequently deletes many dependent rows automatically
        (via "CASCADE"), but it may also make this operation fail (with
        rollback) unless dependent rows lacking an ON DELETE clause are
        removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Look up a collection record by collection name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            When ``record.type is CollectionType.RUN`` this is a
            `RunRecord` instance; when
            ``record.type is CollectionType.CHAINED`` this is a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Look up a collection record by primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  When ``record.type is CollectionType.RUN`` this is a
            `RunRecord` instance; when
            ``record.type is CollectionType.CHAINED`` this is a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over every managed collection.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Fetch the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Assign the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()