Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 85%

95 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl 

30 

# Public names exported by this module.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "RunRecord",
]

37 

38from abc import abstractmethod 

39from collections.abc import Iterable, Set 

40from typing import TYPE_CHECKING, Any, Generic, Self, TypeVar 

41 

42from ..._timespan import Timespan 

43from .._collection_type import CollectionType 

44from ..wildcards import CollectionWildcard 

45from ._versioning import VersionedExtension, VersionTuple 

46 

47if TYPE_CHECKING: 

48 from .._caching_context import CachingContext 

49 from ._database import Database, StaticTablesContext 

50 

51 

# Type variable for the primary/foreign key value that identifies a
# collection (see ``CollectionRecord.key``).
_Key = TypeVar("_Key")

53 

54 

class CollectionRecord(Generic[_Key]):
    """Lightweight struct describing one collection in internal `Registry`
    APIs.

    Code that faces users should refer to collections by `str` name only.

    Parameters
    ----------
    key : _Key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Typically an integer or string,
        but may be another database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    The ``name``, ``key``, and ``type`` attributes assigned here must be
    treated as immutable by every user and subclass: equality compares all
    three and the hash is computed from ``name``.  Attributes introduced by
    subclasses may be mutable provided they take no part in any subclass
    definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Compare name, then type, then key, stopping at the first mismatch.
        # An object lacking the attribute being inspected is treated as
        # "not comparable" rather than "not equal".
        try:
            if not self.name == other.name:
                return False
            if not self.type == other.type:
                return False
            return self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Equal records always share a name, so hashing the name alone is
        # consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name

113 

114 

class RunRecord(CollectionRecord[_Key]):
    """`CollectionRecord` specialization for ``RUN`` collections that also
    carries execution information (host and timespan).

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced.  When omitted, a `Timespan` with both bounds set to `None`
        is used.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        # Fall back to a timespan with no bounds when none was supplied.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

158 

159 

class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key : _Key
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections.  The names are copied
        into a `tuple` on construction.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        # Annotated with the class type variable (not ``Any``) so that the
        # generic parameter can be inferred from the key, matching the base
        # class and `RunRecord` constructors.
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Materialize once so the stored search path is immutable and ordered.
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

191 

192 

class CollectionManager(Generic[_Key], VersionedExtension):
    """Abstract interface for managing the collections (runs included) of a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection in which it has datasets (or quanta).  Because different layers
    may use different IDs for the same collection, IDs obtained through the
    `CollectionManager` APIs are strictly for use internal to `Registry`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @abstractmethod
    def clone(self, db: Database, caching_context: CachingContext) -> Self:
        """Create an independent copy of this manager bound to a different
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.
        caching_context : `CachingContext`
            New `CachingContext` object to use when instantiating the manager.

        Returns
        -------
        instance : `CollectionManager`
            New manager instance configured like this one, but bound to the
            new `Database` object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Build an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make all other operations on this manager aware of any collections
        that other clients may have registered since it was initialized or
        last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self,
        name: str,
        type: CollectionType,
        doc: str | None = None,
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe:
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Look up the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Look up the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key : `typing.Any`
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level
            logic) that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Fetch the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Assign the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find the names of all CHAINED collections that directly contain the
        given collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self,
        record: ChainedCollectionRecord[_Key],
        children: Iterable[str],
        flatten: bool = False,
    ) -> ChainedCollectionRecord[_Key]:
        """Change the composition of a chained collection.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Chained collection record produced by the implementation
            (presumably reflecting the new children; confirm against the
            concrete managers).
        """
        raise NotImplementedError()