Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 84%

93 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl 

30 

31__all__ = [ 

32 "ChainedCollectionRecord", 

33 "CollectionManager", 

34 "CollectionRecord", 

35 "RunRecord", 

36] 

37 

38from abc import abstractmethod 

39from collections.abc import Iterable, Set 

40from typing import TYPE_CHECKING, Any, Generic, TypeVar 

41 

42from ..._timespan import Timespan 

43from .._collection_type import CollectionType 

44from ..wildcards import CollectionWildcard 

45from ._versioning import VersionedExtension, VersionTuple 

46 

47if TYPE_CHECKING: 

48 from .._caching_context import CachingContext 

49 from ._database import Database, StaticTablesContext 

50 from ._dimensions import DimensionRecordStorageManager 

51 

52 

_Key = TypeVar("_Key")


class CollectionRecord(Generic[_Key]):
    """Lightweight record describing a collection inside internal `Registry`
    APIs.

    Code exposed to users should refer to collections by their `str` name
    only; this class is for internal bookkeeping.

    Parameters
    ----------
    key : _Key
        Unique collection ID. May be identical to ``name`` when the name
        itself is used for identification. Usually an integer or a string,
        but other database-specific types are possible.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Instances are hashable, and equality is defined in terms of the `name`,
    `type`, and `key` attributes assigned here, so users and derived classes
    must treat those three attributes as immutable. Attributes added by
    subclasses may be mutable provided they do not take part in any subclass
    definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.name = name
        self.key = key
        self.type = type
        # Sanity check only; skipped when Python runs with assertions off.
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Compare name, then type, then key, stopping at the first mismatch.
        # Any object missing one of the attributes we reach is treated as
        # "not comparable" rather than "not equal".
        try:
            matches = self.name == other.name
            if matches:
                matches = self.type == other.type
            if matches:
                matches = self.key == other.key
        except AttributeError:
            return NotImplemented
        return matches

    def __hash__(self) -> int:
        # Only the name is hashed; records that compare equal always share
        # a name, so this is consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The string form is just the collection name.
        return self.name

114 

115 

class RunRecord(CollectionRecord[_Key]):
    """A `CollectionRecord` specialization that also carries execution
    information (host and timespan) for a run.

    The collection type is not a parameter: it is always
    `CollectionType.RUN`.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced. When omitted, a `Timespan` with both bounds set to `None`
        is used.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        # Substitute a timespan with no bounds when the caller gave none.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan
        self.host = host

    def __repr__(self) -> str:
        # Host and timespan are deliberately not part of the representation.
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

159 

160 

class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    The collection type is not a parameter: it is always
    `CollectionType.CHAINED`.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections. The iterable is
        consumed once and stored as a `tuple`.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        # Annotated with the class type variable (as in `RunRecord`) so that
        # the key type of a chained record is tied to its generic parameter.
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Snapshot the children so later changes to the caller's iterable
        # cannot affect this record.
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

192 

193 

class CollectionManager(Generic[_Key], VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`, optional
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: _Key) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find all CHAINED collection names that directly contain the given
        collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self, record: ChainedCollectionRecord[_Key], children: Iterable[str], flatten: bool = False
    ) -> ChainedCollectionRecord[_Key]:
        """Update chained collection composition.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Record for the chained collection after the update.  Callers
            should use the returned record rather than assume that the
            ``record`` argument was modified in place.
        """
        raise NotImplementedError()