Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 84%

93 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import ddl 

30 

31__all__ = [ 

32 "ChainedCollectionRecord", 

33 "CollectionManager", 

34 "CollectionRecord", 

35 "RunRecord", 

36] 

37 

38from abc import abstractmethod 

39from collections.abc import Iterable, Set 

40from typing import TYPE_CHECKING, Any, Generic, TypeVar 

41 

42from ..._timespan import Timespan 

43from .._collection_type import CollectionType 

44from ..wildcards import CollectionWildcard 

45from ._versioning import VersionedExtension, VersionTuple 

46 

47if TYPE_CHECKING: 

48 from .._caching_context import CachingContext 

49 from ._database import Database, StaticTablesContext 

50 from ._dimensions import DimensionRecordStorageManager 

51 

52 

53_Key = TypeVar("_Key") 

54 

55 

class CollectionRecord(Generic[_Key]):
    """Lightweight description of a collection for internal `Registry` APIs.

    Code outside of the registry internals should identify collections with
    a plain `str` name rather than with instances of this class.

    Parameters
    ----------
    key
        Unique collection ID.  It may be the same value as ``name`` when
        names are used for identification.  Usually an integer or a string,
        but other database-specific types are possible.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Equality compares ``name``, ``type`` and ``key``, and the hash is derived
    from ``name``.  Because instances are hashable, these three attributes
    must be treated as immutable by all users and derived classes.
    Attributes added by subclasses may be mutable as long as they do not take
    part in a subclass definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        try:
            # Short-circuit evaluation is deliberate: attributes of ``other``
            # are only looked up while all previous comparisons succeeded.
            return (
                self.name == other.name
                and self.type == other.type
                and self.key == other.key
            )
        except AttributeError:
            # ``other`` lacks an attribute we need; let Python try the
            # reflected comparison instead of deciding here.
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    def __repr__(self) -> str:
        return (
            f"CollectionRecord(key={self.key!r}, "
            f"name={self.name!r}, type={self.type!r})"
        )

    def __str__(self) -> str:
        return self.name

114 

115 

class RunRecord(CollectionRecord[_Key]):
    """A `CollectionRecord` of type ``RUN`` that also carries execution
    information (host and timespan).

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced.  When `None`, a `Timespan` with both bounds set to `None`
        is stored instead.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        # Always store a Timespan instance so the attribute is never None.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"

159 

160 

class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections.  The names are copied
        into a `tuple`, so later changes to the argument are not reflected in
        the record.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        # Annotated with the class type variable (not ``Any``) so that type
        # checkers tie the key type to the generic parameter, matching
        # `CollectionRecord` and `RunRecord`.
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"

192 

193 

class CollectionManager(Generic[_Key], VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of the registry schema; forwarded unchanged to the
        `VersionedExtension` constructor.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively include the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find all CHAINED collection names that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self, record: ChainedCollectionRecord[_Key], children: Iterable[str], flatten: bool = False
    ) -> ChainedCollectionRecord[_Key]:
        """Update chained collection composition.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Record for the chained collection after the update.  Whether this
            is ``record`` itself or a new object is up to the concrete
            implementation.
        """
        raise NotImplementedError()