Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

# Public names exported by this module.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "RunRecord",
]

from abc import abstractmethod
from typing import (
    Any,
    Iterator,
    Optional,
    TYPE_CHECKING,
)

from ...core import ddl, DimensionUniverse, Timespan
from ..wildcards import CollectionSearch
from .._collectionType import CollectionType
from ._versioning import VersionedExtension

# These names are only referenced in annotations in this module, so they are
# imported for static type checkers only and never at runtime.
if TYPE_CHECKING:
    from ._database import Database, StaticTablesContext
    from ._dimensions import DimensionRecordStorageManager

46 

47 

class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """
    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal invariant check (not input validation): it is skipped
        # entirely when Python runs with ``-O``.
        assert isinstance(self.type, CollectionType), \
            f"type must be a CollectionType, got {self.type!r}"

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

82 

83 

class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    # NOTE(review): `abstractmethod` is only enforced at instantiation time
    # when the class uses an `abc.ABCMeta`-derived metaclass.
    # `CollectionRecord` as defined in this module does not set one, so the
    # decorators below act as documentation unless a subclass adds the
    # metaclass; the ``raise NotImplementedError()`` bodies are the runtime
    # safeguard.

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

126 

127 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted for interface compatibility; it is not used by this base
        class implementation.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` replaces it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Cycle check: this record must not appear among the CHAINED
        # collections yielded for the proposed children.  (Presumably the
        # iteration recurses through nested chains because of
        # ``flattenChains=True`` -- confirm against `CollectionSearch.iter`.)
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionTypes={CollectionType.CHAINED}):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            # Replace the search path with the names of the records it
            # resolves to when chains are flattened.
            children = CollectionSearch.fromExpression(
                tuple(record.name for record in children.iter(manager, flattenChains=True))
            )
        # Call the subclass hook first so that the in-memory state is left
        # untouched if it raises.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

248 

249 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   dimensions: DimensionRecordStorageManager) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()