Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "RunRecord", 

28] 

29 

30from abc import abstractmethod 

31from typing import ( 

32 Any, 

33 Iterator, 

34 Optional, 

35 TYPE_CHECKING, 

36) 

37 

38from ...core import ddl, DimensionUniverse, Timespan 

39from ..wildcards import CollectionSearch 

40from .._collectionType import CollectionType 

41from ._versioning import VersionedExtension 

42 

43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true

44 from ._database import Database, StaticTablesContext 

45 from ._dimensions import DimensionRecordStorageManager 

46 

47 

class CollectionRecord:
    """Lightweight record describing one collection in internal `Registry`
    APIs.

    Code facing the user should keep identifying collections by plain `str`
    names; this type is meant for internal use only.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification.  Typically an integer or
        a string, but other database-specific types are possible.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """The collection's name (`str`).
    """

    key: Any
    """Primary/foreign key value identifying this collection.
    """

    type: CollectionType
    """Enumeration value giving the type of this collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Sanity check on the stored type; like any ``assert`` it is skipped
        # when Python runs with optimizations enabled.
        assert isinstance(self.type, CollectionType)

82 

83 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that carries execution information
    for a run, together with the interface used to update it.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan] = None,
    ) -> None:
        """Write new execution information to this run's database record.

        An argument that is not provided is not ignored: the corresponding
        value is set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  Its
            detailed form is left to higher-level convention; as far as
            `Registry` is concerned the value is entirely opaque.
        timespan : `Timespan`, optional
            Begin and end timestamps of the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps of the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

126 

127 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for a ``CHAINED`` collection, which
    additionally holds the list of child collections in the chain.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification.  Typically an integer or
        a string, but other database-specific types are possible.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted by the constructor; this base class does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Begin with the search built from an empty expression; `refresh`
        # and `update` replace it later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        actual persistence is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve in order to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Ask the search for its CHAINED records (chains flattened and
        # included); finding this record among them is reported as a cycle.
        nested_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(candidate == self for candidate in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database, resolving collection
        primary key values into records through the given manager.

        The purpose of this method is to guarantee that every collection
        that may appear in a chain is known to the manager before any
        particular chain tries to fetch its records from it.  Subclasses of
        `ChainedCollectionRecord` may rely on it being called at some point
        after their own ``__init__`` to complete construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook behind `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should ever call it; external
        code must go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            Collection search path to resolve in order to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook behind `refresh`.

        Subclasses implement this to fetch the chain's child collections
        from the database and return them.  Nothing other than `refresh`
        should ever call it; external code must go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the
            chained collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

241 

242 

class CollectionManager(VersionedExtension):
    """Abstract interface for managing the collections (runs included) of a
    `Registry`.

    Notes
    -----
    In a multi-layer `Registry`, each layer keeps its own record for every
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so IDs obtained through the
    `CollectionManager` APIs are strictly for use internal to `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Build an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`.  It
            is used to declare any tables that should always be present in a
            layer implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field.  The full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", stating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field.  The full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", stating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            Name to use as the prefix of the new field.  The full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            Name to use as the prefix of the new field.  The full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since the manager was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe:
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove a collection completely.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Look up the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Look up the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Fetch the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Store the documentation string of a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()