Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39from ...core import ddl, DimensionUniverse, Timespan 

40from ..wildcards import CollectionSearch 

41from .._collectionType import CollectionType 

42from ._versioning import VersionedExtension 

43 

44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true

45 from ._database import Database, StaticTablesContext 

46 from ._dimensions import DimensionRecordStorageManager 

47 

48 

class MissingCollectionError(Exception):
    """Error signalling that a requested collection does not exist.

    Raised when an operation attempts to use a collection that is not
    present.
    """

53 

54 

class CollectionRecord:
    """Struct representing one collection inside internal `Registry` APIs.

    Code facing users should refer to collections by their `str` name only.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or a string,
        but any other database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        # Attributes are stored first (key, then name, then type) and the
        # stored type is checked afterwards.
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

89 

90 

class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that carries execution information
    for a run, together with an interface for updating that information.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan] = None,
    ) -> None:
        """Write new execution information to this run's database record.

        Arguments that are not provided are not ignored: the corresponding
        values will be set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is left to higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

133 

134 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Dimension universe.  Part of the constructor signature, but not used
        by this base class.

    Notes
    -----
    The collection type is always `CollectionType.CHAINED`.  A new record
    starts with a search path built from an empty expression; the actual
    children are set by `update` or loaded by `refresh`.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Placeholder search path until `update` or `refresh` is called.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children
        # (nested chains are flattened); finding this record among them means
        # the new definition would make the chain contain itself.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionTypes={CollectionType.CHAINED}):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # The in-memory search path is replaced only after `_update` returns,
        # so an exception raised there leaves `children` unchanged.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

248 

249 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   dimensions: DimensionRecordStorageManager) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the run row is deleted.  `None` indicates
            that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()