Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39from ...core import ddl, Timespan 

40from ..wildcards import CollectionSearch 

41from .._collectionType import CollectionType 

42from ._versioning import VersionedExtension 

43 

44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true

45 from ._database import Database, StaticTablesContext 

46 

47 

class MissingCollectionError(Exception):
    """Error signalling that an operation referred to a collection that
    does not exist.
    """

52 

53 

class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check only; it is skipped when Python runs with -O.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        """Return a debugging representation of this record.

        Returns
        -------
        text : `str`
            The concrete class name followed by the ``key``, ``name`` and
            ``type`` attribute values.
        """
        # Use the runtime class so subclasses are reported under their own
        # name rather than as a plain ``CollectionRecord``.
        return (
            f"{self.__class__.__name__}"
            f"(key={self.key!r}, name={self.name!r}, type={self.type!r})"
        )

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

88 

89 

class RunRecord(CollectionRecord):
    """`CollectionRecord` subclass that carries execution information for a
    run and exposes an interface for updating that information.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan] = None,
    ) -> None:
        """Write new execution information to this run's database record.

        Any value that is not provided will be set to ``NULL`` in the
        database; it is not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is set by higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

132 

133 

class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` subclass for ``CHAINED`` collections, which
    additionally holds the list of child collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        actual persistence is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every chained collection reachable from the new children; if
        # this record is among them the definition would be circular.
        reachable_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        )
        if any(candidate == self for candidate in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first; the in-memory state is only
        # replaced if that call returns without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to
        resolve collection primary key values into records.

        This method exists to ensure that all collections that may appear in
        a chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  It should never be called by anything other than
        `update`, which is what all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  It should never be called by
        anything other than `refresh`, which is what all external code should
        use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The child collections of this chain, as read from the database.
        """
        raise NotImplementedError()

241 

242 

class CollectionManager(VersionedExtension):
    """Interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey` if
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()