Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public names exported by this module; each entry is a class defined in
# this file.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "MissingCollectionError",
    "RunRecord",
]

30 

31from abc import abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39import astropy.time 

40 

41from ...core import ddl, Timespan 

42from ..wildcards import CollectionSearch 

43from .._collectionType import CollectionType 

44from ._versioning import VersionedExtension 

45 

46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 from ._database import Database, StaticTablesContext 

48 

49 

class MissingCollectionError(Exception):
    """Error signalling that an operation referred to a collection that does
    not exist.
    """

54 

55 

class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """
    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check on the argument just stored; note that the
        # ``type`` parameter shadows the builtin inside this method.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        """Return a debugging representation showing the record's class,
        key, name and type.
        """
        # Use the runtime class name so that subclasses are reported under
        # their own name rather than as the base class.
        return f"{type(self).__name__}(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

90 

91 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization that carries execution information
    for a run, together with the interface used to modify that information.
    """

    # NOTE(review): ``abstractmethod`` is only enforced when the class uses an
    # ``ABCMeta``-derived metaclass, and none is declared here, so these
    # decorators presumably act as documentation only -- confirm before
    # relying on instantiation being blocked.

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps of the period over which the run was
        produced.  `None`/``NULL`` values are interpreted as infinite
        bounds.
        """
        raise NotImplementedError

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[astropy.time.Time]] = None) -> None:
        """Replace the execution information stored in the database for this
        run.

        Any value that is not provided will be set to ``NULL`` in the
        database; it is not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is left to higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps of the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError

134 

135 

class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally holds the list of child collections.

    Parameters
    ----------
    key
        Unique collection ID; may be identical to ``name`` when ``name`` is
        what identifies the collection.  Usually an integer or string, but
        other database-specific types are possible.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start from an empty search path; `refresh` or `update` replaces it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that make up this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain so that it searches the given child
        collections.

        All external code should set children through this method.  The
        actual persistence is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk the chained collections reachable from ``children``; finding
        # this record among them means the new definition would be cyclic.
        nested = children.iter(manager, flattenChains=True, includeChains=True,
                               collectionType=CollectionType.CHAINED)
        if any(record == self for record in nested):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Only replace the in-memory children once the subclass hook has
        # returned without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load the children from the database, resolving collection primary
        key values into records through the given manager.

        The purpose of this method is to guarantee that every collection
        that may appear in a chain is already known to the manager before
        any chain tries to fetch those records from it.
        `ChainedCollectionRecord` subclasses may rely on it being called
        some time after their own ``__init__`` in order to finish
        construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this method to make the database reflect the
        given children.  It should never be called by anything other than
        `update`, which is what all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this method to fetch the chain's child
        collections from the database and return them.  It should never be
        called by anything other than `refresh`, which is what all external
        code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        """
        raise NotImplementedError

243 

244 

class CollectionManager(VersionedExtension):
    """Abstract interface through which a `Registry` manages its collections
    (runs included).

    Notes
    -----
    In a multi-layer `Registry`, every layer keeps its own record for each
    collection for which it has datasets (or quanta).  Different layers may
    identify the same collection by different IDs, so IDs obtained through
    the `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Create an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the row it references is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since it was initialized or
        last refreshed.
        """
        raise NotImplementedError

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError

    @abstractmethod
    def remove(self, name: str) -> None:
        """Remove a collection completely.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Look up the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Look up the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError