Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import ABC, abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39import astropy.time 

40 

41from ...core import ddl, Timespan 

42from ..wildcards import CollectionSearch 

43from .._collectionType import CollectionType 

44 

45if TYPE_CHECKING:  # coverage note: line 45 never jumped to line 46 because the condition was never true

46 from .database import Database, StaticTablesContext 

47 

48 

class MissingCollectionError(Exception):
    """Error signalling that a referenced collection does not exist.

    Raised when an operation attempts to use a collection that is not
    present.
    """

53 

54 

class CollectionRecord:
    """Lightweight record describing one collection in internal `Registry`
    APIs.

    User-facing code should always identify a collection by a plain `str`;
    this type is meant for internal use only.

    Parameters
    ----------
    key
        Unique ID of the collection.  May be identical to ``name`` when the
        name itself is used for identification.  Usually an integer or a
        string, but any other database-specific type is permitted.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Developer sanity check only: ``assert`` statements are skipped
        # entirely when Python runs with ``-O``.
        assert isinstance(self.type, CollectionType)

89 

90 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization for runs.

    Adds execution information (host and timespan) to the base record and
    declares the interface through which that information is updated.
    """

    # NOTE(review): ``abstractmethod`` is only enforced for classes whose
    # metaclass derives from ``abc.ABCMeta``.  `CollectionRecord`, as defined
    # in this module, does not set one, so the markers below appear to be
    # advisory only -- confirm whether that is intended.

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan[astropy.time.Time]] = None):
        """Write new execution information to this run's database record.

        Arguments that are not provided are *not* ignored: the corresponding
        values will be set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  The
            detailed form is left to higher-level convention; from the
            `Registry` perspective it is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced.

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

132 

133 

class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``CHAINED`` collections.

    In addition to the base record it holds the list of child collections
    that make up the chain.

    Parameters
    ----------
    key
        Unique ID of the collection.  May be identical to ``name`` when the
        name itself is used for identification.  Usually an integer or a
        string, but any other database-specific type is permitted.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with an empty search path; `update` or `refresh`
        # replaces it later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch):
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        actual persistence is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every chained collection reachable from ``children``; finding
        # this record among them means the new definition would be cyclic.
        nested_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        )
        if any(record == self for record in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Persist first; the in-memory state only changes if `_update`
        # returns without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager):
        """Reload the children of this chain from the database.

        The given manager is used to resolve collection primary key values
        into records.

        This method exists so that every collection that may appear in a
        chain is known to the manager before any particular chain tries to
        retrieve its records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch):
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  It should never be called by anything other than
        `update`, which is what all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  It should never be called by
        anything other than `refresh`, which is what all external code
        should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The child collections of this chain as read from the database.
        """
        raise NotImplementedError()

241 

242 

class CollectionManager(ABC):
    """Abstract interface for managing the collections (runs included) of a
    `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so IDs obtained through the
    `CollectionManager` APIs are strictly for use internal to `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self):
        """Make all other operations on this manager aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present in
        the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str):
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()