Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import ABC, abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39import astropy.time 

40 

41from ...core import ddl, Timespan 

42from ..wildcards import CollectionSearch 

43from .._collectionType import CollectionType 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from .database import Database, StaticTablesContext 

47 

48 

class MissingCollectionError(Exception):
    """Error signalling that an operation tried to use a collection that
    does not exist.
    """

53 

54 

class CollectionRecord(ABC):
    """Internal `Registry` representation of a single collection.

    This struct is meant for internal `Registry` APIs only; user-facing code
    should always identify a collection with a plain `str`.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, name: str, type: CollectionType):
        self.name = name
        self.type = type
        # Sanity check on the value just stored; as with any ``assert``, it
        # is skipped when Python runs with optimizations enabled.
        assert isinstance(self.type, CollectionType)

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.
        """
        raise NotImplementedError()

87 

88 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` subclass that carries execution information and
    defines the interface for updating it.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan[astropy.time.Time]] = None,
    ):
        """Write new execution information to the database record for this
        run.

        Arguments that are not provided are not ignored: the corresponding
        values will be set to ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

130 

131 

class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` subclass that also holds the list of child
    collections of a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str):
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `update` or `refresh` replaces it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain so that it searches the given child
        collections.

        All external code should set children through this method.  The
        actual work is delegated to `_update`, which is the method that
        subclasses should override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every chained collection reachable from the proposed children
        # (flattening nested chains); finding this record among them means
        # the new definition would make the chain contain itself.
        nestedChains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        )
        if any(candidate == self for candidate in nestedChains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first so the in-memory state is only
        # replaced if the hook did not raise.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to
        resolve collection primary key values into records.

        The purpose of this method is to guarantee that every collection
        that may appear in a chain is already known to the manager before
        any particular chain tries to retrieve those records from it.
        `ChainedCollectionRecord` subclasses can rely on it being called
        sometime after their own ``__init__`` to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch):
        """Protected implementation hook for `update`.

        Subclasses should implement this method to update the database so
        that it reflects the children given.  It should never be called by
        anything other than `update`, which should be used by all external
        code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses should implement this method to retrieve the chain's
        child collections from the database and return them.  It should
        never be called by anything other than `refresh`, which should be
        used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The child collections of this chain, as loaded from the
            database.
        """
        raise NotImplementedError()

235 

236 

class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self):
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since
        it was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str):
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()