Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39import astropy.time 

40 

41from ...core import ddl, Timespan 

42from ..wildcards import CollectionSearch 

43from .._collectionType import CollectionType 

44from ._versioning import VersionedExtension 

45 

46if TYPE_CHECKING:

47 from ._database import Database, StaticTablesContext 

48 

49 

class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """

54 

55 

class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal invariant check only: this is stripped when Python runs
        # with ``-O``, so it must not be relied on for input validation.
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

90 

91 

class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    # NOTE: `CollectionRecord` is a plain class (no `abc.ABCMeta` metaclass),
    # so the `abstractmethod` markers below are not enforced at instantiation
    # unless a subclass opts into `ABCMeta`.  The `NotImplementedError` bodies
    # are what actually signal a missing override at call time.

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[astropy.time.Time]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` bounds are interpreted as
        infinite.
        """
        raise NotImplementedError()

134 

135 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Reject cycles before touching anything: if this record shows up
        # among the CHAINED records reachable from ``children``, the new
        # definition would include itself.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first; the in-memory state is only
        # replaced once `_update` has returned without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            value of the `children` property.
        """
        raise NotImplementedError()

243 

244 

class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

515 raise NotImplementedError()