Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import ABC, abstractmethod 

32from typing import ( 

33 Any, 

34 Iterator, 

35 Optional, 

36 TYPE_CHECKING, 

37) 

38 

39import astropy.time 

40 

41from ...core import ddl, Timespan 

42from ..wildcards import CollectionSearch 

43from .._collectionType import CollectionType 

44 

45if TYPE_CHECKING:  # coverage: 45 ↛ 46 (line 45 didn't jump to line 46, because the condition on line 45 was never true)

46 from ._database import Database, StaticTablesContext 

47 

48 

class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """

53 

54 

class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check rather than input validation: it is skipped
        # when Python runs with assertions disabled (``-O``).
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

89 

90 

class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.

    Notes
    -----
    The members below are decorated with `abc.abstractmethod`, but neither
    this class nor `CollectionRecord` uses `abc.ABCMeta`, so Python does not
    block instantiation of an incomplete subclass; an unimplemented member
    fails with `NotImplementedError` only when it is actually used.
    """

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan[astropy.time.Time]] = None,
    ) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

133 

134 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.

    Notes
    -----
    `_update` and `_load` are decorated with `abc.abstractmethod`, but neither
    this class nor `CollectionRecord` uses `abc.ABCMeta`, so the decorator is
    not enforced at instantiation; an unimplemented hook fails with
    `NotImplementedError` only when it is called.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Ask the search for CHAINED records only, with nested chains both
        # flattened and included; finding this record among them means the
        # new definition would make the chain contain itself.
        for record in children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        ):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Run the subclass hook first, so that if it raises, the in-memory
        # children are left unchanged.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The child collections of this chain, as stored in the database.
        """
        raise NotImplementedError()

242 

243 

class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        **kwds: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()