Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import ABC, abstractmethod 

32from datetime import datetime 

33from typing import ( 

34 Any, 

35 Iterator, 

36 Optional, 

37 TYPE_CHECKING, 

38) 

39 

40from ...core import ddl, Timespan 

41from ..wildcards import CollectionSearch 

42from .._collectionType import CollectionType 

43 

44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true

45 from .database import Database, StaticTablesContext 

46 

47 

class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """

52 

53 

class CollectionRecord(ABC):
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, name: str, type: CollectionType) -> None:
        self.name = name
        self.type = type
        # Internal consistency check (not user-input validation): it is
        # skipped entirely when Python runs with assertions disabled (-O).
        assert isinstance(self.type, CollectionType)

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.
        """
        raise NotImplementedError()

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

86 

87 

class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[Optional[datetime]]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[Optional[datetime]]:
        """Begin and end timestamps for the period over which the run was
        produced.  `None`/``NULL`` values are interpreted as infinite
        bounds.
        """
        raise NotImplementedError()

129 

130 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str) -> None:
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` replaces it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every chained collection reachable from the new children
        # (nested chains are flattened); finding this record among them means
        # the new definition would make the chain contain itself.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first; the in-memory children are only
        # replaced if it returns without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            new value of `children`.
        """
        raise NotImplementedError()

234 

235 

class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()