Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [ 

24 "ChainedCollectionRecord", 

25 "CollectionManager", 

26 "CollectionRecord", 

27 "MissingCollectionError", 

28 "RunRecord", 

29] 

30 

31from abc import ABC, abstractmethod 

32from datetime import datetime 

33from typing import ( 

34 Any, 

35 Iterator, 

36 Optional, 

37 TYPE_CHECKING, 

38) 

39 

40from ...core import ddl, Timespan 

41from ..wildcards import CollectionSearch 

42from .._collectionType import CollectionType 

43 

44if TYPE_CHECKING: 

45 from .database import Database, StaticTablesContext 

46 

47 

class MissingCollectionError(Exception):
    """Error signalling that a requested collection does not exist.

    Raised when an operation attempts to use a collection that is not
    present.
    """

52 

53 

class CollectionRecord(ABC):
    """Abstract record describing one collection for internal `Registry` APIs.

    Code outside `Registry` is expected to keep identifying collections by
    their `str` names rather than by instances of this class.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, name: str, type: CollectionType):
        # Plain attribute storage; nothing is validated or converted here.
        self.name, self.type = name, type

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.

        Concrete subclasses must override this property.
        """
        raise NotImplementedError()

85 

86 

class RunRecord(CollectionRecord):
    """A `CollectionRecord` extended with execution information for a run.

    In addition to the base record interface, subclasses expose the host and
    timespan of the run and provide a way to update them.
    """

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[Optional[datetime]]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` bounds are interpreted as infinite.
        """
        raise NotImplementedError()

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan[Optional[datetime]]] = None,
    ):
        """Write new execution information to this run's database record.

        Arguments that are not provided are not ignored: they will be set to
        ``NULL`` in the database.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

128 

129 

class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str):
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; the real children are filled in
        # later by `refresh` (from the database) or `update`.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).

        This property is read-only; use `update` to change the children.
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Check for cycles before touching anything, so a rejected update
        # reaches neither `_update` nor the in-memory state.
        # NOTE(review): this presumably walks nested chains recursively and
        # yields only CHAINED records - confirm against `CollectionSearch.iter`.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first; the cached children are replaced
        # only if it returns without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            new value of `children`.
        """
        raise NotImplementedError()

233 

234 

class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.
            If ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.
            If ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()