Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = [] 

24 

25from abc import abstractmethod 

26import astropy.time 

27import itertools 

28from typing import ( 

29 Any, 

30 Iterator, 

31 NamedTuple, 

32 Optional, 

33 TYPE_CHECKING, 

34) 

35 

36import sqlalchemy 

37 

38from ...core import ddl 

39from ...core.timespan import Timespan, TIMESPAN_FIELD_SPECS 

40from .._collectionType import CollectionType 

41from ..interfaces import ( 

42 ChainedCollectionRecord, 

43 CollectionManager, 

44 CollectionRecord, 

45 MissingCollectionError, 

46 RunRecord, 

47) 

48from ..wildcards import CollectionSearch 

49 

50if TYPE_CHECKING: 50 ↛ 51line 50 didn't jump to line 51, because the condition on line 50 was never true

51 from .database import Database 

52 

53 

def _makeCollectionForeignKey(sourceColumnName: str, collectionIdName: str, **kwargs) -> ddl.ForeignKeySpec:
    """Build a foreign key specification that points at the collections table.

    Parameters
    ----------
    sourceColumnName : `str`
        Name of the column in the referring table.
    collectionIdName : `str`
        Name of the column in collections table that identifies it (PK).
    **kwargs
        Extra keyword arguments forwarded unchanged to `ddl.ForeignKeySpec`.

    Returns
    -------
    spec : `ddl.ForeignKeySpec`
        Foreign key specification.

    Notes
    -----
    The name of the collections table is hard-coded as "collection", and the
    key is always built from exactly one source column and one target column.
    """
    # Both sides of the key are single-column tuples.
    sourceColumns = (sourceColumnName,)
    targetColumns = (collectionIdName,)
    return ddl.ForeignKeySpec("collection", source=sourceColumns, target=targetColumns, **kwargs)

79 

80 

def makeRunTableSpec(collectionIdName: str, collectionIdType: type):
    """Build the table specification for the "run" table.

    Parameters
    ----------
    collectionIdName : `str`
        Name of the column in collections table that identifies it (PK).
    collectionIdType
        Type of the PK column in the collections table, one of the
        `sqlalchemy` types.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for run table.

    Notes
    -----
    The identifying column carries the same name in the run table as in the
    collections table; it is both the primary key of the run table and a
    foreign key (declared with ``onDelete="CASCADE"``) into the collections
    table.  The remaining columns (the two timespan bounds and "host") have
    fixed names.
    """
    idField = ddl.FieldSpec(collectionIdName, dtype=collectionIdType, primaryKey=True)
    hostField = ddl.FieldSpec("host", dtype=sqlalchemy.String, length=128)
    # Same column name on both sides of the key.
    collectionKey = _makeCollectionForeignKey(collectionIdName, collectionIdName, onDelete="CASCADE")
    return ddl.TableSpec(
        fields=[idField, TIMESPAN_FIELD_SPECS.begin, TIMESPAN_FIELD_SPECS.end, hostField],
        foreignKeys=[collectionKey],
    )

114 

115 

def makeCollectionChainTableSpec(collectionIdName: str, collectionIdType: type):
    """Build the table specification for the "collection_chain" table.

    Parameters
    ----------
    collectionIdName : `str`
        Name of the column in collections table that identifies it (PK).
    collectionIdType
        Type of the PK column in the collections table, one of the
        `sqlalchemy` types.

    Returns
    -------
    spec : `ddl.TableSpec`
        Specification for collection chain table.

    Notes
    -----
    A collection chain is an ordered one-to-many relation between
    collections: the primary key is ("parent", "position") and every row
    names one "child".  The column names are fixed and are also hard-coded
    in the record classes that use this table.
    """
    columns = [
        ddl.FieldSpec("parent", dtype=collectionIdType, primaryKey=True),
        ddl.FieldSpec("position", dtype=sqlalchemy.SmallInteger, primaryKey=True),
        ddl.FieldSpec("child", dtype=collectionIdType, nullable=False),
        ddl.FieldSpec("dataset_type_name", dtype=sqlalchemy.String, length=128, nullable=True),
    ]
    # Only the parent key is declared with onDelete="CASCADE"; the child key
    # uses the defaults of `ddl.ForeignKeySpec`.
    parentKey = _makeCollectionForeignKey("parent", collectionIdName, onDelete="CASCADE")
    childKey = _makeCollectionForeignKey("child", collectionIdName)
    return ddl.TableSpec(fields=columns, foreignKeys=[parentKey, childKey])

150 

151 

class DefaultRunRecord(RunRecord):
    """Default `RunRecord` implementation.

    This class assumes the run table layout produced by `makeRunTableSpec`.
    The only name in that schema that is not fixed is the PK column name,
    which therefore has to be passed to the constructor.

    Parameters
    ----------
    db : `Database`
        Registry database.
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Run collection name.
    table : `sqlalchemy.schema.Table`
        Table for run records.
    idColumnName : `str`
        Name of the identifying column in run table.
    host : `str`, optional
        Name of the host where run was produced.
    timespan : `Timespan`, optional
        Timespan for this run.  When omitted, a `Timespan` with both
        ``begin`` and ``end`` set to `None` is used.
    """
    def __init__(self, db: Database, key: Any, name: str, *, table: sqlalchemy.schema.Table,
                 idColumnName: str, host: Optional[str] = None,
                 timespan: Optional[Timespan[astropy.time.Time]] = None):
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self._db = db
        self._table = table
        self._idName = idColumnName
        self._host = host
        # Never keep None here: substitute a timespan with no bounds set.
        self._timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def update(self, host: Optional[str] = None, timespan: Optional[Timespan[astropy.time.Time]] = None):
        # Docstring inherited from RunRecord.
        if timespan is None:
            timespan = Timespan(begin=None, end=None)
        # The same single-column mapping selects the row and is part of the
        # values written to it.
        where = {self._idName: self.key}
        values = dict(where)
        values[TIMESPAN_FIELD_SPECS.begin.name] = timespan.begin
        values[TIMESPAN_FIELD_SPECS.end.name] = timespan.end
        values["host"] = host
        count = self._db.update(self._table, where, values)
        if count != 1:
            raise RuntimeError(f"Run update affected {count} records; expected exactly one.")
        # Only mirror the new values in memory once the database accepted them.
        self._host = host
        self._timespan = timespan

    @property
    def host(self) -> Optional[str]:
        # Docstring inherited from RunRecord.
        return self._host

    @property
    def timespan(self) -> Timespan[astropy.time.Time]:
        # Docstring inherited from RunRecord.
        return self._timespan

215 

216 

class DefaultChainedCollectionRecord(ChainedCollectionRecord):
    """Default `ChainedCollectionRecord` implementation.

    This class assumes the chain table layout produced by
    `makeCollectionChainTableSpec`.  All column names of that table are
    fixed and hard-coded in the methods below.

    Parameters
    ----------
    db : `Database`
        Registry database.
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Collection name.
    table : `sqlalchemy.schema.Table`
        Table for chain relationship records.
    """
    def __init__(self, db: Database, key: Any, name: str, *, table: sqlalchemy.schema.Table):
        super().__init__(key=key, name=name)
        self._table = table
        self._db = db

    def _update(self, manager: CollectionManager, children: CollectionSearch):
        # Docstring inherited from ChainedCollectionRecord.
        rows = []
        for child, restriction in children.iter(manager, withRestrictions=True, flattenChains=False):
            # A restriction whose names are ``...`` is stored as a single row
            # with an empty dataset type name; otherwise one row per name.
            typeNames = [""] if restriction.names is ... else restriction.names
            for typeName in typeNames:
                # Positions are consecutive across all rows, so the number of
                # rows collected so far is the position of the next one.
                rows.append({"parent": self.key, "child": child.key,
                             "position": len(rows), "dataset_type_name": typeName})
        # Replace the stored chain as a whole inside one transaction.
        with self._db.transaction():
            self._db.delete(self._table, ["parent"], {"parent": self.key})
            self._db.insert(self._table, *rows)

    def _load(self, manager: CollectionManager) -> CollectionSearch:
        # Docstring inherited from ChainedCollectionRecord.
        columns = self._table.columns
        query = sqlalchemy.sql.select([columns.child, columns.dataset_type_name])
        query = query.select_from(self._table)
        query = query.where(columns.parent == self.key)
        query = query.order_by(columns.position)
        # Consecutive rows may repeat a collection with different dataset
        # type names; CollectionSearch groups those for us.
        expression = []
        for row in self._db.query(query):
            childKey = row[columns.child]
            # "" (or NULL) in the database stands for ``...``.
            typeName = row[columns.dataset_type_name] or ...
            expression.append((manager[childKey].name, typeName))
        return CollectionSearch.fromExpression(expression)

281 

282 

class DefaultCollectionManager(CollectionManager):
    """Default `CollectionManager` implementation.

    This implementation uses the record classes defined in this module and
    relies on the same schema assumptions as those record classes.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    tables : `NamedTuple`
        Named tuple of SQLAlchemy table objects.  The attributes
        ``collection``, ``run`` and ``collection_chain`` are used.
    collectionIdName : `str`
        Name of the column in collections table that identifies it (PK).

    Notes
    -----
    Implementation uses "aggressive" pre-fetching and caching of the records
    in memory, keyed by record ID.  The cache is rebuilt from the database
    when `refresh` is called and is updated incrementally by `register` and
    `remove`.  Lookup by name is delegated to `_getByName`, which subclasses
    must implement.
    """
    def __init__(self, db: Database, tables: NamedTuple[sqlalchemy.schema.Table, ...],
                 collectionIdName: str):
        self._db = db
        self._tables = tables
        self._collectionIdName = collectionIdName
        self._records = {}  # indexed by record ID

    def refresh(self):
        # Docstring inherited from CollectionManager.
        # Outer join so that collections without a row in the run table are
        # returned as well.
        sql = sqlalchemy.sql.select(
            self._tables.collection.columns + self._tables.run.columns
        ).select_from(
            self._tables.collection.join(self._tables.run, isouter=True)
        )
        # Put found records into a temporary instead of updating self._records
        # in place, for exception safety.
        records = []
        chains = []
        for row in self._db.query(sql).fetchall():
            collection_id = row[self._tables.collection.columns[self._collectionIdName]]
            name = row[self._tables.collection.columns.name]
            collectionType = CollectionType(row["type"])
            if collectionType is CollectionType.RUN:
                record = DefaultRunRecord(
                    key=collection_id,
                    name=name,
                    db=self._db,
                    table=self._tables.run,
                    idColumnName=self._collectionIdName,
                    host=row[self._tables.run.columns.host],
                    timespan=Timespan(
                        begin=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.begin.name]],
                        end=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.end.name]],
                    )
                )
            elif collectionType is CollectionType.CHAINED:
                record = DefaultChainedCollectionRecord(db=self._db,
                                                        key=collection_id,
                                                        table=self._tables.collection_chain,
                                                        name=name)
                chains.append(record)
            else:
                record = CollectionRecord(key=collection_id, name=name, type=collectionType)
            records.append(record)
        self._setRecordCache(records)
        # Chains are refreshed only after the cache has been replaced, since
        # they are handed this manager to work with.
        for chain in chains:
            chain.refresh(self)

    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        record = self._getByName(name)
        if record is None:
            row, _ = self._db.sync(
                self._tables.collection,
                keys={"name": name},
                compared={"type": int(type)},
                returning=[self._collectionIdName],
            )
            collection_id = row[self._collectionIdName]
            if type is CollectionType.RUN:
                # Runs additionally need a row in the run table; read back
                # its metadata columns to populate the record.
                row, _ = self._db.sync(
                    self._tables.run,
                    keys={self._collectionIdName: collection_id},
                    returning={"host", TIMESPAN_FIELD_SPECS.begin.name, TIMESPAN_FIELD_SPECS.end.name},
                )
                record = DefaultRunRecord(
                    db=self._db,
                    key=collection_id,
                    name=name,
                    table=self._tables.run,
                    idColumnName=self._collectionIdName,
                    host=row["host"],
                    timespan=Timespan(
                        row[TIMESPAN_FIELD_SPECS.begin.name],
                        row[TIMESPAN_FIELD_SPECS.end.name]
                    ),
                )
            elif type is CollectionType.CHAINED:
                record = DefaultChainedCollectionRecord(db=self._db, key=collection_id, name=name,
                                                        table=self._tables.collection_chain)
            else:
                record = CollectionRecord(key=collection_id, name=name, type=type)
            self._addCachedRecord(record)
        return record

    def remove(self, name: str):
        # Docstring inherited from CollectionManager.
        record = self._getByName(name)
        if record is None:
            raise MissingCollectionError(f"No collection with name '{name}' found.")
        # This may raise; the cached record is dropped only after the
        # database delete went through.
        self._db.delete(self._tables.collection, [self._collectionIdName],
                        {self._collectionIdName: record.key})
        self._removeCachedRecord(record)

    def find(self, name: str) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        result = self._getByName(name)
        if result is None:
            raise MissingCollectionError(f"No collection with name '{name}' found.")
        return result

    def __getitem__(self, key: Any) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        # An unknown key raises instead of returning None.
        try:
            return self._records[key]
        except KeyError as err:
            raise MissingCollectionError(f"Collection with key '{key}' not found.") from err

    def __iter__(self) -> Iterator[CollectionRecord]:
        # Iterates over the cached records only; the database is not queried.
        yield from self._records.values()

    def _setRecordCache(self, records: Iterator[CollectionRecord]):
        """Set internal record cache to contain given records,
        old cached records will be removed.

        Parameters
        ----------
        records : iterable of `CollectionRecord`
            Records that make up the new cache; each is stored under its
            ``key`` attribute.
        """
        # Build the replacement mapping completely before rebinding, so that
        # a failure while iterating leaves the previous cache untouched.
        self._records = {record.key: record for record in records}

    def _addCachedRecord(self, record: CollectionRecord):
        """Add single record to cache.

        Parameters
        ----------
        record : `CollectionRecord`
            Record to store under its ``key``; an existing entry with the
            same key is replaced.
        """
        self._records[record.key] = record

    def _removeCachedRecord(self, record: CollectionRecord):
        """Remove single record from cache.

        Parameters
        ----------
        record : `CollectionRecord`
            Record whose ``key`` is removed from the cache.

        Raises
        ------
        KeyError
            Raised if no record is cached under ``record.key``.
        """
        del self._records[record.key]

    @abstractmethod
    def _getByName(self, name: str) -> Optional[CollectionRecord]:
        """Find collection record given collection name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord` or `None`
            The matching record, or `None` if there is no collection with
            this name; `register`, `remove` and `find` rely on `None` to
            detect a missing collection.
        """
        raise NotImplementedError()