Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["AggressiveNameKeyCollectionManager"] 

24 

25from collections import namedtuple 

26from datetime import datetime 

27from typing import ( 

28 Any, 

29 Iterator, 

30 Optional, 

31 TYPE_CHECKING, 

32) 

33 

34import sqlalchemy 

35 

36from ...core import ddl 

37from ...core.timespan import Timespan, TIMESPAN_FIELD_SPECS 

38from .._collectionType import CollectionType 

39from ..interfaces import ( 

40 ChainedCollectionRecord, 

41 CollectionManager, 

42 CollectionRecord, 

43 MissingCollectionError, 

44 RunRecord, 

45) 

46from ..wildcards import CollectionSearch 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from .database import Database, StaticTablesContext 

50 

51 

# Named tuple type with one slot per table declared in this module.
_TablesTuple = namedtuple("CollectionTablesTuple", ["collection", "run", "collection_chain"])


def _collectionNameField(column: str, **kwds: Any) -> ddl.FieldSpec:
    """Return the spec for a 64-character string column named ``column``.

    Every column in this module that holds a collection name (whether as a
    primary key or as a foreign key to ``collection.name``) is declared
    through this helper so that they all share the same type and length.
    Extra keyword arguments are forwarded to `ddl.FieldSpec`.
    """
    return ddl.FieldSpec(column, dtype=sqlalchemy.String, length=64, **kwds)


_TABLES_SPEC = _TablesTuple(
    # One row per collection of any type, keyed by the collection name.
    collection=ddl.TableSpec(
        fields=[
            _collectionNameField("name", primaryKey=True),
            ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
        ],
    ),
    # Extra columns for RUN collections; rows go away with their
    # ``collection`` row (ON DELETE CASCADE).
    run=ddl.TableSpec(
        fields=[
            _collectionNameField("name", primaryKey=True),
            TIMESPAN_FIELD_SPECS.begin,
            TIMESPAN_FIELD_SPECS.end,
            ddl.FieldSpec("host", dtype=sqlalchemy.String, length=128),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("collection", source=("name",), target=("name",), onDelete="CASCADE"),
        ],
    ),
    # Ordered children of CHAINED collections, keyed by (parent, index).
    # Deleting the parent removes its rows; the child reference has no
    # ON DELETE action.
    collection_chain=ddl.TableSpec(
        fields=[
            _collectionNameField("parent", primaryKey=True),
            ddl.FieldSpec("index", dtype=sqlalchemy.SmallInteger, primaryKey=True),
            _collectionNameField("child", nullable=False),
            ddl.FieldSpec("dataset_type_name", dtype=sqlalchemy.String, length=128, nullable=True),
        ],
        foreignKeys=[
            ddl.ForeignKeySpec("collection", source=("parent",), target=("name",), onDelete="CASCADE"),
            ddl.ForeignKeySpec("collection", source=("child",), target=("name",)),
        ],
    ),
)

85 

86 

class NameKeyCollectionRecord(CollectionRecord):
    """A `CollectionRecord` whose primary/foreign key value is simply the
    collection's string name.

    No separate surrogate key exists for these records: `key` always
    returns the same string as ``name``.
    """

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        # The name doubles as the database key.
        return self.name

96 

97 

class NameKeyRunRecord(RunRecord):
    """A `RunRecord` whose primary/foreign key value is simply the
    collection's string name.

    Parameters
    ----------
    db : `Database`
        Database interface used by `update` to write changes.
    name : `str`
        Name of the run collection; also its key.
    table : `sqlalchemy.schema.Table`
        Table that `update` writes the host and timespan to.
    host : `str`, optional
        Initial value for the `host` property.
    timespan : `Timespan`, optional
        Initial value for the `timespan` property.  If `None`, a timespan
        with both bounds set to `None` is used.
    """

    def __init__(self, db: Database, name: str, *, table: sqlalchemy.schema.Table,
                 host: Optional[str] = None, timespan: Optional[Timespan[Optional[datetime]]] = None):
        super().__init__(name=name, type=CollectionType.RUN)
        self._db = db
        self._table = table
        self._host = host
        # Always hold a Timespan instance so the property never returns None.
        self._timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def update(self, host: Optional[str] = None, timespan: Optional[Timespan[Optional[datetime]]] = None):
        # Docstring inherited from RunRecord.
        if timespan is None:
            timespan = Timespan(begin=None, end=None)
        beginColumn = TIMESPAN_FIELD_SPECS.begin.name
        endColumn = TIMESPAN_FIELD_SPECS.end.name
        row = {
            "name": self.name,
            beginColumn: timespan.begin,
            endColumn: timespan.end,
            "host": host,
        }
        count = self._db.update(self._table, {"name": self.name}, row)
        if count != 1:
            raise RuntimeError(f"Run update affected {count} records; expected exactly one.")
        # Only touch the in-memory state once the database write is known to
        # have hit exactly one row.
        self._host, self._timespan = host, timespan

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        return self.name

    @property
    def host(self) -> Optional[str]:
        # Docstring inherited from RunRecord.
        return self._host

    @property
    def timespan(self) -> Timespan[Optional[datetime]]:
        # Docstring inherited from RunRecord.
        return self._timespan

142 

143 

class NameKeyChainedCollectionRecord(ChainedCollectionRecord):
    """A `ChainedCollectionRecord` whose primary/foreign key value is simply
    the collection's string name.

    Parameters
    ----------
    db : `Database`
        Database interface used to read and write the chain definition.
    name : `str`
        Name of the chained collection; also its key.
    table : `sqlalchemy.schema.Table`
        Table holding one row per (parent, index) child entry.
    """

    def __init__(self, db: Database, name: str, *, table: sqlalchemy.schema.Table):
        super().__init__(name=name)
        self._db = db
        self._table = table

    @property
    def key(self) -> str:
        # Docstring inherited from CollectionRecord.
        return self.name

    def _update(self, manager: CollectionManager, children: CollectionSearch):
        # Docstring inherited from ChainedCollectionRecord.
        rows = []
        for child, restriction in children.iter(manager, withRestrictions=True, flattenChains=False):
            # An unrestricted child (names is ``...``) becomes a single row
            # with an empty dataset type name; a restricted child becomes one
            # row per dataset type name.
            datasetTypeNames = [""] if restriction.names is ... else restriction.names
            for datasetTypeName in datasetTypeNames:
                # The next index is always the number of rows built so far.
                rows.append({"parent": self.key, "child": child.key, "index": len(rows),
                             "dataset_type_name": datasetTypeName})
        # Replace the whole chain definition inside one transaction.
        with self._db.transaction():
            self._db.delete(self._table, ["parent"], {"parent": self.key})
            self._db.insert(self._table, *rows)

    def _load(self, manager: CollectionManager) -> CollectionSearch:
        # Docstring inherited from ChainedCollectionRecord.
        columns = self._table.columns
        query = sqlalchemy.sql.select(
            [columns.child, columns.dataset_type_name]
        ).select_from(
            self._table
        ).where(
            columns.parent == self.key
        ).order_by(
            columns.index
        )
        # Consecutive rows may repeat a collection name with different
        # dataset type names; CollectionSearch groups those for us.
        children = []
        for row in self._db.query(query):
            childRecord = manager[row[columns.child]]
            # ``...`` is stored as "" in the database, so any falsy value
            # read back means "no restriction".
            restriction = row[columns.dataset_type_name] or ...
            children.append((childRecord.name, restriction))
        return CollectionSearch.fromExpression(children)

199 

200 

class AggressiveNameKeyCollectionManager(CollectionManager):
    """A `CollectionManager` implementation that uses collection names for
    primary/foreign keys and aggressively loads all collection/run records in
    the database into memory.

    Parameters
    ----------
    db : `Database`
        Interface to the underlying database engine and namespace.
    tables : `_TablesTuple`
        Named tuple of SQLAlchemy table objects.
    """

    def __init__(self, db: Database, tables: _TablesTuple):
        self._db = db
        self._tables = tables
        # In-memory cache of every known record, keyed by collection name.
        # Replaced wholesale by `refresh` and extended by `register`.
        self._records = {}

    @classmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        # Docstring inherited from CollectionManager.
        return cls(db, tables=context.addTableTuple(_TABLES_SPEC))

    @classmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        if prefix is None:
            prefix = "collection"
        original = _TABLES_SPEC.collection.fields["name"]
        # The new column must have the same sized type as the column it
        # references, so carry over the length as well as the dtype.  A
        # length passed explicitly by the caller still takes precedence.
        kwds.setdefault("length", original.length)
        copy = ddl.FieldSpec(cls.getCollectionForeignKeyName(prefix), dtype=original.dtype, **kwds)
        tableSpec.fields.add(copy)
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("collection", source=(copy.name,),
                                                        target=(original.name,), onDelete=onDelete))
        return copy

    @classmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        if prefix is None:
            prefix = "run"
        original = _TABLES_SPEC.run.fields["name"]
        # As in addCollectionForeignKey: match the referenced column's length
        # unless the caller overrides it.
        kwds.setdefault("length", original.length)
        copy = ddl.FieldSpec(cls.getRunForeignKeyName(prefix), dtype=original.dtype, **kwds)
        tableSpec.fields.add(copy)
        tableSpec.foreignKeys.append(ddl.ForeignKeySpec("run", source=(copy.name,),
                                                        target=(original.name,), onDelete=onDelete))
        return copy

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Name of the column created by addCollectionForeignKey for ``prefix``.
        return f"{prefix}_name"

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Name of the column created by addRunForeignKey for ``prefix``.
        return f"{prefix}_name"

    def refresh(self):
        # Docstring inherited from CollectionManager.
        # Outer join so that non-RUN collections (which have no row in the
        # run table) are still returned.
        sql = sqlalchemy.sql.select(
            self._tables.collection.columns + self._tables.run.columns
        ).select_from(
            self._tables.collection.join(self._tables.run, isouter=True)
        )
        # Put found records into a temporary instead of updating self._records
        # in place, for exception safety.
        records = {}
        chains = []
        for row in self._db.query(sql).fetchall():
            name = row[self._tables.collection.columns.name]
            collectionType = CollectionType(row["type"])
            if collectionType is CollectionType.RUN:
                record = NameKeyRunRecord(
                    name=name,
                    db=self._db,
                    table=self._tables.run,
                    host=row[self._tables.run.columns.host],
                    timespan=Timespan(
                        begin=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.begin.name]],
                        end=row[self._tables.run.columns[TIMESPAN_FIELD_SPECS.end.name]],
                    )
                )
            elif collectionType is CollectionType.CHAINED:
                record = NameKeyChainedCollectionRecord(db=self._db, table=self._tables.collection_chain,
                                                        name=name)
                chains.append(record)
            else:
                record = NameKeyCollectionRecord(type=collectionType, name=name)
            records[record.name] = record
        self._records = records
        # Chains are refreshed only after the cache has been swapped in,
        # because they are handed this manager to look up their children.
        for chain in chains:
            chain.refresh(self)

    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        record = self._records.get(name)
        # NOTE: a cache hit returns the existing record as-is; ``type`` is
        # only compared against the database on the cache-miss path below.
        if record is None:
            kwds = {"name": name}
            self._db.sync(
                self._tables.collection,
                keys=kwds,
                compared={"type": int(type)},
            )
            if type is CollectionType.RUN:
                row, _ = self._db.sync(
                    self._tables.run,
                    keys=kwds,
                    returning={"host", TIMESPAN_FIELD_SPECS.begin.name, TIMESPAN_FIELD_SPECS.end.name},
                )
                record = NameKeyRunRecord(
                    db=self._db,
                    table=self._tables.run,
                    host=row["host"],
                    timespan=Timespan(
                        row[TIMESPAN_FIELD_SPECS.begin.name],
                        row[TIMESPAN_FIELD_SPECS.end.name]
                    ),
                    **kwds
                )
            elif type is CollectionType.CHAINED:
                record = NameKeyChainedCollectionRecord(db=self._db, table=self._tables.collection_chain,
                                                        **kwds)
            else:
                record = NameKeyCollectionRecord(type=type, **kwds)
            self._records[record.name] = record
        return record

    def find(self, name: str) -> CollectionRecord:
        # Docstring inherited from CollectionManager.
        result = self._records.get(name)
        if result is None:
            raise MissingCollectionError(f"No collection with name '{name}' found.")
        return result

    def __getitem__(self, key: Any) -> Optional[CollectionRecord]:
        # Docstring inherited from CollectionManager.
        try:
            return self._records[key]
        except KeyError as err:
            # Interpolate the key itself: str(KeyError) is already the repr of
            # the key, which would yield doubled quotes in the message.
            raise MissingCollectionError(f"Collection with key '{key}' not found.") from err

    def __iter__(self) -> Iterator[CollectionRecord]:
        yield from self._records.values()