Coverage for python/lsst/daf/butler/registry/collections/nameKey.py: 99%

110 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-26 02:47 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["NameKeyCollectionManager"] 

30 

31import logging 

32from collections.abc import Iterable, Mapping 

33from typing import TYPE_CHECKING, Any 

34 

35import sqlalchemy 

36 

37from ... import ddl 

38from ...column_spec import COLLECTION_NAME_MAX_LENGTH 

39from ...timespan_database_representation import TimespanDatabaseRepresentation 

40from .._collection_type import CollectionType 

41from ..interfaces import ChainedCollectionRecord, CollectionRecord, RunRecord, VersionTuple 

42from ._base import ( 

43 CollectionTablesTuple, 

44 DefaultCollectionManager, 

45 makeCollectionChainTableSpec, 

46 makeRunTableSpec, 

47) 

48 

49if TYPE_CHECKING: 

50 from .._caching_context import CachingContext 

51 from ..interfaces import Database, StaticTablesContext 

52 

53 

# Definition of the primary-key column of the collection table: the
# collection name itself, stored as a bounded-length string.  The foreign-key
# helpers in this module copy its dtype and length so referencing columns
# always match it.
_KEY_FIELD_SPEC = ddl.FieldSpec(
    "name",
    dtype=sqlalchemy.String,
    length=COLLECTION_NAME_MAX_LENGTH,
    primaryKey=True,
)

# Schema version reported by this manager; bump it whenever the table
# definitions in this module change.
_VERSION = VersionTuple(2, 0, 0)

# Module-level logger.
_LOG = logging.getLogger(__name__)

64 

65 

def _makeTableSpecs(
    TimespanReprClass: type[TimespanDatabaseRepresentation],
) -> CollectionTablesTuple[ddl.TableSpec]:
    """Build the table specifications used by the name-keyed manager.

    Parameters
    ----------
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Timespan representation class, forwarded to `makeRunTableSpec`.

    Returns
    -------
    specs : `CollectionTablesTuple` [ `ddl.TableSpec` ]
        Specifications for the ``collection``, ``run`` and
        ``collection_chain`` tables, all keyed on the string ``name`` column.
    """
    # The collection table: name (primary key), integer type code, and an
    # optional free-form documentation string.
    collection_fields = [
        _KEY_FIELD_SPEC,
        ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
        ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
    ]
    collection_spec = ddl.TableSpec(fields=collection_fields)
    run_spec = makeRunTableSpec("name", sqlalchemy.String, TimespanReprClass)
    chain_spec = makeCollectionChainTableSpec("name", sqlalchemy.String)
    return CollectionTablesTuple(
        collection=collection_spec,
        run=run_spec,
        collection_chain=chain_spec,
    )

80 

81 

class NameKeyCollectionManager(DefaultCollectionManager[str]):
    """A `CollectionManager` implementation that uses collection names for
    primary/foreign keys and aggressively loads all collection/run records in
    the database into memory.

    Most of the logic, including caching policy, is implemented in the base
    class; this class only adds customizations specific to this particular
    table schema.
    """

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> NameKeyCollectionManager:
        # Docstring inherited from CollectionManager.
        return cls(
            db,
            tables=context.addTableTuple(_makeTableSpecs(db.getTimespanRepresentation())),  # type: ignore
            collectionIdName="name",
            caching_context=caching_context,
            registry_schema_version=registry_schema_version,
        )

    def clone(self, db: Database, caching_context: CachingContext) -> NameKeyCollectionManager:
        """Make a new manager that reuses this manager's tables, key column
        name and registry schema version, but is bound to the given ``db``
        and ``caching_context``.
        """
        return NameKeyCollectionManager(
            db,
            tables=self._tables,
            collectionIdName=self._collectionIdName,
            caching_context=caching_context,
            registry_schema_version=self._registry_schema_version,
        )

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        original = _KEY_FIELD_SPEC
        # The referencing column copies dtype and length from the key column
        # so the two always agree; everything else comes from ``kwargs``.
        copy = ddl.FieldSpec(
            cls.getCollectionForeignKeyName(prefix), dtype=original.dtype, length=original.length, **kwargs
        )
        tableSpec.fields.add(copy)
        if constraint:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec(
                    "collection", source=(copy.name,), target=(original.name,), onDelete=onDelete
                )
            )
        return copy

    @classmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        original = _KEY_FIELD_SPEC
        # The referencing column copies dtype and length from the key column
        # so the two always agree; everything else comes from ``kwargs``.
        copy = ddl.FieldSpec(
            cls.getRunForeignKeyName(prefix), dtype=original.dtype, length=original.length, **kwargs
        )
        tableSpec.fields.add(copy)
        # NOTE(review): the coverage report this listing was taken from shows
        # that the ``constraint=False`` path is never exercised by the tests.
        if constraint:
            tableSpec.foreignKeys.append(
                ddl.ForeignKeySpec("run", source=(copy.name,), target=(original.name,), onDelete=onDelete)
            )
        return copy

    def getParentChains(self, key: str) -> set[str]:
        # Docstring inherited from CollectionManager.
        table = self._tables.collection_chain
        sql = (
            sqlalchemy.sql.select(table.columns["parent"])
            .select_from(table)
            .where(table.columns["child"] == key)
        )
        with self._db.query(sql) as sql_result:
            parent_names = set(sql_result.scalars().all())
        return parent_names

    def lookup_name_sql(
        self, sql_key: sqlalchemy.ColumnElement[str], sql_from_clause: sqlalchemy.FromClause
    ) -> tuple[sqlalchemy.ColumnElement[str], sqlalchemy.FromClause]:
        # Docstring inherited.
        # Keys are the collection names in this schema, so the inputs are
        # returned unchanged.
        return sql_key, sql_from_clause

    def _fetch_by_name(self, names: Iterable[str]) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        # Names and keys are the same thing for this manager.
        return self._fetch_by_key(names)

    def _fetch_by_key(self, collection_ids: Iterable[str] | None) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        if collection_ids is not None:
            # Materialize once: the argument may be a one-shot iterable, and
            # it is both logged and used to build the IN clause below.
            collection_ids = list(collection_ids)
        _LOG.debug("Fetching collection records using names %s.", collection_ids)
        if collection_ids is not None and not collection_ids:
            # Nothing was requested, so no row can match; skip the database
            # round trip entirely.
            return []

        sql = sqlalchemy.sql.select(*self._tables.collection.columns, *self._tables.run.columns).select_from(
            self._tables.collection.join(self._tables.run, isouter=True)
        )

        chain_sql = sqlalchemy.sql.select(
            self._tables.collection_chain.columns["parent"],
            self._tables.collection_chain.columns["position"],
            self._tables.collection_chain.columns["child"],
        )

        records: list[CollectionRecord[str]] = []
        # We want to keep transactions as short as possible. When we fetch
        # everything we want to quickly fetch things into memory and finish
        # transaction. When we fetch just few records we need to process result
        # of the first query before we can run the second one.
        if collection_ids is not None:
            sql = sql.where(self._tables.collection.columns[self._collectionIdName].in_(collection_ids))
            with self._db.transaction():
                with self._db.query(sql) as sql_result:
                    sql_rows = sql_result.mappings().fetchall()

                records, chained_ids = self._rows_to_records(sql_rows)

                if chained_ids:
                    # Retrieve chained collection compositions
                    chain_sql = chain_sql.where(
                        self._tables.collection_chain.columns["parent"].in_(chained_ids)
                    )
                    with self._db.query(chain_sql) as sql_result:
                        chain_rows = sql_result.mappings().fetchall()

                    records += self._rows_to_chains(chain_rows, chained_ids)

        else:
            with self._db.transaction():
                with self._db.query(sql) as sql_result:
                    sql_rows = sql_result.mappings().fetchall()
                with self._db.query(chain_sql) as sql_result:
                    chain_rows = sql_result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(sql_rows)
            records += self._rows_to_chains(chain_rows, chained_ids)

        return records

    def _rows_to_records(self, rows: Iterable[Mapping]) -> tuple[list[CollectionRecord[str]], list[str]]:
        """Convert rows returned from collection query to a list of records
        and a list of chained collection names.

        Parameters
        ----------
        rows : `~collections.abc.Iterable` [ `~collections.abc.Mapping` ]
            Rows of the collection table joined with the run table.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Records for every row that is not a CHAINED collection; RUN rows
            become `RunRecord` instances.
        chained_ids : `list` [ `str` ]
            Names of the CHAINED collections found in ``rows``; no records
            are made for them here.
        """
        records: list[CollectionRecord[str]] = []
        TimespanReprClass = self._db.getTimespanRepresentation()
        chained_ids: list[str] = []
        for row in rows:
            name = row[self._tables.collection.columns.name]
            # Named ``collection_type`` to avoid shadowing the ``type``
            # builtin.
            collection_type = CollectionType(row["type"])
            if collection_type is CollectionType.CHAINED:
                # Chained collections cannot be constructed until the names
                # of their children have been fetched, so only remember them.
                chained_ids.append(name)
                continue
            record: CollectionRecord[str]
            if collection_type is CollectionType.RUN:
                record = RunRecord[str](
                    key=name,
                    name=name,
                    host=row[self._tables.run.columns.host],
                    timespan=TimespanReprClass.extract(row),
                )
            else:
                record = CollectionRecord[str](key=name, name=name, type=collection_type)
            records.append(record)

        return records, chained_ids

    def _rows_to_chains(self, rows: Iterable[Mapping], chained_ids: list[str]) -> list[CollectionRecord[str]]:
        """Convert rows returned from collection chain query to a list of
        records.

        Parameters
        ----------
        rows : `~collections.abc.Iterable` [ `~collections.abc.Mapping` ]
            Rows with ``parent``, ``position`` and ``child`` keys.
        chained_ids : `list` [ `str` ]
            Names of the chained collections to build records for.  Every
            ``parent`` in ``rows`` has to be one of these names.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            One `ChainedCollectionRecord` per name in ``chained_ids``, with
            children ordered by position; a chain without rows has no
            children.
        """
        # Start every requested chain with an empty definition so that chains
        # without children still get a record.
        chains_defs: dict[str, list[tuple[int, str]]] = {chain_id: [] for chain_id in chained_ids}
        for row in rows:
            chains_defs[row["parent"]].append((row["position"], row["child"]))

        records: list[CollectionRecord[str]] = []
        for name, children in chains_defs.items():
            # Sorting the (position, child) pairs orders children by position.
            children_names = [child for _, child in sorted(children)]
            record = ChainedCollectionRecord[str](
                key=name,
                name=name,
                children=children_names,
            )
            records.append(record)

        return records

    def _select_pkey_by_name(self, collection_name: str) -> sqlalchemy.Select:
        # The name column is the primary key in this schema; it is selected
        # under the label "key" together with the collection type.
        table = self._tables.collection
        return sqlalchemy.select(table.c.name.label("key"), table.c.type).where(
            table.c.name == collection_name
        )

    @classmethod
    def currentVersions(cls) -> list[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return [_VERSION]