Coverage for python/lsst/daf/butler/registry/collections/nameKey.py: 99%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-05 02:52 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["NameKeyCollectionManager"] 

30 

31import logging 

32from collections.abc import Iterable, Mapping 

33from typing import TYPE_CHECKING, Any 

34 

35import sqlalchemy 

36 

37from ... import ddl 

38from ...column_spec import COLLECTION_NAME_MAX_LENGTH 

39from ...timespan_database_representation import TimespanDatabaseRepresentation 

40from .._collection_type import CollectionType 

41from ..interfaces import ChainedCollectionRecord, CollectionRecord, RunRecord, VersionTuple 

42from ._base import ( 

43 CollectionTablesTuple, 

44 DefaultCollectionManager, 

45 makeCollectionChainTableSpec, 

46 makeRunTableSpec, 

47) 

48 

49if TYPE_CHECKING: 

50 from .._caching_context import CachingContext 

51 from ..interfaces import Database, StaticTablesContext 

52 

53 

# Specification of the ``name`` column that is the primary key of the
# collection table.  The foreign-key helpers in this module copy its dtype
# and length when adding referencing columns to other tables.
_KEY_FIELD_SPEC = ddl.FieldSpec(
    "name",
    dtype=sqlalchemy.String,
    length=COLLECTION_NAME_MAX_LENGTH,
    primaryKey=True,
)


# Schema version reported by ``currentVersions``; this has to be updated on
# every schema change.
_VERSION = VersionTuple(2, 0, 0)


# Module-level logger.
_LOG = logging.getLogger(__name__)

64 

65 

def _makeTableSpecs(
    TimespanReprClass: type[TimespanDatabaseRepresentation],
) -> CollectionTablesTuple[ddl.TableSpec]:
    """Build the specifications of the tables used by this manager.

    Parameters
    ----------
    TimespanReprClass : `type` [ `TimespanDatabaseRepresentation` ]
        Timespan representation class forwarded to `makeRunTableSpec`.

    Returns
    -------
    specs : `CollectionTablesTuple` [ `ddl.TableSpec` ]
        Specifications for the ``collection``, ``run`` and
        ``collection_chain`` tables, all keyed by a string ``name`` column.
    """
    # The collection table itself: name (primary key), type and optional doc.
    collection_fields = [
        _KEY_FIELD_SPEC,
        ddl.FieldSpec("type", dtype=sqlalchemy.SmallInteger, nullable=False),
        ddl.FieldSpec("doc", dtype=sqlalchemy.Text, nullable=True),
    ]
    collection_spec = ddl.TableSpec(fields=collection_fields)
    # Run and chain tables come from the shared helpers, keyed by "name".
    run_spec = makeRunTableSpec("name", sqlalchemy.String, TimespanReprClass)
    chain_spec = makeCollectionChainTableSpec("name", sqlalchemy.String)
    return CollectionTablesTuple(
        collection=collection_spec,
        run=run_spec,
        collection_chain=chain_spec,
    )

80 

81 

class NameKeyCollectionManager(DefaultCollectionManager[str]):
    """A `CollectionManager` implementation in which the collection name
    itself serves as the primary key and as the foreign key in other tables,
    and which aggressively loads all collection/run records in the database
    into memory.

    The bulk of the logic, caching policy included, lives in the base class;
    only the pieces specific to this table schema are defined here.
    """

    @classmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> NameKeyCollectionManager:
        # Docstring inherited from CollectionManager.
        table_specs = _makeTableSpecs(db.getTimespanRepresentation())
        tables = context.addTableTuple(table_specs)  # type: ignore
        return cls(
            db,
            tables=tables,
            collectionIdName="name",
            caching_context=caching_context,
            registry_schema_version=registry_schema_version,
        )

    def clone(self, db: Database, caching_context: CachingContext) -> NameKeyCollectionManager:
        # Build a new manager around the given database and caching context,
        # reusing this instance's tables, key column name and schema version.
        return NameKeyCollectionManager(
            db,
            tables=self._tables,
            collectionIdName=self._collectionIdName,
            caching_context=caching_context,
            registry_schema_version=self._registry_schema_version,
        )

    @classmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        # Docstring inherited from CollectionManager.
        return f"{prefix}_name"

    @classmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        key_spec = _KEY_FIELD_SPEC
        # The new column mirrors the dtype and length of the primary key.
        column_name = cls.getCollectionForeignKeyName(prefix)
        field = ddl.FieldSpec(column_name, dtype=key_spec.dtype, length=key_spec.length, **kwargs)
        tableSpec.fields.add(field)
        if not constraint:
            return field
        foreign_key = ddl.ForeignKeySpec(
            "collection", source=(field.name,), target=(key_spec.name,), onDelete=onDelete
        )
        tableSpec.foreignKeys.append(foreign_key)
        return field

    @classmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        # Docstring inherited from CollectionManager.
        key_spec = _KEY_FIELD_SPEC
        # The new column mirrors the dtype and length of the primary key.
        column_name = cls.getRunForeignKeyName(prefix)
        field = ddl.FieldSpec(column_name, dtype=key_spec.dtype, length=key_spec.length, **kwargs)
        tableSpec.fields.add(field)
        if not constraint:
            return field
        foreign_key = ddl.ForeignKeySpec(
            "run", source=(field.name,), target=(key_spec.name,), onDelete=onDelete
        )
        tableSpec.foreignKeys.append(foreign_key)
        return field

    def getParentChains(self, key: str) -> set[str]:
        # Docstring inherited from CollectionManager.
        chain_table = self._tables.collection_chain
        # Select the "parent" value of every chain row whose "child" is `key`.
        query = sqlalchemy.sql.select(chain_table.columns["parent"]).select_from(chain_table)
        query = query.where(chain_table.columns["child"] == key)
        with self._db.query(query) as result:
            parents = set(result.scalars().all())
        return parents

    def _fetch_by_name(self, names: Iterable[str]) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        # Names are the keys in this schema, so delegate to the key lookup.
        return self._fetch_by_key(names)

    def _fetch_by_key(self, collection_ids: Iterable[str] | None) -> list[CollectionRecord[str]]:
        # Docstring inherited from base class.
        _LOG.debug("Fetching collection records using names %s.", collection_ids)
        tables = self._tables
        collection_query = sqlalchemy.sql.select(
            *tables.collection.columns, *tables.run.columns
        ).select_from(tables.collection.join(tables.run, isouter=True))

        chain_table = tables.collection_chain
        chain_query = sqlalchemy.sql.select(
            chain_table.columns["parent"],
            chain_table.columns["position"],
            chain_table.columns["child"],
        )

        # Transactions are kept as short as possible.  When everything is
        # fetched, both queries are read into memory and the transaction is
        # finished before any conversion happens.  When only a few records
        # are fetched, the first result has to be processed before the
        # second query can be issued.
        if collection_ids is None:
            with self._db.transaction():
                with self._db.query(collection_query) as result:
                    collection_rows = result.mappings().fetchall()
                with self._db.query(chain_query) as result:
                    chain_rows = result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(collection_rows)
            records += self._rows_to_chains(chain_rows, chained_ids)
            return records

        collection_query = collection_query.where(
            tables.collection.columns[self._collectionIdName].in_(collection_ids)
        )
        with self._db.transaction():
            with self._db.query(collection_query) as result:
                collection_rows = result.mappings().fetchall()

            records, chained_ids = self._rows_to_records(collection_rows)

            if chained_ids:
                # Retrieve the composition of the chained collections found.
                chain_query = chain_query.where(chain_table.columns["parent"].in_(chained_ids))
                with self._db.query(chain_query) as result:
                    chain_rows = result.mappings().fetchall()

                records += self._rows_to_chains(chain_rows, chained_ids)

        return records

    def _rows_to_records(self, rows: Iterable[Mapping]) -> tuple[list[CollectionRecord[str]], list[str]]:
        """Turn rows of the collection query into records, returning the
        names of chained collections separately.

        Records for chained collections are not built here because their
        children names are not yet known; only their names are collected.
        """
        timespan_repr = self._db.getTimespanRepresentation()
        records: list[CollectionRecord[str]] = []
        chained_names: list[str] = []
        for row in rows:
            name = row[self._tables.collection.columns.name]
            collection_type = CollectionType(row["type"])
            if collection_type is CollectionType.CHAINED:
                # Construction is deferred until children names are fetched.
                chained_names.append(name)
                continue
            record: CollectionRecord[str]
            if collection_type is CollectionType.RUN:
                record = RunRecord[str](
                    key=name,
                    name=name,
                    host=row[self._tables.run.columns.host],
                    timespan=timespan_repr.extract(row),
                )
            else:
                record = CollectionRecord[str](key=name, name=name, type=collection_type)
            records.append(record)

        return records, chained_names

    def _rows_to_chains(self, rows: Iterable[Mapping], chained_ids: list[str]) -> list[CollectionRecord[str]]:
        """Turn rows of the collection chain query into chained collection
        records, one per entry of ``chained_ids``.
        """
        # Start every requested chain with no members so that chains without
        # any rows still produce a record.
        members_by_parent: dict[str, list[tuple[int, str]]] = {}
        for parent in chained_ids:
            members_by_parent[parent] = []
        for row in rows:
            members_by_parent[row["parent"]].append((row["position"], row["child"]))

        # Sorting the (position, child) pairs orders children by position.
        return [
            ChainedCollectionRecord[str](
                key=parent,
                name=parent,
                children=[child for _, child in sorted(members)],
            )
            for parent, members in members_by_parent.items()
        ]

    def _select_pkey_by_name(self, collection_name: str) -> sqlalchemy.Select:
        # Select the name (labelled "key") and type of the collection with
        # the given name.
        collection = self._tables.collection
        query = sqlalchemy.select(collection.c.name.label("key"), collection.c.type)
        return query.where(collection.c.name == collection_name)

    @classmethod
    def currentVersions(cls) -> list[VersionTuple]:
        # Docstring inherited from VersionedExtension.
        return [_VERSION]

297 return [_VERSION]