Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 96%

33 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DimensionRecordStorageManager",) 

30 

31from abc import abstractmethod 

32from collections.abc import Set 

33from typing import TYPE_CHECKING, Any 

34 

35from lsst.daf.relation import Join, Relation 

36 

37from ...dimensions import ( 

38 DataCoordinate, 

39 DimensionElement, 

40 DimensionGroup, 

41 DimensionRecord, 

42 DimensionRecordSet, 

43 DimensionUniverse, 

44) 

45from ...dimensions.record_cache import DimensionRecordCache 

46from ._versioning import VersionedExtension, VersionTuple 

47 

48if TYPE_CHECKING: 

49 from .. import queries 

50 from ._database import Database, StaticTablesContext 

51 

52 

class DimensionRecordStorageManager(VersionedExtension):
    """Abstract manager for the dimension records stored in a `Registry`.

    Concrete subclasses provide the operations declared here: inserting,
    synchronizing and fetching dimension records, saving and loading
    `DimensionGroup` definitions, and building the relations used to bring
    dimension records into queries.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All dimensions and dimension elements known to the `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of the registry schema; passed through to the base class.

    Notes
    -----
    When a `Registry` has more than one layer, the records of many dimension
    elements live in just one of them (frequently the base layer).  The
    logical table for the full `Registry` is the union of the records found
    in all layers.
    """

    universe: DimensionUniverse
    """All dimensions and dimension elements known to the `Registry`
    (`DimensionUniverse`).
    """

    def __init__(
        self,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ):
        # The schema version is handed to the base class; the universe is the
        # only state this class keeps for itself.
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Build a manager instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`.  It
            is used to declare the tables that must always exist in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph holding the dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in the registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            Instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` of `DimensionRecordSet` objects that can back a
        `DimensionRecordCache`.

        The method is meant to be passed as the ``fetch`` callback argument
        of `DimensionRecordCache` in contexts where direct SQL queries are
        possible.

        Notes
        -----
        Unlike the other methods of this interface this one is not marked
        abstract, so subclasses are not forced to override it; the default
        implementation raises `NotImplementedError`.
        """
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        cache: DimensionRecordCache,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Add one or more records to storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that defines the records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass that
            belongs to ``element``.
        cache : `DimensionRecordCache`
            Cache of dimension records, to be updated together with the
            database.
        replace : `bool`, optional
            When `True`, existing records in the database are replaced if
            there is a conflict.  Defaults to `False`.
        skip_existing : `bool`, optional
            When `True`, a record is not inserted if one with the same
            primary key values already exists.  Defaults to `False`.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(
        self,
        record: DimensionRecord,
        cache: DimensionRecordCache,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        """Bring the database in line with a record: insert it when it is
        missing, compare values when it is already present.

        Parameters
        ----------
        record : `DimensionRecord`
            Instance of the `DimensionRecord` subclass for the element being
            synchronized.
        cache : `DimensionRecordCache`
            Cache of dimension records, to be updated together with the
            database.
        update : `bool`, optional
            When `True`, the existing record in the database is updated if
            there is a conflict.  Defaults to `False`.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` when a new row was inserted and `False` when nothing had
            to change.  When an update was performed (which requires
            ``update=True``) a `dict` mapping the names of the updated
            columns to their previous values is returned instead.

        Raises
        ------
        DatabaseConflictError
            Raised if a record with the same primary key exists in the
            database but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Look up a single record.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose record is wanted.
        data_id : `DataCoordinate`
            Data ID identifying the record.  Implied dimensions may be
            omitted.
        cache : `DimensionRecordCache`
            Cache that is consulted first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            The record that was found, or *possibly* `None` when nothing
            matches the given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Store a `DimensionGroup` definition in the database so that it can
        later be retrieved through the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to store.

        Returns
        -------
        key : `int`
            Integer that uniquely identifies this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Read back a `DimensionGroup` that was saved in the database
        earlier.

        Parameters
        ----------
        key : `int`
            Integer that uniquely identifies the `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            The dimensions that were retrieved.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Join the records of a dimension element to a relation.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose relation is to be joined in.
        target : `~lsst.daf.relation.Relation`
            Relation to join to.  An implementation may require that it
            already contains the dimension key columns of this element, and
            may assume that any dataset or spatial join relations able to
            supply those columns are added to the relation tree first.
        join : `~lsst.daf.relation.Join`
            Join operation to apply when the implementation performs an
            actual join.  If a true join is only simulated through other
            relation operations, this object's
            `~lsst.daf.relation.Join.min_columns` and
            `~lsst.daf.relation.Join.max_columns` must still be respected.
        context : `.queries.SqlQueryContext`
            Object managing the relation engines and database-side state
            (e.g. temporary tables) of the query.

        Returns
        -------
        joined : `~lsst.daf.relation.Relation`
            New relation holding this element's dimension key and record
            columns together with every column of ``target``.  Its rows are
            limited to those that already exist in ``target`` and whose
            dimension key values for this element exist in the registry.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        """Build the relation representing a spatial join between two
        dimension elements.

        Parameters
        ----------
        element1 : `str`
            Name of the first element taking part in the join.
        element2 : `str`
            Name of the second element taking part in the join.
        context : `.queries.SqlQueryContext`
            Object managing the relation engines and database-side state
            (e.g. temporary tables) of the query.
        existing_relationships : `~collections.abc.Set` [ `frozenset` [ `str` ] ], optional
            Relationships between dimensions that the relation the result
            will be joined to already provides.  Spatial join relations that
            would duplicate them are left out of the result; if the spatial
            relationship is already established this may mean that an
            identity relation is returned.

        Returns
        -------
        relation : `~lsst.daf.relation.Relation`
            New relation representing the spatial join of the two elements.
            It is guaranteed to have key columns for all required dimensions
            of both of them.
        needs_refinement : `bool`
            `True` when the returned relation is a conservative join that
            still has to be refined with a native-iteration predicate.
        """
        raise NotImplementedError()