Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 96%

35 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-26 02:48 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DimensionRecordStorageManager",) 

30 

31from abc import abstractmethod 

32from collections.abc import Set 

33from typing import TYPE_CHECKING, Any 

34 

35from lsst.daf.relation import Join, Relation 

36 

37from ...dimensions import ( 

38 DataCoordinate, 

39 DimensionElement, 

40 DimensionGroup, 

41 DimensionRecord, 

42 DimensionRecordSet, 

43 DimensionUniverse, 

44) 

45from ...dimensions.record_cache import DimensionRecordCache 

46from ._versioning import VersionedExtension, VersionTuple 

47 

48if TYPE_CHECKING: 

49 from .. import queries 

50 from ._database import Database, StaticTablesContext 

51 

52 

class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer).  The union of the records
    across all layers forms the logical table for the full `Registry`.
    """

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None):
        # The schema version is handled by the base class; only the universe
        # is stored directly on this object.
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @abstractmethod
    def clone(self, db: Database) -> DimensionRecordStorageManager:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.

        Returns
        -------
        instance : `DimensionRecordStorageManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`, optional
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` of `DimensionRecordSet` objects that can back a
        `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.

        Returns
        -------
        cache_dict : `dict` [ `str`, `DimensionRecordSet` ]
            Record sets keyed by string (presumably the dimension element
            name; confirm against `DimensionRecordCache`).

        Notes
        -----
        Unlike the other methods of this interface, this one is not declared
        abstract; the base-class implementation raises `NotImplementedError`.
        """
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for
            ``element``.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            An instance of a `DimensionRecord` subclass to synchronize.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch.  Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or *possibly* `None` if there was no match for the
            given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Join a dimension element's records to a relation.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose relation should be joined in.
        target : `~lsst.daf.relation.Relation`
            Existing relation to join to.  Implementations may require that
            this relation already include dimension key columns for this
            dimension element and assume that dataset or spatial join relations
            that might provide these will be included in the relation tree
            first.
        join : `~lsst.daf.relation.Join`
            Join operation to use when the implementation is an actual join.
            When a true join is being simulated by other relation operations,
            this object's `~lsst.daf.relation.Join.min_columns` and
            `~lsst.daf.relation.Join.max_columns` should still be respected.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state (e.g.
            temporary tables) for the query.

        Returns
        -------
        joined : `~lsst.daf.relation.Relation`
            New relation that includes this element's dimension key and record
            columns, as well as all columns in ``target``, with rows
            constrained to those for which this element's dimension key values
            exist in the registry and rows already exist in ``target``.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        """Create a relation that represents the spatial join between two
        dimension elements.

        Parameters
        ----------
        element1 : `str`
            Name of one of the elements participating in the join.
        element2 : `str`
            Name of the other element participating in the join.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state
            (e.g. temporary tables) for the query.
        existing_relationships : `~collections.abc.Set` [ `frozenset` [ `str` \
                ] ], optional
            Relationships between dimensions that are already present in the
            relation the result will be joined to.  Spatial join relations
            that duplicate these relationships will not be included in the
            result, which may cause an identity relation to be returned if
            a spatial relationship has already been established.

        Returns
        -------
        relation : `~lsst.daf.relation.Relation`
            New relation that represents a spatial join between the two given
            elements.  Guaranteed to have key columns for all required
            dimensions of both elements.
        needs_refinement : `bool`
            Whether the returned relation represents a conservative join that
            needs refinement via native-iteration predicate.
        """
        raise NotImplementedError()

    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """