Coverage for python / lsst / daf / butler / registry / interfaces / _dimensions.py: 94%

34 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:37 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("DimensionRecordStorageManager",) 

30 

31from abc import abstractmethod 

32from collections.abc import Iterable, Mapping, Set 

33from typing import TYPE_CHECKING, Any 

34 

35from ...dimensions import ( 

36 DataCoordinate, 

37 DataIdValue, 

38 DimensionElement, 

39 DimensionGroup, 

40 DimensionRecord, 

41 DimensionRecordSet, 

42 DimensionUniverse, 

43) 

44from ...dimensions.record_cache import DimensionRecordCache 

45from ._versioning import VersionedExtension, VersionTuple 

46 

47if TYPE_CHECKING: 

48 from ...direct_query_driver import ( # Future query system (direct,server). 

49 Postprocessing, 

50 SqlJoinsBuilder, 

51 SqlSelectBuilder, 

52 ) 

53 from ...queries.tree import AnyDatasetType, Predicate # Future query system (direct,client,server). 

54 from ._database import Database, StaticTablesContext 

55 

56 

class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer). The union of the records
    across all layers forms the logical table for the full `Registry`.
    """

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None):
        # The schema version is handled by the `VersionedExtension` base
        # class; this class only adds the ``universe`` attribute.
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe

    @abstractmethod
    def clone(self, db: Database) -> DimensionRecordStorageManager:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.

        Returns
        -------
        instance : `DimensionRecordStorageManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()

    # NOTE(review): unlike its siblings this method is not decorated with
    # `abstractmethod`, so a subclass may be instantiated without overriding
    # it and only fails when the method is actually called. Presumably this
    # is intentional for managers used where direct SQL queries are not
    # possible -- confirm before changing.
    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` that can back a `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.

        Returns
        -------
        cache_dict : `dict` [ `str`, `DimensionRecordSet` ]
            Record sets to hand to the cache. The keys are presumably
            dimension element names; verify against `DimensionRecordCache`.

        Raises
        ------
        NotImplementedError
            Always raised by this base-class implementation; subclasses that
            support this operation must override it.
        """
        raise NotImplementedError()

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for the
            element this storage is associated with.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            An instance of the `DimensionRecord` subclass for the
            element this storage is associated with.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if the record violates database integrity constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch. Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or *possibly* `None` if there was no match for the
            given data ID.
        """
        raise NotImplementedError()

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        If this dimension group has already been saved, this method just
        returns the key already associated with it.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def make_joins_builder(self, element: DimensionElement, fields: Set[str]) -> SqlJoinsBuilder:
        """Make a `~lsst.daf.butler.direct_query_driver.SqlJoinsBuilder` that
        represents a dimension element table.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the table corresponds to.
        fields : `~collections.abc.Set` [ `str` ]
            Names of fields to make available in the builder. These can be any
            metadata or alternate key field in the element's schema, including
            the special ``region`` and ``timespan`` fields. Dimension keys in
            the element's schema are always included.

        Returns
        -------
        builder : `~lsst.daf.butler.direct_query_driver.SqlJoinsBuilder`
            A query-construction object representing a table or subquery. This
            is guaranteed to have rows that are unique over dimension keys and
            all possible key values for this dimension, so joining in a
            dimension element table:

            - never introduces duplicates into the query's result rows;
            - never restricts the query's rows *except* to ensure
              required-implied relationships are followed.
        """
        raise NotImplementedError()

    @abstractmethod
    def process_query_overlaps(
        self,
        dimensions: DimensionGroup,
        predicate: Predicate,
        join_operands: Iterable[DimensionGroup],
        calibration_dataset_types: Set[str | AnyDatasetType],
        allow_duplicates: bool,
        constraint_data_id: Mapping[str, DataIdValue],
    ) -> tuple[Predicate, SqlSelectBuilder, Postprocessing]:
        """Process a query's WHERE predicate and dimensions to handle spatial
        and temporal overlaps.

        Parameters
        ----------
        dimensions : `~lsst.daf.butler.dimensions.DimensionGroup`
            Full dimensions of all tables to be joined into the query (even if
            they are not included in the query results).
        predicate : `~lsst.daf.butler.queries.tree.Predicate`
            Boolean column expression that may contain user-provided spatial
            and/or temporal overlaps intermixed with other constraints.
        join_operands : `~collections.abc.Iterable` [ \
                `~lsst.daf.butler.dimensions.DimensionGroup` ]
            Dimensions of tables or subqueries that are already going to be
            joined into the query that may establish their own spatial or
            temporal relationships (e.g. a dataset search with both ``visit``
            and ``patch`` dimensions).
        calibration_dataset_types : `~collections.abc.Set` [ `str` or \
                `~lsst.daf.butler.queries.tree.AnyDatasetType` ]
            The names of dataset types that have been joined into the query via
            a search that includes at least one calibration collection.
        allow_duplicates : `bool`
            If set to `True` then query will be allowed to return non-distinct
            rows.
        constraint_data_id : `~collections.abc.Mapping` [`str`, `int` | `str`]
            Dimension values that are known to be common to all rows in the
            query result set.

        Returns
        -------
        predicate : `~lsst.daf.butler.queries.tree.Predicate`
            A version of the given predicate that preserves the overall
            behavior of the filter while possibly rewriting overlap expressions
            that have been partially moved into ``builder`` as some combination
            of new nested predicates, joins, and postprocessing.
        builder : `~lsst.daf.butler.direct_query_driver.SqlSelectBuilder`
            A query-construction helper object that includes any initial joins
            and postprocessing needed to handle overlap expressions extracted
            from the original predicate.
        postprocessing : `~lsst.daf.butler.direct_query_driver.Postprocessing`
            Struct representing post-query processing to be done in Python.

        Notes
        -----
        Implementations must delegate to `.queries.overlaps.OverlapsVisitor`
        (possibly by subclassing it) to ensure "automatic" spatial and temporal
        joins are added consistently by all query-construction implementations.
        """
        raise NotImplementedError()

    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """

380 """