Coverage for python/lsst/daf/butler/registry/interfaces/_dimensions.py: 97%

39 statements  

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DimensionRecordStorageManager",)

from abc import abstractmethod
from collections.abc import Iterable, Set
from typing import TYPE_CHECKING, Any

from lsst.daf.relation import Join, Relation

from ...dimensions import (
    DataCoordinate,
    DimensionElement,
    DimensionGroup,
    DimensionRecord,
    DimensionRecordSet,
    DimensionUniverse,
)
from ...dimensions.record_cache import DimensionRecordCache
from ._versioning import VersionedExtension, VersionTuple

if TYPE_CHECKING:
    from ...direct_query_driver import QueryBuilder, QueryJoiner  # Future query system (direct,server).
    from ...queries.tree import Predicate  # Future query system (direct,client,server).
    from .. import queries  # Old Registry.query* system.
    from ._database import Database, StaticTablesContext


class DimensionRecordStorageManager(VersionedExtension):
    """An interface for managing the dimension records in a `Registry`.

    `DimensionRecordStorageManager` primarily serves as a container and factory
    for `DimensionRecordStorage` instances, which each provide access to the
    records for a different `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Universe of all dimensions and dimension elements known to the
        `Registry`.
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of the registry schema.

    Notes
    -----
    In a multi-layer `Registry`, many dimension elements will only have
    records in one layer (often the base layer). The union of the records
    across all layers forms the logical table for the full `Registry`.
    """

    def __init__(self, *, universe: DimensionUniverse, registry_schema_version: VersionTuple | None = None):
        super().__init__(registry_schema_version=registry_schema_version)
        self.universe = universe
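    # Illustrative usage sketch: a concrete subclass (the name
    # ``SqlDimensionRecordStorageManager`` is hypothetical, as are ``db``,
    # ``context``, ``instrument_record``, ``data_id``, and ``cache``) is
    # constructed once per layer and then used as the single point of access
    # for dimension records, per the method docstrings below.
    #
    #     manager = SqlDimensionRecordStorageManager.initialize(
    #         db, context, universe=universe
    #     )
    #     manager.insert(universe["instrument"], instrument_record)
    #     record = manager.fetch_one("instrument", data_id, cache)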

    @abstractmethod
    def clone(self, db: Database) -> DimensionRecordStorageManager:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.

        Returns
        -------
        instance : `DimensionRecordStorageManager`
            New manager instance with the same configuration as this instance,
            but bound to a new `Database` object.
        """
        raise NotImplementedError()
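    # Illustrative sketch: cloning yields a manager with the same
    # configuration bound to a different database connection.  ``new_db`` is
    # a hypothetical connection-specific ``Database`` instance.
    #
    #     new_manager = manager.clone(new_db)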

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DimensionRecordStorageManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        universe : `DimensionUniverse`
            Universe graph containing dimensions known to this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in the registry.

        Returns
        -------
        manager : `DimensionRecordStorageManager`
            An instance of a concrete `DimensionRecordStorageManager` subclass.
        """
        raise NotImplementedError()
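    # Illustrative sketch: ``initialize`` is meant to be called while the
    # registry declares its static tables, so the manager can declare its own
    # tables through ``context``.  The exact ``declareStaticTables`` call
    # pattern and the concrete subclass name are assumptions.
    #
    #     with db.declareStaticTables(create=True) as context:
    #         manager = SqlDimensionRecordStorageManager.initialize(
    #             db, context, universe=universe
    #         )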

    def fetch_cache_dict(self) -> dict[str, DimensionRecordSet]:
        """Return a `dict` that can back a `DimensionRecordCache`.

        This method is intended as the ``fetch`` callback argument to
        `DimensionRecordCache`, in contexts where direct SQL queries are
        possible.
        """
        raise NotImplementedError()
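    # Illustrative sketch: the returned dict feeds a ``DimensionRecordCache``
    # as its ``fetch`` callback.  The exact ``DimensionRecordCache``
    # constructor signature shown below is an assumption.
    #
    #     cache = DimensionRecordCache(
    #         manager.universe, fetch=manager.fetch_cache_dict
    #     )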

    @abstractmethod
    def insert(
        self,
        element: DimensionElement,
        *records: DimensionRecord,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        """Insert one or more records into storage.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element that provides the definition for records.
        *records : `DimensionRecord`
            One or more instances of the `DimensionRecord` subclass for the
            element this storage is associated with.
        replace : `bool`, optional
            If `True` (`False` is default), replace existing records in the
            database if there is a conflict.
        skip_existing : `bool`, optional
            If `True` (`False` is default), skip insertion if a record with
            the same primary key values already exists.

        Raises
        ------
        TypeError
            Raised if the element does not support record insertion.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()
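    # Illustrative sketch: ``visit_records`` stands in for pre-built
    # ``DimensionRecord`` instances for the ``visit`` element.
    #
    #     manager.insert(
    #         manager.universe["visit"], *visit_records, skip_existing=True
    #     )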

    @abstractmethod
    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        """Synchronize a record with the database, inserting it only if it does
        not exist and comparing values if it does.

        Parameters
        ----------
        record : `DimensionRecord`
            An instance of the `DimensionRecord` subclass for the
            element this storage is associated with.
        update : `bool`, optional
            If `True` (`False` is default), update the existing record in the
            database if there is a conflict.

        Returns
        -------
        inserted_or_updated : `bool` or `dict`
            `True` if a new row was inserted, `False` if no changes were
            needed, or a `dict` mapping updated column names to their old
            values if an update was performed (only possible if
            ``update=True``).

        Raises
        ------
        DatabaseConflictError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.
        TypeError
            Raised if the element does not support record synchronization.
        sqlalchemy.exc.IntegrityError
            Raised if one or more records violate database integrity
            constraints.
        """
        raise NotImplementedError()
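    # Illustrative sketch interpreting the three possible return values
    # (``record`` is a pre-built ``DimensionRecord``):
    #
    #     result = manager.sync(record, update=True)
    #     if result is True:
    #         pass  # a new row was inserted
    #     elif result is False:
    #         pass  # an identical row already existed
    #     else:
    #         pass  # dict of updated column names -> old values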

    @abstractmethod
    def fetch_one(
        self,
        element_name: str,
        data_id: DataCoordinate,
        cache: DimensionRecordCache,
    ) -> DimensionRecord | None:
        """Retrieve a single record from storage.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element for the record to fetch.
        data_id : `DataCoordinate`
            Data ID of the record to fetch. Implied dimensions do not need to
            be present.
        cache : `DimensionRecordCache`
            Cache to look in first.

        Returns
        -------
        record : `DimensionRecord` or `None`
            Fetched record, or `None` if there was no match for the given
            data ID.
        """
        raise NotImplementedError()
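    # Illustrative sketch: only the element's required dimensions need to be
    # present in the data ID.  The ``DataCoordinate.standardize`` call pattern
    # and the example values are assumptions; ``cache`` is a
    # ``DimensionRecordCache``.
    #
    #     data_id = DataCoordinate.standardize(
    #         instrument="HSC", detector=42, universe=manager.universe
    #     )
    #     record = manager.fetch_one("detector", data_id, cache)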

    @abstractmethod
    def save_dimension_group(self, group: DimensionGroup) -> int:
        """Save a `DimensionGroup` definition to the database, allowing it to
        be retrieved later via the returned key.

        Parameters
        ----------
        group : `DimensionGroup`
            Set of dimensions to save.

        Returns
        -------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_dimension_group(self, key: int) -> DimensionGroup:
        """Retrieve a `DimensionGroup` that was previously saved in the
        database.

        Parameters
        ----------
        key : `int`
            Integer used as the unique key for this `DimensionGroup` in the
            database.

        Returns
        -------
        dimensions : `DimensionGroup`
            Retrieved dimensions.

        Raises
        ------
        KeyError
            Raised if the given key cannot be found in the database.
        """
        raise NotImplementedError()
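    # Illustrative round-trip sketch (assumes ``DimensionUniverse.conform``
    # builds a ``DimensionGroup`` from dimension names):
    #
    #     group = manager.universe.conform(["instrument", "visit"])
    #     key = manager.save_dimension_group(group)
    #     assert manager.load_dimension_group(key) == group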

    @abstractmethod
    def join(
        self,
        element_name: str,
        target: Relation,
        join: Join,
        context: queries.SqlQueryContext,
    ) -> Relation:
        """Join this dimension element's records to a relation.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element whose relation should be joined in.
        target : `~lsst.daf.relation.Relation`
            Existing relation to join to. Implementations may require that
            this relation already include dimension key columns for this
            dimension element and assume that dataset or spatial join relations
            that might provide these will be included in the relation tree
            first.
        join : `~lsst.daf.relation.Join`
            Join operation to use when the implementation is an actual join.
            When a true join is being simulated by other relation operations,
            this object's `~lsst.daf.relation.Join.min_columns` and
            `~lsst.daf.relation.Join.max_columns` should still be respected.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state (e.g.
            temporary tables) for the query.

        Returns
        -------
        joined : `~lsst.daf.relation.Relation`
            New relation that includes this relation's dimension key and record
            columns, as well as all columns in ``target``, with rows
            constrained to those for which this element's dimension key values
            exist in the registry and rows already exist in ``target``.
        """
        raise NotImplementedError()
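    # Illustrative sketch for the old Registry.query* relation system
    # (assumes ``lsst.daf.relation.Join`` can be default-constructed and that
    # ``target`` and ``context`` already exist):
    #
    #     joined = manager.join("visit", target, Join(), context)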

    @abstractmethod
    def make_spatial_join_relation(
        self,
        element1: str,
        element2: str,
        context: queries.SqlQueryContext,
        existing_relationships: Set[frozenset[str]] = frozenset(),
    ) -> tuple[Relation, bool]:
        """Create a relation that represents the spatial join between two
        dimension elements.

        Parameters
        ----------
        element1 : `str`
            Name of one of the elements participating in the join.
        element2 : `str`
            Name of the other element participating in the join.
        context : `.queries.SqlQueryContext`
            Object that manages relation engines and database-side state
            (e.g. temporary tables) for the query.
        existing_relationships : `~collections.abc.Set` [ `frozenset` [ `str` \
                ] ], optional
            Relationships between dimensions that are already present in the
            relation the result will be joined to. Spatial join relations
            that duplicate these relationships will not be included in the
            result, which may cause an identity relation to be returned if
            a spatial relationship has already been established.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            New relation that represents a spatial join between the two given
            elements. Guaranteed to have key columns for all required
            dimensions of both elements.
        needs_refinement : `bool`
            Whether the returned relation represents a conservative join that
            needs refinement via a native-iteration predicate.
        """
        raise NotImplementedError()
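    # Illustrative sketch (``visit_detector_region`` and ``patch`` are example
    # element names; ``context`` is an existing ``SqlQueryContext``):
    #
    #     relation, needs_refinement = manager.make_spatial_join_relation(
    #         "visit_detector_region", "patch", context
    #     )
    #     if needs_refinement:
    #         pass  # follow up with a native-iteration overlap predicate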

    @abstractmethod
    def make_query_joiner(self, element: DimensionElement, fields: Set[str]) -> QueryJoiner:
        """Make a `..direct_query_driver.QueryJoiner` that represents a
        dimension element table.

        Parameters
        ----------
        element : `DimensionElement`
            Dimension element the table corresponds to.
        fields : `~collections.abc.Set` [ `str` ]
            Names of fields to make available in the joiner. These can be any
            metadata or alternate key field in the element's schema, including
            the special ``region`` and ``timespan`` fields. Dimension keys in
            the element's schema are always included.

        Returns
        -------
        joiner : `..direct_query_driver.QueryJoiner`
            A query-construction object representing a table or subquery. This
            is guaranteed to have rows that are unique over dimension keys and
            all possible key values for this dimension, so joining in a
            dimension element table:

            - never introduces duplicates into the query's result rows;
            - never restricts the query's rows *except* to ensure
              required-implied relationships are followed.
        """
        raise NotImplementedError()
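    # Illustrative sketch for the future direct query system (the element and
    # field choices are just examples; ``region`` is one of the special
    # fields named in the docstring above):
    #
    #     joiner = manager.make_query_joiner(
    #         manager.universe["visit"], fields={"region"}
    #     )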

    @abstractmethod
    def process_query_overlaps(
        self,
        dimensions: DimensionGroup,
        predicate: Predicate,
        join_operands: Iterable[DimensionGroup],
    ) -> tuple[Predicate, QueryBuilder]:
        """Process a query's WHERE predicate and dimensions to handle spatial
        and temporal overlaps.

        Parameters
        ----------
        dimensions : `..dimensions.DimensionGroup`
            Full dimensions of all tables to be joined into the query (even if
            they are not included in the query results).
        predicate : `..queries.tree.Predicate`
            Boolean column expression that may contain user-provided spatial
            and/or temporal overlaps intermixed with other constraints.
        join_operands : `~collections.abc.Iterable` [ \
                `..dimensions.DimensionGroup` ]
            Dimensions of tables or subqueries that are already going to be
            joined into the query that may establish their own spatial or
            temporal relationships (e.g. a dataset search with both ``visit``
            and ``patch`` dimensions).

        Returns
        -------
        predicate : `..queries.tree.Predicate`
            A version of the given predicate that preserves the overall
            behavior of the filter while possibly rewriting overlap expressions
            that have been partially moved into ``builder`` as some combination
            of new nested predicates, joins, and postprocessing.
        builder : `..direct_query_driver.QueryBuilder`
            A query-construction helper object that includes any initial joins
            and postprocessing needed to handle overlap expressions extracted
            from the original predicate.

        Notes
        -----
        Implementations must delegate to `.queries.overlaps.OverlapsVisitor`
        (possibly by subclassing it) to ensure "automatic" spatial and temporal
        joins are added consistently by all query-construction implementations.
        """
        raise NotImplementedError()
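    # Illustrative sketch (``dimensions`` and ``predicate`` come from the
    # query under construction; passing no ``join_operands`` is simply the
    # most basic case):
    #
    #     new_predicate, builder = manager.process_query_overlaps(
    #         dimensions, predicate, join_operands=[]
    #     )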

    universe: DimensionUniverse
    """Universe of all dimensions and dimension elements known to the
    `Registry` (`DimensionUniverse`).
    """