Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 39%

112 statements  

coverage.py v7.4.3, created at 2024-03-07 11:04 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import _timespan

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_type import DatasetType
from ...dimensions import DimensionGroup, DimensionRecordSet, DimensionUniverse
from .._collection_type import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete
    registry type, and most subclasses will be paired with a `QueryContext`
    subclass.  See `QueryContext` for the division of responsibilities between
    these two interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @abstractmethod
    def caching_context(self) -> AbstractContextManager[None]:
        """Enable caching of collection records and summaries for the duration
        of the returned context manager.
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key : `~typing.Any`
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()
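
    # A minimal illustration of the expressions `resolve_collection_wildcard`
    # accepts via `CollectionWildcard.from_expression` (hypothetical collection
    # names; ``backend`` is some concrete subclass instance and ``re`` would
    # need to be imported by the caller):
    #
    #     backend.resolve_collection_wildcard("HSC/defaults")
    #     backend.resolve_collection_wildcard(
    #         re.compile("HSC/runs/.+"), collection_types={CollectionType.RUN}
    #     )
    #     backend.resolve_collection_wildcard(...)  # ``...`` matches everything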

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        missing: list[str] | None = None,
        explicit_only: bool = False,
    ) -> list[DatasetType]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        dataset_types : `list` [ `DatasetType` ]
            A list of resolved dataset types.
        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        explicit_only: bool = False,
    ) -> DatasetType:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        single : `DatasetType`
            The matched dataset type.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression, missing=missing, explicit_only=explicit_only
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        (single_parent,) = matching
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent
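
    # A minimal usage sketch for `resolve_single_dataset_type_wildcard`
    # (hypothetical dataset type name and ``backend`` instance, not part of
    # this module):
    #
    #     flat = backend.resolve_single_dataset_type_wildcard("flat")
    #
    # An expression that matches several registered dataset types raises
    # `DatasetTypeError`, and one that matches nothing raises
    # `MissingDatasetTypeError`, as implemented above.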

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried.  Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned.  At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
                `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset type.  The dictionary's
            keys are always exactly ``dataset_types`` (in the same order), and
            each nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned.  At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
                optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly.  This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet.  Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
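
    # A sketch of how this driver is typically used (hypothetical names, not
    # part of this module):
    #
    #     rejections: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         dataset_type,
    #         CollectionWildcard.from_expression(["HSC/runs/a", "HSC/runs/b"]),
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=rejections,
    #     )
    #     if not records:
    #         # ``rejections`` now explains why each candidate collection was
    #         # skipped, e.g. because its summary shows it cannot contain
    #         # datasets of this type.
    #         ...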

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query.  Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation.  See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query.  Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation.  See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap.  Must already be present in ``join_to``.  Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result
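
    # A sketch of a dataset query with a temporal join (hypothetical names and
    # relations, not part of this module): when ``collections`` includes a
    # CALIBRATION collection, the dataset timespan is joined against timespan
    # columns already present in ``join_to``.
    #
    #     timespan_tag: ColumnTag = ...  # a timespan column already in join_to
    #     relation = backend.make_dataset_query_relation(
    #         bias_type,
    #         collection_records,
    #         {"dataset_id", "run"},
    #         context,
    #         join_to=data_id_relation,
    #         temporal_join_on={timespan_tag},
    #     )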

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search.  Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``.  See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap.  Must already be present in ``join_to``.  Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery.  Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGroup.required.names to minimize the set we partition on
        # and order it in a more index-friendly way.  More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.conform(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
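
    # A sketch of a find-first search (hypothetical names, not part of this
    # module): the "rank" column is added internally to order results by
    # collection position and is dropped again before the relation is
    # returned, as implemented above.
    #
    #     relation = backend.make_dataset_search_relation(
    #         dataset_type,
    #         collection_records,  # ordered; first match wins per data ID
    #         {"dataset_id", "run"},
    #         context,
    #     )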

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included).  See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
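
    # A sketch of a doomed dataset query (hypothetical names, not part of this
    # module): the resulting relation carries the requested columns and the
    # diagnostic messages, but is known to have no rows.
    #
    #     relation = backend.make_doomed_dataset_relation(
    #         dataset_type,
    #         {"dataset_id", "run"},
    #         ["No matching collections to search."],
    #         context,
    #     )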

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGroup,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to include.  The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include.  This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations.  If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses).  If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins.  Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
                [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``.  If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future.  This is a mapping
            from governor dimension name to sets of values that dimension may
            take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]]
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()
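
    # An illustrative sketch (hypothetical values, not part of this module):
    #
    #     backend.resolve_governor_constraints(
    #         dimensions,              # with governors == {"instrument"}
    #         {"instrument": {"HSC"}},
    #     )
    #     # -> {"instrument": {"HSC"}}; a value unknown to the Registry would
    #     #    raise DataIdValueError, and an absent key would be filled from
    #     #    the Registry content.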

    @abstractmethod
    def get_dimension_record_cache(self, element_name: str) -> DimensionRecordSet | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.

        Returns
        -------
        cache : `DimensionRecordSet` or `None`
            Set of all records for the given element, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship.  For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``.  The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly.  But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs).  This is true for not
        just dimension relations themselves, but anything created from queries
        based on them, including datasets and query results.  It is possible to
        construct `LeafRelation` objects that don't satisfy this criterion
        (e.g. when accepting user-provided data IDs), and in this case
        higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
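
    # An illustrative sketch of the output (hypothetical leaf relations, not
    # part of this module): for a tree joining leaves with dimensions
    # {instrument, exposure, detector} and {instrument, physical_filter}, the
    # extracted relationships would be
    #
    #     {
    #         frozenset({"instrument", "exposure", "detector"}),
    #         frozenset({"instrument", "physical_filter"}),
    #     }
    #
    # which is missing {"instrument", "exposure", "physical_filter"}; joining
    # in the ``exposure`` dimension relation would supply it, as discussed in
    # the Notes above.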

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")