Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 40%

113 statements  

coverage.py v7.5.0, created at 2024-04-27 03:00 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import _timespan

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_type import DatasetType
from ..._exceptions import MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ...dimensions import DimensionGroup, DimensionRecordSet, DimensionUniverse
from .._collection_type import CollectionType
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @abstractmethod
    def caching_context(self) -> AbstractContextManager[None]:
        """Enable caching of collection records and summaries for the duration
        of the returned context manager.
        """
        raise NotImplementedError()

    @abstractmethod
    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
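
        Examples
        --------
        Illustrative sketch only; ``backend`` stands in for an instance of a
        concrete `QueryBackend` subclass and is not defined in this module:

        >>> with backend.context() as context:
        ...     ...  # build and evaluate relations against ``context`` here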

        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key : `~typing.Any`
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
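
        Examples
        --------
        Illustrative sketch only; ``backend`` stands in for an instance of a
        concrete `QueryBackend` subclass, and the collection pattern is
        hypothetical:

        >>> import re
        >>> records = backend.resolve_collection_wildcard(
        ...     re.compile("HSC/runs/.+"),
        ...     collection_types={CollectionType.RUN},
        ... )
        >>> names = [record.name for record in records]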

        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        missing: list[str] | None = None,
        explicit_only: bool = False,
    ) -> list[DatasetType]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        dataset_types : `list` [ `DatasetType` ]
            A list of resolved dataset types.
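
        Examples
        --------
        Illustrative sketch only; ``backend`` stands in for an instance of a
        concrete `QueryBackend` subclass, and the dataset type names are
        hypothetical:

        >>> missing: list[str] = []
        >>> dataset_types = backend.resolve_dataset_type_wildcard(
        ...     ["calexp", "not_a_registered_type"], missing=missing
        ... )
        >>> # Explicitly named but unregistered dataset types end up in
        >>> # ``missing`` instead of raising.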

        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        explicit_only: bool = False,
    ) -> DatasetType:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        single : `DatasetType`
            The matched dataset type.
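
        Examples
        --------
        Illustrative sketch only; ``backend`` stands in for an instance of a
        concrete `QueryBackend` subclass, and the dataset type name is
        hypothetical:

        >>> dataset_type = backend.resolve_single_dataset_type_wildcard("calexp")
        >>> # An expression matching no registered dataset type raises
        >>> # MissingDatasetTypeError; one matching more than one raises
        >>> # DatasetTypeError.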

        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression, missing=missing, explicit_only=explicit_only
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        (single_parent,) = matching
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
            `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
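
        Examples
        --------
        Illustrative sketch only; ``backend``, ``dataset_types``, and
        ``collection_records`` stand in for objects obtained elsewhere (e.g.
        from `resolve_dataset_type_wildcard` and `resolve_collection_wildcard`),
        and the governor constraint is hypothetical:

        >>> rejections: list[str] = []
        >>> by_dataset_type = backend.filter_dataset_collections(
        ...     dataset_types,
        ...     collection_records,
        ...     governor_constraints={"instrument": {"HSC"}},
        ...     rejections=rejections,
        ... )
        >>> # ``rejections`` now holds a message for every collection that was
        >>> # filtered out.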

        """
        raise NotImplementedError()

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
            optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
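
        Examples
        --------
        Illustrative sketch only; ``backend`` and ``dataset_type`` stand in for
        objects obtained elsewhere, and the collection name and governor
        constraint are hypothetical:

        >>> wildcard = CollectionWildcard.from_expression(["HSC/defaults"])
        >>> records = backend.resolve_dataset_collections(
        ...     dataset_type,
        ...     wildcard,
        ...     governor_constraints={"instrument": {"HSC"}},
        ... )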

        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
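
        Examples
        --------
        Illustrative sketch only; ``backend``, ``dataset_type``,
        ``collection_records``, and ``context`` stand in for objects obtained
        elsewhere (see `resolve_dataset_collections` and `context`):

        >>> relation = backend.make_dataset_query_relation(
        ...     dataset_type,
        ...     collection_records,
        ...     {"dataset_id", "run"},
        ...     context,
        ... )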

        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
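
        Examples
        --------
        Illustrative sketch only; ``backend``, ``dataset_type``,
        ``collection_records``, ``context``, and ``data_id_relation`` stand in
        for objects obtained elsewhere:

        >>> relation = backend.make_dataset_search_relation(
        ...     dataset_type,
        ...     collection_records,
        ...     {"dataset_id", "run"},
        ...     context,
        ...     join_to=data_id_relation,
        ... )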

        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGroup.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.conform(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
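
        Examples
        --------
        Illustrative sketch only; ``backend``, ``dataset_type``, and ``context``
        stand in for objects obtained elsewhere, and the message text is
        hypothetical:

        >>> relation = backend.make_doomed_dataset_relation(
        ...     dataset_type,
        ...     {"dataset_id", "run"},
        ...     ["No collections matched the given expression."],
        ...     context,
        ... )
        >>> # The result has the requested columns, is guaranteed to have no
        >>> # rows, and carries the diagnostic messages.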

        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGroup,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
            [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]]
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
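
        Examples
        --------
        Illustrative sketch only; ``backend`` and ``dimensions`` stand in for
        objects obtained elsewhere, and the constraint value is hypothetical:

        >>> resolved = backend.resolve_governor_constraints(
        ...     dimensions, {"instrument": {"HSC"}}
        ... )
        >>> # Governors in ``dimensions.governors`` that were not constrained
        >>> # by the caller are filled in from the values known to the
        >>> # registry.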

        """
        raise NotImplementedError()

    @abstractmethod
    def get_dimension_record_cache(self, element_name: str) -> DimensionRecordSet | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.

        Returns
        -------
        cache : `DimensionRecordSet` or `None`
            Set of all records for this element, or `None` if this element's
            records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true for not
        just dimension relations themselves, but anything created from queries
        based on them, including datasets and query results. It is possible to
        construct `LeafRelation` objects that don't satisfy this criterion
        (e.g. when accepting user-provided data IDs), and in this case
        higher-level guards or warnings must be provided.
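
        Examples
        --------
        Illustrative sketch only; ``backend`` and ``relation`` stand in for
        objects obtained elsewhere:

        >>> relationships = backend.extract_dimension_relationships(relation)
        >>> # Each element is a ``frozenset`` of dimension names, e.g.
        >>> # ``frozenset({"instrument", "exposure", "physical_filter"})`` for
        >>> # a leaf backed by the ``exposure`` dimension's own relation.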

        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")