Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 36%

106 statements  

coverage.py v7.3.0, created at 2023-09-02 09:34 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetType,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
    DimensionUniverse,
    timespan,
)
from .._collectionType import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)

class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @abstractmethod
    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> tuple[DatasetType, list[str | None]]:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        single_parent : `DatasetType`
            The matched parent dataset type.
        single_components : `list` [ `str` | `None` ]
            The matched components that correspond to this parent, or `None` if
            the parent dataset type itself was matched.

        Notes
        -----
        This method really finds a single parent dataset type and any number of
        components, because it's only the parent dataset type that's known to
        registry at all; many callers are expected to discard the
        ``single_components`` return value.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression,
            components=components,
            missing=missing,
            explicit_only=explicit_only,
            components_deprecated=components_deprecated,
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        ((single_parent, single_components),) = matching.items()
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent, single_components

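    # Illustrative usage sketch (not part of the original module): how a caller
    # might resolve a single dataset type through this API. ``backend`` stands
    # in for a concrete ``QueryBackend`` subclass instance and ``"flat"`` is a
    # hypothetical dataset type name; neither is defined here.
    #
    #     parent, components = backend.resolve_single_dataset_type_wildcard("flat")
    #     # ``parent`` is the registered parent ``DatasetType``; ``components``
    #     # lists matched component names, with ``None`` meaning the parent
    #     # dataset type itself was matched.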

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
                `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
                optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records

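    # Illustrative usage sketch (assumption, not from the original source):
    # resolving collections for one dataset type while collecting diagnostics.
    # ``backend``, ``dataset_type``, the collection name, and the instrument
    # value are hypothetical placeholders.
    #
    #     rejections: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         dataset_type,
    #         CollectionWildcard.from_expression(["HSC/defaults"]),
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=rejections,
    #     )
    #     if not records:
    #         # At least one diagnostic message is guaranteed when the result
    #         # is empty.
    #         print("\n".join(rejections))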

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result

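    # Illustrative sketch (assumption): a temporal join against a calibration
    # collection. ``backend``, ``bias_type``, ``records``, ``exposure_relation``,
    # and ``exposure_timespan_tag`` are hypothetical placeholders for objects a
    # caller would already have, and the column names are illustrative only.
    #
    #     with backend.context() as context:
    #         relation = backend.make_dataset_query_relation(
    #             bias_type,
    #             records,
    #             {"dataset_id", "run"},
    #             context,
    #             join_to=exposure_relation,
    #             temporal_join_on={exposure_timespan_tag},
    #         )
    #     # The dataset timespan column is added internally for the overlap
    #     # predicate and dropped again unless "timespan" was requested.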

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGraph.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.extract(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})

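    # Illustrative sketch (assumption): a find-first search over previously
    # resolved collections. ``backend``, ``dataset_type``, and ``records`` are
    # hypothetical placeholders; the column name is illustrative only.
    #
    #     with backend.context() as context:
    #         relation = backend.make_dataset_search_relation(
    #             dataset_type, records, {"dataset_id"}, context
    #         )
    #     # With more than one collection the base query is wrapped in a
    #     # FindFirstDataset operation keyed on the "rank" column; with zero or
    #     # one collection the base query is returned unchanged.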

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGraph,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
                [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).
        context : `QueryContext`
            Object that manages state for the query; used here to fetch the
            governor dimension record cache if it has not already been loaded.

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_dimension_record_cache(
        self, element_name: str, context: _C
    ) -> Mapping[DataCoordinate, DimensionRecord] | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.
        context : `.queries.SqlQueryContext`
            Context to be used to execute queries when no cached result is
            available.

        Returns
        -------
        cache : `~collections.abc.Mapping` [ `DataCoordinate`, \
                `DimensionRecord` ] or `None`
            Mapping from data ID to dimension record, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }

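    # Illustrative sketch (assumption): for the joined relation described in
    # the Notes above, with one leaf per dataset type, this method would
    # return something like
    #
    #     {
    #         frozenset({"instrument", "exposure", "detector"}),
    #         frozenset({"instrument", "physical_filter"}),
    #     }
    #
    # telling the caller that the {instrument, exposure, physical_filter} and
    # {instrument, physical_filter, band} relationships are still missing and
    # must be supplied by joining in the exposure and physical_filter
    # dimension relations.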

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")