Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 38%

109 statements  

coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from ... import _timespan 

30 

31__all__ = ("QueryBackend",) 

32 

33from abc import abstractmethod 

34from collections.abc import Iterable, Mapping, Sequence, Set 

35from typing import TYPE_CHECKING, Any, Generic, TypeVar 

36 

37from lsst.daf.relation import ( 

38 BinaryOperationRelation, 

39 ColumnExpression, 

40 ColumnTag, 

41 LeafRelation, 

42 MarkerRelation, 

43 Predicate, 

44 Relation, 

45 UnaryOperationRelation, 

46) 

47 

48from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag 

49from ..._dataset_type import DatasetType 

50from ...dimensions import DataCoordinate, DimensionGroup, DimensionRecord, DimensionUniverse 

51from .._collection_type import CollectionType 

52from .._exceptions import DatasetTypeError, MissingDatasetTypeError 

53from ..wildcards import CollectionWildcard 

54from ._query_context import QueryContext 

55from .find_first_dataset import FindFirstDataset 

56 

57if TYPE_CHECKING: 

58 from ..interfaces import CollectionRecord 

59 

60 

61_C = TypeVar("_C", bound=QueryContext) 

62 

63 

64class QueryBackend(Generic[_C]): 

65 """An interface for constructing and evaluating the 

66 `~lsst.daf.relation.Relation` objects that comprise registry queries. 

67 

68 This ABC is expected to have a concrete subclass for each concrete registry 

69 type, and most subclasses will be paired with a `QueryContext` subclass. 

70 See `QueryContext` for the division of responsibilities between these two 

71 interfaces. 

72 """ 

73 

74 @property 

75 @abstractmethod 

76 def universe(self) -> DimensionUniverse: 

77 """Definition of all dimensions and dimension elements for this 

78 registry (`DimensionUniverse`). 

79 """ 

80 raise NotImplementedError() 

81 

82 def context(self) -> _C: 

83 """Return a context manager that can be used to execute queries with 

84 this backend. 

85 

86 Returns 

87 ------- 

88 context : `QueryContext` 

89 Context manager that manages state and connections needed to 

90 execute queries. 

91 """ 

92 raise NotImplementedError() 

93 

94 @abstractmethod 

95 def get_collection_name(self, key: Any) -> str: 

96 """Return the collection name associated with a collection primary key 

97 value. 

98 

99 Parameters 

100 ---------- 

101 key 

102 Collection primary key value. 

103 

104 Returns 

105 ------- 

106 name : `str` 

107 Collection name. 

108 """ 

109 raise NotImplementedError() 

110 

111 @abstractmethod 

112 def resolve_collection_wildcard( 

113 self, 

114 expression: Any, 

115 *, 

116 collection_types: Set[CollectionType] = CollectionType.all(), 

117 done: set[str] | None = None, 

118 flatten_chains: bool = True, 

119 include_chains: bool | None = None, 

120 ) -> list[CollectionRecord]: 

121 """Return the collection records that match a wildcard expression. 

122 

123 Parameters 

124 ---------- 

125 expression 

126 Names and/or patterns for collections; will be passed to 

127 `CollectionWildcard.from_expression`. 

128 collection_types : `collections.abc.Set` [ `CollectionType` ], optional 

129 If provided, only yield collections of these types. 

130 done : `set` [ `str` ], optional 

131 A set of collection names that should be skipped, updated to 

132 include all processed collection names on return. 

133 flatten_chains : `bool`, optional 

134 If `True` (default) recursively yield the child collections of 

135 `~CollectionType.CHAINED` collections. 

136 include_chains : `bool`, optional 

137 If `True`, return records for `~CollectionType.CHAINED` 

138 collections themselves. The default is the opposite of 

139 ``flatten_chains``: either return records for CHAINED collections or 

140 their children, but not both. 

141 

142 Returns 

143 ------- 

144 records : `list` [ `CollectionRecord` ] 

145 Matching collection records. 

146 """ 

147 raise NotImplementedError() 
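# Usage sketch, assuming a concrete ``backend`` instance and hypothetical
# collection names (``re`` would need to be imported by the caller):
#
#     records = backend.resolve_collection_wildcard(
#         ["HSC/defaults", re.compile(r"HSC/runs/.*")],
#         collection_types={CollectionType.RUN, CollectionType.CHAINED},
#     )
#     names = [record.name for record in records]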

148 

149 @abstractmethod 

150 def resolve_dataset_type_wildcard( 

151 self, 

152 expression: Any, 

153 components: bool | None = None, 

154 missing: list[str] | None = None, 

155 explicit_only: bool = False, 

156 components_deprecated: bool = True, 

157 ) -> dict[DatasetType, list[str | None]]: 

158 """Return the dataset types that match a wildcard expression. 

159 

160 Parameters 

161 ---------- 

162 expression 

163 Names and/or patterns for dataset types; will be passed to 

164 `DatasetTypeWildcard.from_expression`. 

165 components : `bool`, optional 

166 If `True`, apply all expression patterns to component dataset type 

167 names as well. If `False`, never apply patterns to components. If 

168 `None` (default), apply patterns to components only if their parent 

169 datasets were not matched by the expression. Fully-specified 

170 component datasets (`str` or `DatasetType` instances) are always 

171 included. 

172 missing : `list` of `str`, optional 

173 String dataset type names that were explicitly given (i.e. not 

174 regular expression patterns) but not found will be appended to this 

175 list, if it is provided. 

176 explicit_only : `bool`, optional 

177 If `True`, require explicit `DatasetType` instances or `str` names, 

178 with `re.Pattern` instances deprecated and ``...`` prohibited. 

179 components_deprecated : `bool`, optional 

180 If `True`, this is a context in which component dataset support is 

181 deprecated. This will result in a deprecation warning when 

182 ``components=True`` or ``components=None`` and a component dataset 

183 is matched. In the future this will become an error. 

184 

185 Returns 

186 ------- 

187 dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ] 

188 A mapping with resolved dataset types as keys and lists of 

189 matched component names as values, where `None` indicates the 

190 parent composite dataset type was matched. 

191 """ 

192 raise NotImplementedError() 

193 

194 def resolve_single_dataset_type_wildcard( 

195 self, 

196 expression: Any, 

197 components: bool | None = None, 

198 explicit_only: bool = False, 

199 components_deprecated: bool = True, 

200 ) -> tuple[DatasetType, list[str | None]]: 

201 """Return a single dataset type that matches a wildcard expression. 

202 

203 Parameters 

204 ---------- 

205 expression 

206 Names and/or patterns for the dataset type; will be passed to 

207 `DatasetTypeWildcard.from_expression`. 

208 components : `bool`, optional 

209 If `True`, apply all expression patterns to component dataset type 

210 names as well. If `False`, never apply patterns to components. If 

211 `None` (default), apply patterns to components only if their parent 

212 datasets were not matched by the expression. Fully-specified 

213 component datasets (`str` or `DatasetType` instances) are always 

214 included. 

215 explicit_only : `bool`, optional 

216 If `True`, require explicit `DatasetType` instances or `str` names, 

217 with `re.Pattern` instances deprecated and ``...`` prohibited. 

218 components_deprecated : `bool`, optional 

219 If `True`, this is a context in which component dataset support is 

220 deprecated. This will result in a deprecation warning when 

221 ``components=True`` or ``components=None`` and a component dataset 

222 is matched. In the future this will become an error. 

223 

224 Returns 

225 ------- 

226 single_parent : `DatasetType` 

227 The matched parent dataset type. 

228 single_components : `list` [ `str` | `None` ] 

229 The matched components that correspond to this parent, or `None` if 

230 the parent dataset type itself was matched. 

231 

232 Notes 

233 ----- 

234 This method really finds a single parent dataset type and any number of 

235 components, because it's only the parent dataset type that's known to 

236 the registry at all; many callers are expected to discard the 

237 ``single_components`` return value. 

238 """ 

239 missing: list[str] = [] 

240 matching = self.resolve_dataset_type_wildcard( 

241 expression, 

242 components=components, 

243 missing=missing, 

244 explicit_only=explicit_only, 

245 components_deprecated=components_deprecated, 

246 ) 

247 if not matching: 

248 if missing: 

249 raise MissingDatasetTypeError( 

250 "\n".join( 

251 f"Dataset type {t!r} is not registered, so no instances of it can exist." 

252 for t in missing 

253 ) 

254 ) 

255 else: 

256 raise MissingDatasetTypeError( 

257 f"No registered dataset types matched expression {expression!r}, " 

258 "so no datasets will be found." 

259 ) 

260 if len(matching) > 1: 

261 raise DatasetTypeError( 

262 f"Expression {expression!r} matched multiple parent dataset types: " 

263 f"{[t.name for t in matching]}, but only one is allowed." 

264 ) 

265 ((single_parent, single_components),) = matching.items() 

266 if missing: 

267 raise DatasetTypeError( 

268 f"Expression {expression!r} appears to involve multiple dataset types, even though only " 

269 f"one ({single_parent.name}) is registered, and only one is allowed here." 

270 ) 

271 return single_parent, single_components 
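# Usage sketch, assuming a concrete ``backend`` and a hypothetical dataset
# type name:
#
#     parent, components = backend.resolve_single_dataset_type_wildcard("calexp")
#
# ``parent`` is the registered parent DatasetType; ``components`` is a list
# such as [None] (the parent itself matched) or ["psf"] (a component
# matched).  MissingDatasetTypeError is raised when nothing matches and
# DatasetTypeError when more than one parent dataset type matches.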

272 

273 @abstractmethod 

274 def filter_dataset_collections( 

275 self, 

276 dataset_types: Iterable[DatasetType], 

277 collections: Sequence[CollectionRecord], 

278 *, 

279 governor_constraints: Mapping[str, Set[str]], 

280 rejections: list[str] | None = None, 

281 ) -> dict[DatasetType, list[CollectionRecord]]: 

282 """Filter a sequence of collections to those for which a dataset query 

283 might succeed. 

284 

285 Parameters 

286 ---------- 

287 dataset_types : `~collections.abc.Iterable` [ `DatasetType` ] 

288 Dataset types that are being queried. Must include only parent 

289 or standalone dataset types, not components. 

290 collections : `~collections.abc.Sequence` [ `CollectionRecord` ] 

291 Sequence of collections that will be searched. 

292 governor_constraints : `~collections.abc.Mapping` [ `str`, \ 

293 `~collections.abc.Set` [ `str` ] ], optional 

294 Constraints imposed by other aspects of the query on governor 

295 dimensions; collections inconsistent with these constraints will be 

296 skipped. 

297 rejections : `list` [ `str` ], optional 

298 If not `None`, a `list` to which diagnostic messages will be appended 

299 for any collection that matches ``collections`` but is not 

300 returned. At least one message is guaranteed whenever the result 

301 is empty. 

302 

303 Returns 

304 ------- 

305 dataset_collections : `dict` [ `DatasetType`, \ 

306 `list` [ `CollectionRecord` ] ] 

307 The collections to search for each dataset type. The dictionary's keys 

308 are always exactly ``dataset_types`` (in the same order), and each 

309 nested `list` of collections is ordered consistently with the 

310 given ``collections``. 

311 

312 Notes 

313 ----- 

314 This method accepts multiple dataset types and multiple collections at 

315 once to enable implementations to batch up the fetching of summary 

316 information needed to relate them. 

317 """ 

318 raise NotImplementedError() 
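# Usage sketch, assuming a concrete ``backend`` plus hypothetical dataset
# types, collection records, and governor values:
#
#     rejections: list[str] = []
#     by_type = backend.filter_dataset_collections(
#         [raw_type, bias_type],
#         collection_records,
#         governor_constraints={"instrument": {"HSC"}},
#         rejections=rejections,
#     )
#
# ``by_type[raw_type]`` is the subset of ``collection_records`` that may
# actually hold datasets of that type given the constraints, and
# ``rejections`` explains every collection that was filtered out.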

319 

320 def resolve_dataset_collections( 

321 self, 

322 dataset_type: DatasetType, 

323 collections: CollectionWildcard, 

324 *, 

325 governor_constraints: Mapping[str, Set[str]], 

326 rejections: list[str] | None = None, 

327 collection_types: Set[CollectionType] = CollectionType.all(), 

328 allow_calibration_collections: bool = False, 

329 ) -> list[CollectionRecord]: 

330 """Resolve the sequence of collections to query for a dataset type. 

331 

332 Parameters 

333 ---------- 

334 dataset_type : `DatasetType` 

335 Dataset type to be queried in the returned collections. 

336 collections : `CollectionWildcard` 

337 Expression for the collections to be queried. 

338 governor_constraints : `~collections.abc.Mapping` [ `str`, \ 

339 `~collections.abc.Set` ], optional 

340 Constraints imposed by other aspects of the query on governor 

341 dimensions; collections inconsistent with these constraints will be 

342 skipped. 

343 rejections : `list` [ `str` ], optional 

344 If not `None`, a `list` to which diagnostic messages will be appended 

345 for any collection that matches ``collections`` but is not 

346 returned. At least one message is guaranteed whenever the result 

347 is empty. 

348 collection_types : `~collections.abc.Set` [ `CollectionType` ], \ 

349 optional 

350 Collection types to consider when resolving the collection 

351 expression. 

352 allow_calibration_collections : `bool`, optional 

353 If `False`, skip (with a ``rejections`` message) any calibration 

354 collections that match ``collections`` but are not given explicitly by 

355 name, and raise `NotImplementedError` for any calibration 

356 collection that is given explicitly. This is a temporary option 

357 that will be removed when the query system can handle temporal 

358 joins involving calibration collections. 

359 

360 Returns 

361 ------- 

362 records : `list` [ `CollectionRecord` ] 

363 A new list of `CollectionRecord` instances, for collections that 

364 both match ``collections`` and may have datasets of the given type. 

365 

366 Notes 

367 ----- 

368 This is a higher-level driver for `resolve_collection_wildcard` and 

369 `filter_dataset_collections` that is mostly concerned with handling 

370 queries against `~CollectionType.CALIBRATION` collections that aren't 

371 fully supported yet. Once that support improves, this method may be 

372 removed. 

373 """ 

374 if collections == CollectionWildcard() and collection_types == CollectionType.all(): 

375 collection_types = {CollectionType.RUN} 

376 explicit_collections = frozenset(collections.strings) 

377 matching_collection_records = self.resolve_collection_wildcard( 

378 collections, collection_types=collection_types 

379 ) 

380 ((_, filtered_collection_records),) = self.filter_dataset_collections( 

381 [dataset_type], 

382 matching_collection_records, 

383 governor_constraints=governor_constraints, 

384 rejections=rejections, 

385 ).items() 

386 if not allow_calibration_collections: 

387 supported_collection_records: list[CollectionRecord] = [] 

388 for record in filtered_collection_records: 

389 if record.type is CollectionType.CALIBRATION: 

390 # If collection name was provided explicitly then raise, 

391 # since this is a kind of query we don't support yet; 

392 # otherwise collection is a part of a chained one or regex 

393 # match, and we skip it to not break queries of other 

394 # included collections. 

395 if record.name in explicit_collections: 

396 raise NotImplementedError( 

397 f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type " 

398 f"collection {record.name!r} is not yet supported." 

399 ) 

400 else: 

401 if rejections is not None: 

402 rejections.append( 

403 f"Not searching for dataset {dataset_type.name!r} in CALIBRATION " 

404 f"collection {record.name!r} because calibration queries aren't fully " 

405 "implemented; this is not an error only because the query structure " 

406 "implies that searching this collection may be incidental." 

407 ) 

408 supported_collection_records.append(record) 

409 else: 

410 supported_collection_records.append(record) 

411 else: 

412 supported_collection_records = filtered_collection_records 

413 if not supported_collection_records and rejections is not None and not rejections: 

414 rejections.append(f"No collections to search matching expression {collections!r}.") 

415 return supported_collection_records 
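# Usage sketch, assuming a concrete ``backend`` and hypothetical names:
#
#     rejections: list[str] = []
#     records = backend.resolve_dataset_collections(
#         bias_type,
#         CollectionWildcard.from_expression(["HSC/defaults"]),
#         governor_constraints={"instrument": {"HSC"}},
#         rejections=rejections,
#         allow_calibration_collections=False,
#     )
#
# ``records`` lists the collections worth querying for ``bias_type``; if it
# is empty, ``rejections`` holds at least one explanatory message.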

416 

417 @abstractmethod 

418 def _make_dataset_query_relation_impl( 

419 self, 

420 dataset_type: DatasetType, 

421 collections: Sequence[CollectionRecord], 

422 columns: Set[str], 

423 context: _C, 

424 ) -> Relation: 

425 """Construct a relation that represents an unordered query for datasets 

426 that returns matching results from all given collections. 

427 

428 Parameters 

429 ---------- 

430 dataset_type : `DatasetType` 

431 Type for the datasets being queried. 

432 collections : `~collections.abc.Sequence` [ `CollectionRecord` ] 

433 Records for collections to query. Should generally be the result 

434 of a call to `resolve_dataset_collections`, and must not be empty. 

435 columns : `~collections.abc.Set` [ `str` ] 

436 Columns to include in the relation. See `Query.find_datasets` for 

437 details. 

438 context : `QueryContext` 

439 Context that manages per-query state. 

440 

441 Returns 

442 ------- 

443 relation : `lsst.daf.relation.Relation` 

444 Relation representing a dataset query. 

445 

446 Notes 

447 ----- 

448 This method must be implemented by derived classes but is not 

449 responsible for joining the resulting relation to an existing relation. 

450 """ 

451 raise NotImplementedError() 

452 

453 def make_dataset_query_relation( 

454 self, 

455 dataset_type: DatasetType, 

456 collections: Sequence[CollectionRecord], 

457 columns: Set[str], 

458 context: _C, 

459 *, 

460 join_to: Relation | None = None, 

461 temporal_join_on: Set[ColumnTag] = frozenset(), 

462 ) -> Relation: 

463 """Construct a relation that represents an unordered query for datasets 

464 that returns matching results from all given collections. 

465 

466 Parameters 

467 ---------- 

468 dataset_type : `DatasetType` 

469 Type for the datasets being queried. 

470 collections : `~collections.abc.Sequence` [ `CollectionRecord` ] 

471 Records for collections to query. Should generally be the result 

472 of a call to `resolve_dataset_collections`, and must not be empty. 

473 columns : `~collections.abc.Set` [ `str` ] 

474 Columns to include in the relation. See `Query.find_datasets` for 

475 details. 

476 context : `QueryContext` 

477 Context that manages per-query state. 

478 join_to : `Relation`, optional 

479 Another relation to join with the query for datasets in all 

480 collections. 

481 temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional 

482 Timespan columns in ``join_to`` that calibration dataset timespans 

483 must overlap. Must already be present in ``join_to``. Ignored if 

484 ``join_to`` is `None` or if there are no calibration collections. 

485 

486 Returns 

487 ------- 

488 relation : `lsst.daf.relation.Relation` 

489 Relation representing a dataset query. 

490 """ 

491 # If we need to do a temporal join to a calibration collection, we need 

492 # to include the timespan column in the base query and prepare the join 

493 # predicate. 

494 join_predicates: list[Predicate] = [] 

495 base_timespan_tag: ColumnTag | None = None 

496 full_columns: set[str] = set(columns) 

497 if ( 

498 temporal_join_on 

499 and join_to is not None 

500 and any(r.type is CollectionType.CALIBRATION for r in collections) 

501 ): 

502 base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan") 

503 rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan) 

504 full_columns.add("timespan") 

505 for timespan_tag in temporal_join_on: 

506 lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan) 

507 join_predicates.append(lhs.predicate_method("overlaps", rhs)) 

508 # Delegate to the concrete QueryBackend subclass to do most of the 

509 # work. 

510 result = self._make_dataset_query_relation_impl( 

511 dataset_type, 

512 collections, 

513 full_columns, 

514 context=context, 

515 ) 

516 if join_to is not None: 

517 result = join_to.join( 

518 result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None 

519 ) 

520 if join_predicates and "timespan" not in columns: 

521 # Drop the timespan column we added for the join only if the 

522 # timespan wasn't requested in its own right. 

523 result = result.with_only_columns(result.columns - {base_timespan_tag}) 

524 return result 
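# Usage sketch, assuming a concrete ``backend`` and hypothetical inputs;
# ``exposure_timespan_tag`` stands for a timespan ColumnTag already present
# in ``data_id_relation``:
#
#     with backend.context() as context:
#         relation = backend.make_dataset_query_relation(
#             bias_type,
#             records,
#             {"dataset_id", "run"},
#             context,
#             join_to=data_id_relation,
#             temporal_join_on={exposure_timespan_tag},
#         )
#
# When ``records`` includes a CALIBRATION collection, the join predicate
# requires each dataset's validity timespan to overlap that column.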

525 

526 def make_dataset_search_relation( 

527 self, 

528 dataset_type: DatasetType, 

529 collections: Sequence[CollectionRecord], 

530 columns: Set[str], 

531 context: _C, 

532 *, 

533 join_to: Relation | None = None, 

534 temporal_join_on: Set[ColumnTag] = frozenset(), 

535 ) -> Relation: 

536 """Construct a relation that represents an order query for datasets 

537 that returns results from the first matching collection for each data 

538 ID. 

539 

540 Parameters 

541 ---------- 

542 dataset_type : `DatasetType` 

543 Type for the datasets being searched. 

544 collections : `~collections.abc.Sequence` [ `CollectionRecord` ] 

545 Records for collections to search. Should generally be the result 

546 of a call to `resolve_dataset_collections`, and must not be empty. 

547 columns : `~collections.abc.Set` [ `str` ] 

548 Columns to include in the ``relation``. See 

549 `make_dataset_query_relation` for options. 

550 context : `QueryContext` 

551 Context that manages per-query state. 

552 join_to : `Relation`, optional 

553 Another relation to join with the query for datasets in all 

554 collections before filtering out shadowed datasets. 

555 temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional 

556 Timespan columns in ``join_to`` that calibration dataset timespans 

557 must overlap. Must already be present in ``join_to``. Ignored if 

558 ``join_to`` is `None` or if there are no calibration collections. 

559 

560 Returns 

561 ------- 

562 relation : `lsst.daf.relation.Relation` 

563 Relation representing a find-first dataset search. 

564 """ 

565 base = self.make_dataset_query_relation( 

566 dataset_type, 

567 collections, 

568 columns | {"rank"}, 

569 context=context, 

570 join_to=join_to, 

571 temporal_join_on=temporal_join_on, 

572 ) 

573 # Query-simplification shortcut: if there is only one collection, a 

574 # find-first search is just a regular result subquery. Same if there 

575 # are no collections. 

576 if len(collections) <= 1: 

577 return base 

578 # We filter the dimension keys in the given relation through 

579 # DimensionGroup.required.names to minimize the set we partition on 

580 # and order it in a more index-friendly way. More precisely, any 

581 # index we define on dimensions will be consistent with this order, but 

582 # any particular index may not have the same dimension columns. 

583 dimensions = self.universe.conform( 

584 [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)] 

585 ) 

586 find_first = FindFirstDataset( 

587 dimensions=DimensionKeyColumnTag.generate(dimensions.required), 

588 rank=DatasetColumnTag(dataset_type.name, "rank"), 

589 ) 

590 return find_first.apply( 

591 base, preferred_engine=context.preferred_engine, require_preferred_engine=True 

592 ).with_only_columns(base.columns - {find_first.rank}) 
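# Usage sketch, assuming a concrete ``backend`` and hypothetical inputs;
# the call mirrors make_dataset_query_relation, but shadowed datasets
# (those found only in a lower-priority collection for a given data ID)
# are removed via the FindFirstDataset operation:
#
#     with backend.context() as context:
#         relation = backend.make_dataset_search_relation(
#             bias_type,
#             records,
#             {"dataset_id", "run"},
#             context,
#         )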

593 

594 def make_doomed_dataset_relation( 

595 self, 

596 dataset_type: DatasetType, 

597 columns: Set[str], 

598 messages: Iterable[str], 

599 context: _C, 

600 ) -> Relation: 

601 """Construct a relation that represents a doomed query for datasets. 

602 

603 Parameters 

604 ---------- 

605 dataset_type : `DatasetType` 

606 Dataset type being queried. 

607 columns : `~collections.abc.Set` [ `str` ] 

608 Dataset columns to include (dimension key columns are always 

609 included). See `make_dataset_query_relation` for allowed values. 

610 messages : `~collections.abc.Iterable` [ `str` ] 

611 Diagnostic messages that explain why the query is doomed to yield 

612 no rows. 

613 context : `QueryContext` 

614 Context that manages per-query state. 

615 

616 Returns 

617 ------- 

618 relation : `lsst.daf.relation.Relation` 

619 Relation with the requested columns and no rows. 

620 """ 

621 column_tags: set[ColumnTag] = set( 

622 DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names) 

623 ) 

624 column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns)) 

625 return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages)) 
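# Usage sketch, assuming hypothetical inputs: when collection resolution
# leaves nothing to search, the accumulated rejection messages can be
# propagated through an empty relation instead of raising.
#
#     if not records:
#         relation = backend.make_doomed_dataset_relation(
#             bias_type, {"dataset_id", "run"}, rejections, context
#         )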

626 

627 @abstractmethod 

628 def make_dimension_relation( 

629 self, 

630 dimensions: DimensionGroup, 

631 columns: Set[ColumnTag], 

632 context: _C, 

633 *, 

634 initial_relation: Relation | None = None, 

635 initial_join_max_columns: frozenset[ColumnTag] | None = None, 

636 initial_dimension_relationships: Set[frozenset[str]] | None = None, 

637 spatial_joins: Iterable[tuple[str, str]] = (), 

638 governor_constraints: Mapping[str, Set[str]], 

639 ) -> Relation: 

640 """Construct a relation that provides columns and constraints from 

641 dimension records. 

642 

643 Parameters 

644 ---------- 

645 dimensions : `DimensionGroup` 

646 Dimensions to include. The key columns for all dimensions (both 

647 required and implied) will be included in the returned relation. 

648 columns : `~collections.abc.Set` [ `ColumnTag` ] 

649 Dimension record columns to include. This set may include key 

650 column tags as well, though these may be ignored; the set of key 

651 columns to include is determined by the ``dimensions`` argument 

652 instead. 

653 context : `QueryContext` 

654 Context that manages per-query state. 

655 initial_relation : `~lsst.daf.relation.Relation`, optional 

656 Initial relation to join to the dimension relations. If this 

657 relation provides record columns, key columns, and relationships 

658 between key columns (see ``initial_dimension_relationships`` below) 

659 that would otherwise have been added by joining in a dimension 

660 element's relation, that relation may not be joined in at all. 

661 initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional 

662 Maximum superset of common columns for joins to 

663 ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL 

664 ``JOIN`` clauses). If provided, this is a subset of the dimension 

665 key columns in ``initial_relation``, which are otherwise all 

666 considered as potential common columns for joins. Ignored if 

667 ``initial_relation`` is not provided. 

668 initial_dimension_relationships : `~collections.abc.Set` \ 

669 [ `frozenset` [ `str` ] ], optional 

670 A set of sets of dimension names representing relationships between 

671 dimensions encoded in the rows of ``initial_relation``. If not 

672 provided (and ``initial_relation`` is), 

673 `extract_dimension_relationships` will be called on 

674 ``initial_relation``. 

675 spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ] 

676 Iterable of dimension element name pairs that should be spatially 

677 joined. 

678 governor_constraints : `~collections.abc.Mapping` [ `str`, \ 

679 `~collections.abc.Set` [ `str` ] ], optional 

680 Constraints on governor dimensions that are provided by other parts 

681 of the query that either have been included in ``initial_relation`` 

682 or are guaranteed to be added in the future. This is a mapping from 

683 governor dimension name to sets of values that dimension may take. 

684 

685 Returns 

686 ------- 

687 relation : `lsst.daf.relation.Relation` 

688 Relation containing the given dimension columns and constraints. 

689 """ 

690 raise NotImplementedError() 
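# Usage sketch, assuming a concrete ``backend`` and an open ``context``;
# the dimension names are standard LSST dimensions used only for
# illustration:
#
#     dimensions = backend.universe.conform(["instrument", "visit", "detector"])
#     relation = backend.make_dimension_relation(
#         dimensions,
#         columns=set(),
#         context=context,
#         governor_constraints={"instrument": {"HSC"}},
#     )
#
# The result carries key columns for all required and implied dimensions
# of ``dimensions`` (e.g. physical_filter and band implied by visit).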

691 

692 @abstractmethod 

693 def resolve_governor_constraints( 

694 self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]], context: _C 

695 ) -> Mapping[str, Set[str]]: 

696 """Resolve governor dimension constraints provided by user input to 

697 a query against the content in the `Registry`. 

698 

699 Parameters 

700 ---------- 

701 dimensions : `DimensionGroup` 

702 Dimensions that bound the governor dimensions to consider (via 

703 ``dimensions.governors``, more specifically). 

704 constraints : `~collections.abc.Mapping` [ `str`, \ 

705 `~collections.abc.Set` [ `str` ] ] 

706 Constraints from user input to the query (e.g. from data IDs and 

707 string expression predicates). 

708 context : `QueryContext` 

709 Object that manages state for the query; used here to fetch the 

710 governor dimension record cache if it has not already been loaded. 

711 

712 Returns 

713 ------- 

714 resolved : `~collections.abc.Mapping` [ `str`, \ 

715 `~collections.abc.Set` [ `str` ] ] 

716 A shallow copy of ``constraints`` with keys equal to 

717 ``dimensions.governors.names`` and value sets constrained by the 

718 Registry content if they were not already in ``constraints``. 

719 

720 Raises 

721 ------ 

722 DataIdValueError 

723 Raised if ``constraints`` includes governor dimension values that 

724 are not present in the `Registry`. 

725 """ 

726 raise NotImplementedError() 
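# Worked sketch, assuming hypothetical values and an open ``context``:
#
#     constraints = {"instrument": {"HSC"}}
#     resolved = backend.resolve_governor_constraints(dimensions, constraints, context)
#
# If ``dimensions.governors.names`` were {"instrument", "skymap"}, the
# result would keep {"HSC"} for "instrument" (after checking it exists in
# the registry) and fill in every registered skymap name for "skymap"; an
# unknown instrument value raises DataIdValueError.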

727 

728 @abstractmethod 

729 def get_dimension_record_cache( 

730 self, element_name: str, context: _C 

731 ) -> Mapping[DataCoordinate, DimensionRecord] | None: 

732 """Return a local cache of all `DimensionRecord` objects for a 

733 dimension element, fetching it if necessary. 

734 

735 Parameters 

736 ---------- 

737 element_name : `str` 

738 Name of the dimension element. 

739 context : `QueryContext` 

740 Context to be used to execute queries when no cached result is 

741 available. 

742 

743 Returns 

744 ------- 

745 cache : `~collections.abc.Mapping` [ `DataCoordinate`, \ 

746 `DimensionRecord` ] or `None` 

747 Mapping from data ID to dimension record, or `None` if this 

748 element's records are never cached. 

749 """ 

750 raise NotImplementedError() 

751 

752 def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]: 

753 """Extract the dimension key relationships encoded in a relation tree. 

754 

755 Parameters 

756 ---------- 

757 relation : `Relation` 

758 Relation tree to process. 

759 

760 Returns 

761 ------- 

762 relationships : `set` [ `frozenset` [ `str` ] ] 

763 Set of sets of dimension names, where each inner set represents a 

764 relationship between dimensions. 

765 

766 Notes 

767 ----- 

768 Dimension relationships include both many-to-one implied dependencies 

769 and many-to-many joins backed by "always-join" dimension elements, and 

770 it's important to join in the dimension table that defines a 

771 relationship in any query involving dimensions that are a superset of 

772 that relationship. For example, let's consider a relation tree that 

773 joins dataset existence-check relations for two dataset types, with 

774 dimensions ``{instrument, exposure, detector}`` and ``{instrument, 

775 physical_filter}``. The joined relation appears to have all dimension 

776 keys in its expanded graph present except ``band``, and the system 

777 could easily correct this by joining that dimension in directly. But 

778 it's also missing the ``{instrument, exposure, physical_filter}`` 

779 relationship we'd get from the ``exposure`` dimension's own relation 

780 (``exposure`` implies ``physical_filter``) and the similar 

781 ``{instrument, physical_filter, band}`` relationship from the 

782 ``physical_filter`` dimension relation; we need the relationship logic 

783 to recognize that those dimensions need to be joined in as well in 

784 order for the full relation to have rows that represent valid data IDs. 

785 

786 The implementation of this method relies on the assumption that 

787 `LeafRelation` objects always have rows that are consistent with all 

788 defined relationships (i.e. are valid data IDs). This is true for not 

789 just dimension relations themselves, but anything created from queries 

790 based on them, including datasets and query results. It is possible to 

791 construct `LeafRelation` objects that don't satisfy this criterion (e.g. 

792 when accepting in user-provided data IDs), and in this case 

793 higher-level guards or warnings must be provided. 

794 """ 

795 return { 

796 frozenset( 

797 tag.dimension 

798 for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns) 

799 ) 

800 for leaf_relation in self._extract_leaf_relations(relation).values() 

801 } 
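# Worked sketch mirroring the example in the notes above, with a
# hypothetical joined relation:
#
#     relationships = backend.extract_dimension_relationships(joined_relation)
#     # e.g. {frozenset({"instrument", "exposure", "detector"}),
#     #       frozenset({"instrument", "physical_filter"})}
#
# The caller can then see that the {instrument, exposure, physical_filter}
# and {instrument, physical_filter, band} relationships are still missing
# and join in the ``exposure`` and ``physical_filter`` dimension relations.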

802 

803 def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]: 

804 """Recursively extract leaf relations from a relation tree. 

805 

806 Parameters 

807 ---------- 

808 relation : `Relation` 

809 Tree to process. 

810 

811 Returns 

812 ------- 

813 leaves : `dict` [ `str`, `LeafRelation` ] 

814 Leaf relations, keyed and deduplicated by name. 

815 """ 

816 match relation: 

817 case LeafRelation() as leaf: 

818 return {leaf.name: leaf} 

819 case UnaryOperationRelation(target=target): 

820 return self._extract_leaf_relations(target) 

821 case BinaryOperationRelation(lhs=lhs, rhs=rhs): 

822 return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs) 

823 case MarkerRelation(target=target): 

824 return self._extract_leaf_relations(target) 

825 raise AssertionError("Match should be exhaustive and all branches should return.")