Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 29%

105 statements  

coverage.py v6.5.0, created at 2023-03-04 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ("QueryBackend",) 

24 

25from abc import abstractmethod 

26from collections.abc import Iterable, Mapping, Sequence, Set 

27from typing import TYPE_CHECKING, Any, Generic, TypeVar 

28 

29from lsst.daf.relation import ( 

30 BinaryOperationRelation, 

31 ColumnTag, 

32 LeafRelation, 

33 MarkerRelation, 

34 Relation, 

35 UnaryOperationRelation, 

36) 

37 

38from ...core import ( 

39 DataCoordinate, 

40 DatasetColumnTag, 

41 DatasetType, 

42 DimensionGraph, 

43 DimensionKeyColumnTag, 

44 DimensionRecord, 

45 DimensionUniverse, 

46) 

47from .._collectionType import CollectionType 

48from .._exceptions import DatasetTypeError, MissingDatasetTypeError 

49from ..wildcards import CollectionWildcard 

50from ._query_context import QueryContext 

51from .find_first_dataset import FindFirstDataset 

52 

53if TYPE_CHECKING:

54 from ..interfaces import CollectionRecord 

55 

56 

57_C = TypeVar("_C", bound=QueryContext) 

58 

59 

60class QueryBackend(Generic[_C]): 

61 """An interface for constructing and evaluating the 

62 `~lsst.daf.relation.Relation` objects that comprise registry queries. 

63 

64 This ABC is expected to have a concrete subclass for each concrete registry 

65 type, and most subclasses will be paired with a `QueryContext` subclass. 

66 See `QueryContext` for the division of responsibilities between these two 

67 interfaces. 

68 """ 

69 

70 @property 

71 @abstractmethod 

72 def universe(self) -> DimensionUniverse: 

73 """Definition of all dimensions and dimension elements for this 

74 registry (`DimensionUniverse`). 

75 """ 

76 raise NotImplementedError() 

77 

78 def context(self) -> _C: 

79 """Return a context manager that can be used to execute queries with 

80 this backend. 

81 

82 Returns 

83 ------- 

84 context : `QueryContext` 

85 Context manager that manages state and connections needed to 

86 execute queries. 

87 """ 

88 raise NotImplementedError() 

89 

90 @abstractmethod 

91 def get_collection_name(self, key: Any) -> str: 

92 """Return the collection name associated with a collection primary key 

93 value. 

94 

95 Parameters 

96 ---------- 

97 key 

98 Collection primary key value. 

99 

100 Returns 

101 ------- 

102 name : `str` 

103 Collection name. 

104 """ 

105 raise NotImplementedError() 

106 

107 @abstractmethod 

108 def resolve_collection_wildcard( 

109 self, 

110 expression: Any, 

111 *, 

112 collection_types: Set[CollectionType] = CollectionType.all(), 

113 done: set[str] | None = None, 

114 flatten_chains: bool = True, 

115 include_chains: bool | None = None, 

116 ) -> list[CollectionRecord]: 

117 """Return the collection records that match a wildcard expression. 

118 

119 Parameters 

120 ---------- 

121 expression 

122 Names and/or patterns for collections; will be passed to 

123 `CollectionWildcard.from_expression`. 

124 collection_types : `collections.abc.Set` [ `CollectionType` ], optional 

125 If provided, only yield collections of these types. 

126 done : `set` [ `str` ], optional 

127 A set of collection names that should be skipped, updated to 

128 include all processed collection names on return. 

129 flatten_chains : `bool`, optional 

130 If `True` (default) recursively yield the child collections of 

131 `~CollectionType.CHAINED` collections. 

132 include_chains : `bool`, optional 

133 If `True`, return records for `~CollectionType.CHAINED`

134 collections themselves. The default is the opposite of

135 ``flatten_chains``: either return records for CHAINED collections or

136 their children, but not both.

137 

138 Returns 

139 ------- 

140 records : `list` [ `CollectionRecord` ] 

141 Matching collection records. 

142 """ 

143 raise NotImplementedError() 

144 
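
A hedged usage sketch for the wildcard resolution above; the ``backend`` instance and the collection name are hypothetical, and only the signature is taken from the source::

    done: set[str] = set()
    records = backend.resolve_collection_wildcard(
        ["HSC/raw/all"],  # hypothetical collection name
        collection_types={CollectionType.RUN, CollectionType.CHAINED},
        done=done,
        flatten_chains=True,
    )
    run_names = [record.name for record in records]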

145 @abstractmethod 

146 def resolve_dataset_type_wildcard( 

147 self, 

148 expression: Any, 

149 components: bool | None = None, 

150 missing: list[str] | None = None, 

151 explicit_only: bool = False, 

152 components_deprecated: bool = True, 

153 ) -> dict[DatasetType, list[str | None]]: 

154 """Return the dataset types that match a wildcard expression. 

155 

156 Parameters 

157 ---------- 

158 expression 

159 Names and/or patterns for dataset types; will be passed to 

160 `DatasetTypeWildcard.from_expression`. 

161 components : `bool`, optional 

162 If `True`, apply all expression patterns to component dataset type 

163 names as well. If `False`, never apply patterns to components. If 

164 `None` (default), apply patterns to components only if their parent 

165 datasets were not matched by the expression. Fully-specified 

166 component datasets (`str` or `DatasetType` instances) are always 

167 included. 

168 missing : `list` of `str`, optional 

169 String dataset type names that were explicitly given (i.e. not 

170 regular expression patterns) but not found will be appended to this 

171 list, if it is provided. 

172 explicit_only : `bool`, optional 

173 If `True`, require explicit `DatasetType` instances or `str` names, 

174 with `re.Pattern` instances deprecated and ``...`` prohibited. 

175 components_deprecated : `bool`, optional 

176 If `True`, this is a context in which component dataset support is 

177 deprecated. This will result in a deprecation warning when 

178 ``components=True`` or ``components=None`` and a component dataset 

179 is matched. In the future this will become an error. 

180 

181 Returns 

182 ------- 

183 dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]

184 A mapping with resolved dataset types as keys and lists of 

185 matched component names as values, where `None` indicates the 

186 parent composite dataset type was matched. 

187 """ 

188 raise NotImplementedError() 

189 

190 def resolve_single_dataset_type_wildcard( 

191 self, 

192 expression: Any, 

193 components: bool | None = None, 

194 explicit_only: bool = False, 

195 components_deprecated: bool = True, 

196 ) -> tuple[DatasetType, list[str | None]]: 

197 """Return a single dataset type that matches a wildcard expression. 

198 

199 Parameters 

200 ---------- 

201 expression 

202 Names and/or patterns for the dataset type; will be passed to 

203 `DatasetTypeWildcard.from_expression`. 

204 components : `bool`, optional 

205 If `True`, apply all expression patterns to component dataset type 

206 names as well. If `False`, never apply patterns to components. If 

207 `None` (default), apply patterns to components only if their parent 

208 datasets were not matched by the expression. Fully-specified 

209 component datasets (`str` or `DatasetType` instances) are always 

210 included. 

211 explicit_only : `bool`, optional 

212 If `True`, require explicit `DatasetType` instances or `str` names, 

213 with `re.Pattern` instances deprecated and ``...`` prohibited. 

214 components_deprecated : `bool`, optional 

215 If `True`, this is a context in which component dataset support is 

216 deprecated. This will result in a deprecation warning when 

217 ``components=True`` or ``components=None`` and a component dataset 

218 is matched. In the future this will become an error. 

219 

220 Returns 

221 ------- 

222 single_parent : `DatasetType` 

223 The matched parent dataset type. 

224 single_components : `list` [ `str` | `None` ] 

225 The matched components that correspond to this parent, or `None` if 

226 the parent dataset type itself was matched. 

227 

228 Notes 

229 ----- 

230 This method really finds a single parent dataset type and any number of 

231 components, because it's only the parent dataset type that's known to 

232 registry at all; many callers are expected to discard the 

233 ``single_components`` return value. 

234 """ 

235 missing: list[str] = [] 

236 matching = self.resolve_dataset_type_wildcard( 

237 expression, 

238 components=components, 

239 missing=missing, 

240 explicit_only=explicit_only, 

241 components_deprecated=components_deprecated, 

242 ) 

243 if not matching: 

244 if missing: 

245 raise MissingDatasetTypeError( 

246 "\n".join( 

247 f"Dataset type {t!r} is not registered, so no instances of it can exist." 

248 for t in missing 

249 ) 

250 ) 

251 else: 

252 raise MissingDatasetTypeError( 

253 f"No registered dataset types matched expression {expression!r}, " 

254 "so no datasets will be found." 

255 ) 

256 if len(matching) > 1: 

257 raise DatasetTypeError( 

258 f"Expression {expression!r} matched multiple parent dataset types: " 

259 f"{[t.name for t in matching]}, but only one is allowed." 

260 ) 

261 ((single_parent, single_components),) = matching.items() 

262 if missing: 

263 raise DatasetTypeError( 

264 f"Expression {expression!r} appears to involve multiple dataset types, even though only " 

265 f"one ({single_parent.name}) is registered, and only one is allowed here." 

266 ) 

267 return single_parent, single_components 

268 
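
The one-entry unpacking on line 261 is what enforces "exactly one parent dataset type"; a minimal pure-Python illustration of the idiom, independent of the butler classes::

    matching = {"calexp": [None]}  # stand-in for a {DatasetType: [component, ...]} result
    ((parent, components),) = matching.items()
    assert parent == "calexp" and components == [None]
    # With zero entries or more than one, the same statement raises ValueError,
    # which is why the checks above raise richer errors before reaching it.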

269 @abstractmethod 

270 def filter_dataset_collections( 

271 self, 

272 dataset_types: Iterable[DatasetType], 

273 collections: Sequence[CollectionRecord], 

274 *, 

275 governor_constraints: Mapping[str, Set[str]], 

276 rejections: list[str] | None = None, 

277 ) -> dict[DatasetType, list[CollectionRecord]]: 

278 """Filter a sequence of collections to those for which a dataset query 

279 might succeed. 

280 

281 Parameters 

282 ---------- 

283 dataset_types : `Iterable` [ `DatasetType` ] 

284 Dataset types that are being queried. Must include only parent 

285 or standalone dataset types, not components. 

286 collections : `Sequence` [ `CollectionRecord` ] 

287 Sequence of collections that will be searched. 

288 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` \

289 [ `str` ] ]

290 Constraints imposed by other aspects of the query on governor 

291 dimensions; collections inconsistent with these constraints will be 

292 skipped. 

293 rejections : `list` [ `str` ], optional 

294 If not `None`, a `list` that diagnostic messages will be appended 

295 to, for any collection that matches ``collections`` that is not 

296 returned. At least one message is guaranteed whenever the result 

297 is empty. 

298 

299 Returns 

300 ------- 

301 dataset_collections : `dict` [ `DatasetType`, \ 

302 `list` [ `CollectionRecord` ] ] 

303 The collections to search for each dataset. The dictionary's keys 

304 are always exactly ``dataset_types`` (in the same order), and each 

305 nested `list` of collections is ordered consistently with the 

306 given ``collections``. 

307 

308 Notes 

309 ----- 

310 This method accepts multiple dataset types and multiple collections at 

311 once to enable implementations to batch up the fetching of summary 

312 information needed to relate them. 

313 """ 

314 raise NotImplementedError() 

315 
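
A pure-Python sketch of the documented return contract; the dataset type and collection names and the ``summaries`` lookup are invented stand-ins, whereas real implementations consult per-collection summary information::

    dataset_types = ["raw", "calexp"]
    collections = ["run1", "run2", "run3"]
    summaries = {("raw", "run1"), ("raw", "run3"), ("calexp", "run2")}
    filtered = {
        dt: [c for c in collections if (dt, c) in summaries]
        for dt in dataset_types
    }
    # Keys are exactly the input dataset types, in order; each nested list
    # preserves the order of the input collections.
    assert filtered == {"raw": ["run1", "run3"], "calexp": ["run2"]}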

316 def resolve_dataset_collections( 

317 self, 

318 dataset_type: DatasetType, 

319 collections: CollectionWildcard, 

320 *, 

321 governor_constraints: Mapping[str, Set[str]], 

322 rejections: list[str] | None = None, 

323 collection_types: Set[CollectionType] = CollectionType.all(), 

324 allow_calibration_collections: bool = False, 

325 ) -> list[CollectionRecord]: 

326 """Resolve the sequence of collections to query for a dataset type. 

327 

328 Parameters 

329 ---------- 

330 dataset_type : `DatasetType` 

331 Dataset type to be queried in the returned collections. 

332 collections : `CollectionWildcard` 

333 Expression for the collections to be queried. 

334 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` \

335 [ `str` ] ]

336 Constraints imposed by other aspects of the query on governor 

337 dimensions; collections inconsistent with these constraints will be 

338 skipped. 

339 rejections : `list` [ `str` ], optional 

340 If not `None`, a `list` that diagnostic messages will be appended 

341 to, for any collection that matches ``collections`` that is not 

342 returned. At least one message is guaranteed whenever the result 

343 is empty. 

344 collection_types : `~collections.abc.Set` [ `CollectionType` ], \ 

345 optional 

346 Collection types to consider when resolving the collection 

347 expression. 

348 allow_calibration_collections : `bool`, optional 

349 If `False`, skip (with a ``rejections`` message) any calibration 

350 collections that match ``collections`` but are not given explicitly by

351 name, and raise `NotImplementedError` for any calibration 

352 collection that is given explicitly. This is a temporary option 

353 that will be removed when the query system can handle temporal 

354 joins involving calibration collections. 

355 

356 Returns 

357 ------- 

358 records : `list` [ `CollectionRecord` ] 

359 A new list of `CollectionRecord` instances, for collections that 

360 both match ``collections`` and may have datasets of the given type. 

361 

362 Notes 

363 ----- 

364 This is a higher-level driver for `resolve_collection_wildcard` and 

365 `filter_dataset_collections` that is mostly concerned with handling 

366 queries against `~CollectionType.CALIBRATION` collections that aren't

367 fully supported yet. Once that support improves, this method may be 

368 removed. 

369 """ 

370 if collections == CollectionWildcard() and collection_types == CollectionType.all(): 

371 collection_types = {CollectionType.RUN} 

372 explicit_collections = frozenset(collections.strings) 

373 matching_collection_records = self.resolve_collection_wildcard( 

374 collections, collection_types=collection_types 

375 ) 

376 ((_, filtered_collection_records),) = self.filter_dataset_collections( 

377 [dataset_type], 

378 matching_collection_records, 

379 governor_constraints=governor_constraints, 

380 rejections=rejections, 

381 ).items() 

382 if not allow_calibration_collections: 

383 supported_collection_records: list[CollectionRecord] = [] 

384 for record in filtered_collection_records: 

385 if record.type is CollectionType.CALIBRATION: 

386 # If collection name was provided explicitly then raise, 

387 # since this is a kind of query we don't support yet; 

388 # otherwise the collection came from a chained collection or a

389 # regex match, and we skip it so as not to break queries of

390 # other included collections.

391 if record.name in explicit_collections: 

392 raise NotImplementedError( 

393 f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type " 

394 f"collection {record.name!r} is not yet supported." 

395 ) 

396 else: 

397 if rejections is not None: 

398 rejections.append( 

399 f"Not searching for dataset {dataset_type.name!r} in CALIBRATION " 

400 f"collection {record.name!r} because calibration queries aren't fully " 

401 "implemented; this is not an error only because the query structure " 

402 "implies that searching this collection may be incidental." 

403 ) 

404 supported_collection_records.append(record) 

405 else: 

406 supported_collection_records.append(record) 

407 else: 

408 supported_collection_records = filtered_collection_records 

409 if not supported_collection_records and rejections is not None and not rejections: 

410 rejections.append(f"No collections to search matching expression {collections!r}.") 

411 return supported_collection_records 

412 
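
A hedged calling sketch for the driver above; ``backend`` and ``flat_dataset_type`` are assumed to exist, and the collection and instrument names are invented for illustration::

    rejections: list[str] = []
    records = backend.resolve_dataset_collections(
        flat_dataset_type,
        CollectionWildcard.from_expression(["HSC/defaults"]),
        governor_constraints={"instrument": {"HSC"}},
        rejections=rejections,
        allow_calibration_collections=False,
    )
    if not records:
        # At least one diagnostic message is guaranteed when the result is empty.
        print("\n".join(rejections))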

413 @abstractmethod 

414 def make_dataset_query_relation( 

415 self, 

416 dataset_type: DatasetType, 

417 collections: Sequence[CollectionRecord], 

418 columns: Set[str], 

419 context: _C, 

420 ) -> Relation: 

421 """Construct a relation that represents an unordered query for datasets 

422 that returns matching results from all given collections. 

423 

424 Parameters 

425 ---------- 

426 dataset_type : `DatasetType` 

427 Type for the datasets being queried. 

428 collections : `Sequence` [ `CollectionRecord` ] 

429 Records for collections to query. Should generally be the result 

430 of a call to `resolve_dataset_collections`, and must not be empty. 

431 context : `QueryContext` 

432 Context that manages per-query state. 

433 columns : `~collections.abc.Set` [ `str` ] 

434 Columns to include in the relation. See `Query.find_datasets` for 

435 details. 

436 

437 Returns 

438 ------- 

439 relation : `lsst.daf.relation.Relation` 

440 Relation representing a dataset query. 

441 """ 

442 raise NotImplementedError() 

443 

444 def make_dataset_search_relation( 

445 self, 

446 dataset_type: DatasetType, 

447 collections: Sequence[CollectionRecord], 

448 columns: Set[str], 

449 context: _C, 

450 *, 

451 join_to: Relation | None = None, 

452 ) -> Relation: 

453 """Construct a relation that represents an order query for datasets 

454 that returns results from the first matching collection for each 

455 data ID. 

456 

457 Parameters 

458 ---------- 

459 dataset_type : `DatasetType` 

460 Type for the datasets being searched.

461 collections : `Sequence` [ `CollectionRecord` ] 

462 Records for collections to search. Should generally be the result 

463 of a call to `resolve_dataset_collections`, and must not be empty. 

464 columns : `~collections.abc.Set` [ `str` ] 

465 Columns to include in the relation. See

466 `make_dataset_query_relation` for options. 

467 context : `QueryContext` 

468 Context that manages per-query state. 

469 join_to : `Relation`, optional 

470 Another relation to join with the query for datasets in all 

471 collections before filtering out shadowed datasets.

472 

473 Returns 

474 ------- 

475 relation : `lsst.daf.relation.Relation` 

476 Relation representing a find-first dataset search. 

477 """ 

478 base = self.make_dataset_query_relation( 

479 dataset_type, 

480 collections, 

481 columns | {"rank"}, 

482 context=context, 

483 ) 

484 if join_to is not None: 

485 base = join_to.join(base) 

486 # Query-simplification shortcut: if there is only one collection, a 

487 # find-first search is just a regular result subquery. Same if there 

488 # are no collections. 

489 if len(collections) <= 1: 

490 return base 

491 # We filter the dimension keys in the given relation through 

492 # DimensionGraph.required.names to minimize the set we partition on 

493 # and order it in a more index-friendly way. More precisely, any 

494 # index we define on dimensions will be consistent with this order, but 

495 # any particular index may not have the same dimension columns. 

496 dimensions = self.universe.extract( 

497 [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)] 

498 ) 

499 find_first = FindFirstDataset( 

500 dimensions=DimensionKeyColumnTag.generate(dimensions.required.names), 

501 rank=DatasetColumnTag(dataset_type.name, "rank"), 

502 ) 

503 return find_first.apply( 

504 base, preferred_engine=context.preferred_engine, require_preferred_engine=True 

505 ).with_only_columns(base.columns - {find_first.rank}) 

506 
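
A pure-Python sketch of the find-first semantics that the ``FindFirstDataset`` operation implements: for each data ID, keep the row whose "rank" (the position of its collection in the search path) is smallest. The rows and run names are invented; the real operation acts on relation rows in the preferred engine::

    rows = [
        {"detector": 1, "rank": 1, "run": "u/alice/run2"},
        {"detector": 1, "rank": 0, "run": "u/alice/run1"},
        {"detector": 2, "rank": 1, "run": "u/alice/run2"},
    ]
    best: dict[int, dict] = {}
    for row in rows:
        key = row["detector"]  # partition on the (minimal) dimension key columns
        if key not in best or row["rank"] < best[key]["rank"]:
            best[key] = row
    assert [r["run"] for r in best.values()] == ["u/alice/run1", "u/alice/run2"]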

507 def make_doomed_dataset_relation( 

508 self, 

509 dataset_type: DatasetType, 

510 columns: Set[str], 

511 messages: Iterable[str], 

512 context: _C, 

513 ) -> Relation: 

514 """Construct a relation that represents a doomed query for datasets. 

515 

516 Parameters 

517 ---------- 

518 dataset_type : `DatasetType` 

519 Dataset type being queried. 

520 columns : `~collections.abc.Set` [ `str` ]

521 Dataset columns to include (dimension key columns are always 

522 included). See `make_dataset_query_relation` for allowed values. 

523 messages : `Iterable` [ `str` ] 

524 Diagnostic messages that explain why the query is doomed to yield 

525 no rows. 

526 context : `QueryContext` 

527 Context that manages per-query state. 

528 

529 Returns 

530 ------- 

531 relation : `lsst.daf.relation.Relation` 

532 Relation with the requested columns and no rows. 

533 """ 

534 column_tags: set[ColumnTag] = set( 

535 DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names) 

536 ) 

537 column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns)) 

538 return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages)) 

539 

540 @abstractmethod 

541 def make_dimension_relation( 

542 self, 

543 dimensions: DimensionGraph, 

544 columns: Set[ColumnTag], 

545 context: _C, 

546 *, 

547 initial_relation: Relation | None = None, 

548 initial_join_max_columns: frozenset[ColumnTag] | None = None, 

549 initial_dimension_relationships: Set[frozenset[str]] | None = None, 

550 spatial_joins: Iterable[tuple[str, str]] = (), 

551 governor_constraints: Mapping[str, Set[str]], 

552 ) -> Relation: 

553 """Construct a relation that provides columns and constraints from 

554 dimension records. 

555 

556 Parameters 

557 ---------- 

558 dimensions : `DimensionGraph` 

559 Dimensions to include. The key columns for all dimensions (both 

560 required and implied) will be included in the returned relation. 

561 columns : `~collections.abc.Set` [ `ColumnTag` ] 

562 Dimension record columns to include. This set may include key 

563 column tags as well, though these may be ignored; the set of key 

564 columns to include is determined by the ``dimensions`` argument 

565 instead. 

566 context : `QueryContext` 

567 Context that manages per-query state. 

568 initial_relation : `~lsst.daf.relation.Relation`, optional 

569 Initial relation to join to the dimension relations. If this 

570 relation provides record columns, key columns, and relationships 

571 between key columns (see ``initial_dimension_relationships`` below) 

572 that would otherwise have been added by joining in a dimension 

573 element's relation, that relation may not be joined in at all. 

574 initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional 

575 Maximum superset of common columns for joins to 

576 ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL 

577 ``JOIN`` clauses). If provided, this is a subset of the dimension 

578 key columns in ``initial_relation``, which are otherwise all 

579 considered as potential common columns for joins. Ignored if 

580 ``initial_relation`` is not provided. 

581 initial_dimension_relationships : `~collections.abc.Set` [ `frozenset` 

582 [ `str` ] ], optional 

583 A set of sets of dimension names representing relationships between 

584 dimensions encoded in the rows of ``initial_relation``. If not 

585 provided (and ``initial_relation`` is), 

586 `extract_dimension_relationships` will be called on 

587 ``initial_relation``. 

588 spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ] 

589 Iterable of dimension element name pairs that should be spatially 

590 joined. 

591 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set`

592 [ `str` ] ]

593 Constraints on governor dimensions that are provided by other parts 

594 of the query that either have been included in ``initial_relation`` 

595 or are guaranteed to be added in the future. This is a mapping from 

596 governor dimension name to sets of values that dimension may take. 

597 

598 Returns 

599 ------- 

600 relation : `lsst.daf.relation.Relation` 

601 Relation containing the given dimension columns and constraints. 

602 """ 

603 raise NotImplementedError() 

604 

605 @abstractmethod 

606 def resolve_governor_constraints( 

607 self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C 

608 ) -> Mapping[str, Set[str]]: 

609 """Resolve governor dimension constraints provided by user input to 

610 a query against the content in the `Registry`. 

611 

612 Parameters 

613 ---------- 

614 dimensions : `DimensionGraph` 

615 Dimensions that bound the governor dimensions to consider (via 

616 ``dimensions.governors``, more specifically). 

617 constraints : `Mapping` [ `str`, `~collections.abc.Set`

618 [ `str` ] ]

619 Constraints from user input to the query (e.g. from data IDs and 

620 string expression predicates). 

621 context : `QueryContext` 

622 Object that manages state for the query; used here to fetch the 

623 governor dimension record cache if it has not already been loaded. 

624 

625 Returns 

626 ------- 

627 resolved : `Mapping` [ `str`, `~collections.abc.Set`

628 [ `str` ] ]

629 A shallow copy of ``constraints`` with keys equal to 

630 ``dimensions.governors.names`` and value sets constrained by the

631 Registry content if they were not already in ``constraints``. 

632 

633 Raises 

634 ------ 

635 DataIdValueError 

636 Raised if ``constraints`` includes governor dimension values that 

637 are not present in the `Registry`. 

638 """ 

639 raise NotImplementedError() 

640 

641 @abstractmethod 

642 def get_dimension_record_cache( 

643 self, element_name: str, context: _C 

644 ) -> Mapping[DataCoordinate, DimensionRecord] | None: 

645 """Return a local cache of all `DimensionRecord` objects for a 

646 dimension element, fetching it if necessary. 

647 

648 Parameters 

649 ---------- 

650 element_name : `str` 

651 Name of the dimension element. 

652 context : `QueryContext`

653 Context to be used to execute queries when no cached result is 

654 available. 

655 

656 Returns 

657 ------- 

658 cache : `Mapping` [ `DataCoordinate`, `DimensionRecord` ] or `None` 

659 Mapping from data ID to dimension record, or `None` if this 

660 element's records are never cached. 

661 """ 

662 raise NotImplementedError() 

663 

664 def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]: 

665 """Extract the dimension key relationships encoded in a relation tree. 

666 

667 Parameters 

668 ---------- 

669 relation : `Relation` 

670 Relation tree to process. 

671 

672 Returns 

673 ------- 

674 relationships : `set` [ `frozenset` [ `str` ] ] 

675 Set of sets of dimension names, where each inner set represents a 

676 relationship between dimensions. 

677 

678 Notes 

679 ----- 

680 Dimension relationships include both many-to-one implied dependencies 

681 and many-to-many joins backed by "always-join" dimension elements, and 

682 it's important to join in the dimension table that defines a 

683 relationship in any query involving dimensions that are a superset of 

684 that relationship. For example, let's consider a relation tree that 

685 joins dataset existence-check relations for two dataset types, with 

686 dimensions ``{instrument, exposure, detector}`` and ``{instrument, 

687 physical_filter}``. The joined relation appears to have all dimension 

688 keys in its expanded graph present except ``band``, and the system 

689 could easily correct this by joining that dimension in directly. But 

690 it's also missing the ``{instrument, exposure, physical_filter}`` 

691 relationship we'd get from the ``exposure`` dimension's own relation 

692 (``exposure`` implies ``physical_filter``) and the similar

693 ``{instrument, physical_filter, band}`` relationship from the 

694 ``physical_filter`` dimension relation; we need the relationship logic 

695 to recognize that those dimensions need to be joined in as well in 

696 order for the full relation to have rows that represent valid data IDs. 

697 

698 The implementation of this method relies on the assumption that 

699 `LeafRelation` objects always have rows that are consistent with all 

700 defined relationships (i.e. are valid data IDs). This is true not just

701 for dimension relations themselves, but for anything created from queries

702 based on them, including datasets and query results. It is possible to

703 construct `LeafRelation` objects that don't satisfy this criterion (e.g.

704 when accepting user-provided data IDs), and in this case

705 higher-level guards or warnings must be provided.

706 """ 

707 return { 

708 frozenset( 

709 tag.dimension 

710 for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns) 

711 ) 

712 for leaf_relation in self._extract_leaf_relations(relation).values() 

713 } 

714 
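
A pure-Python restatement of the worked example in the notes above: the relationships extracted from the two dataset leaves do not include the one contributed by the ``exposure`` dimension table, so that table must still be joined in::

    leaf_relationships = {
        frozenset({"instrument", "exposure", "detector"}),
        frozenset({"instrument", "physical_filter"}),
    }
    needed = frozenset({"instrument", "exposure", "physical_filter"})
    # No extracted relationship covers the needed one, so rows could pair an
    # exposure with the wrong physical_filter unless the exposure relation is
    # joined in as well.
    assert not any(needed <= rel for rel in leaf_relationships)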

715 def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]: 

716 """Recursively extract leaf relations from a relation tree. 

717 

718 Parameters 

719 ---------- 

720 relation : `Relation` 

721 Tree to process. 

722 

723 Returns 

724 ------- 

725 leaves : `dict` [ `str`, `LeafRelation` ] 

726 Leaf relations, keyed and deduplicated by name. 

727 """ 

728 match relation: 

729 case LeafRelation() as leaf: 

730 return {leaf.name: leaf} 

731 case UnaryOperationRelation(target=target): 

732 return self._extract_leaf_relations(target) 

733 case BinaryOperationRelation(lhs=lhs, rhs=rhs): 

734 return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs) 

735 case MarkerRelation(target=target): 

736 return self._extract_leaf_relations(target) 

737 raise AssertionError("Match should be exhaustive and all branches should return.")
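
A toy illustration of the traversal pattern used by ``_extract_leaf_relations``: structural pattern matching over a small node hierarchy, merging the dictionaries from both branches of a binary node with ``|``. The ``Leaf`` and ``Join`` classes are invented stand-ins for the relation types::

    from dataclasses import dataclass

    @dataclass
    class Leaf:
        name: str

    @dataclass
    class Join:
        lhs: object
        rhs: object

    def leaves(node) -> dict[str, Leaf]:
        match node:
            case Leaf() as leaf:
                return {leaf.name: leaf}
            case Join(lhs=lhs, rhs=rhs):
                return leaves(lhs) | leaves(rhs)
        raise AssertionError("unhandled node type")

    # Leaves are deduplicated by name, just as in the method above.
    assert set(leaves(Join(Leaf("a"), Join(Leaf("b"), Leaf("a"))))) == {"a", "b"}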