Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 29%

105 statements  

coverage.py v6.5.0, created at 2023-01-25 02:36 -0800

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Relation,
    UnaryOperationRelation,
)

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetType,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
    DimensionUniverse,
)
from .._collectionType import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """
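    # Illustrative sketch only (the names below are assumptions, not part of
    # this module): a concrete registry is expected to pair a backend subclass
    # with a matching QueryContext subclass via the ``_C`` type parameter, e.g.
    #
    #     class SqlQueryContext(QueryContext):
    #         ...
    #
    #     class SqlQueryBackend(QueryBackend[SqlQueryContext]):
    #         def context(self) -> SqlQueryContext:
    #             return SqlQueryContext(...)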

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections or
            their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> tuple[DatasetType, list[str | None]]:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        single_parent : `DatasetType`
            The matched parent dataset type.
        single_components : `list` [ `str` | `None` ]
            The matched components that correspond to this parent, or `None` if
            the parent dataset type itself was matched.

        Notes
        -----
        This method really finds a single parent dataset type and any number of
        components, because it's only the parent dataset type that's known to
        registry at all; many callers are expected to discard the
        ``single_components`` return value.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression,
            components=components,
            missing=missing,
            explicit_only=explicit_only,
            components_deprecated=components_deprecated,
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        ((single_parent, single_components),) = matching.items()
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent, single_components
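    # Illustrative sketch only (the dataset type names here are assumptions):
    # given a registered composite dataset type "raw", a caller that only cares
    # about the parent type might do
    #
    #     parent, _ = backend.resolve_single_dataset_type_wildcard("raw.wcs")
    #     assert parent.name == "raw"
    #
    # while an expression that matches several parents (e.g.
    # re.compile("raw|bias")) would raise DatasetTypeError.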

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` ], \
                optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
                `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()
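    # Illustrative sketch only (collection and dataset type names are
    # assumptions): with dataset types [flat, bias] and collections
    # [run1, run2], an implementation that knows run2 holds no "bias" datasets
    # would return something like
    #
    #     {flat: [run1, run2], bias: [run1]}
    #
    # and, if ``rejections`` was passed, append a message explaining why run2
    # was dropped for "bias".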

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` ], \
                optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
                optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If the collection name was provided explicitly then
                    # raise, since this is a kind of query we don't support
                    # yet; otherwise the collection is part of a chained one or
                    # a regex match, and we skip it so as not to break queries
                    # of the other included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
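    # Illustrative sketch only (collection names are assumptions): resolving a
    # chained collection that contains a calibration collection might look like
    #
    #     rejections: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         bias_type,
    #         CollectionWildcard.from_expression("defaults"),
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=rejections,
    #     )
    #
    # where any CALIBRATION children of "defaults" are skipped, with an
    # explanatory message appended to ``rejections``.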

    @abstractmethod
    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        raise NotImplementedError()

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each
        data ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
        )
        if join_to is not None:
            base = join_to.join(base)
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGraph.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.extract(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
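    # Illustrative sketch only: for collections [run2, run1] searched in that
    # order, the relation produced above carries a "rank" column (0 for run2,
    # 1 for run1), and FindFirstDataset keeps, for each combination of required
    # dimension key values, only the row with the lowest rank, i.e. the dataset
    # from the first collection that has one.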

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `AbstractSet` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGraph,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` [ `frozenset`
                [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `Mapping` [ `str`, `~collections.abc.Set`
                [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()
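    # Illustrative sketch only (dimension element and governor names are
    # assumptions): a backend building the dimension part of a query might be
    # called as
    #
    #     relation = backend.make_dimension_relation(
    #         dimensions,
    #         columns=set(),
    #         context=context,
    #         spatial_joins=[("visit_detector_region", "patch")],
    #         governor_constraints={"instrument": {"HSC"}, "skymap": {"hsc_rings_v1"}},
    #     )
    #
    # yielding a relation whose rows are valid data IDs for ``dimensions``.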

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).
        context : `QueryContext`
            Object that manages state for the query; used here to fetch the
            governor dimension record cache if it has not already been loaded.

        Returns
        -------
        resolved : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()
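    # Illustrative sketch only (instrument and skymap values are assumptions):
    # if the user constrained only "instrument", an implementation fills in the
    # registered values for the remaining governors, e.g.
    #
    #     resolve_governor_constraints(dimensions, {"instrument": {"HSC"}}, context)
    #     -> {"instrument": {"HSC"}, "skymap": {"hsc_rings_v1", "discrete"}}
    #
    # and raises DataIdValueError if {"instrument": {"NotACamera"}} names a
    # value that is not present in the Registry.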

    @abstractmethod
    def get_dimension_record_cache(
        self, element_name: str, context: _C
    ) -> Mapping[DataCoordinate, DimensionRecord] | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.
        context : `.queries.SqlQueryContext`
            Context to be used to execute queries when no cached result is
            available.

        Returns
        -------
        cache : `Mapping` [ `DataCoordinate`, `DimensionRecord` ] or `None`
            Mapping from data ID to dimension record, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
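    # Illustrative sketch only, following the example in the docstring above:
    # for a tree that joins two dataset leaf relations with dimensions
    # {instrument, exposure, detector} and {instrument, physical_filter}, this
    # method would return
    #
    #     {
    #         frozenset({"instrument", "exposure", "detector"}),
    #         frozenset({"instrument", "physical_filter"}),
    #     }
    #
    # which does not include {instrument, exposure, physical_filter}, so the
    # caller knows the ``exposure`` dimension relation still needs to be
    # joined in.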

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")