# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()

import os
import unittest
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from typing import ClassVar

import astropy.time

from .._butler import Butler
from .._dataset_type import DatasetType
from .._timespan import Timespan
from ..dimensions import DataCoordinate, DimensionRecord
from ..direct_query_driver import DirectQueryDriver
from ..queries import DimensionRecordQueryResults, InvalidQueryError
from ..registry import CollectionType, NoDefaultCollectionError, RegistryDefaults
from ..registry.sql_registry import SqlRegistry
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

# Simplified tuples of the detector records we'll frequently be querying for.
DETECTOR_TUPLES = {
    1: ("Cam1", 1, "Aa", "SCIENCE"),
    2: ("Cam1", 2, "Ab", "SCIENCE"),
    3: ("Cam1", 3, "Ba", "SCIENCE"),
    4: ("Cam1", 4, "Bb", "WAVEFRONT"),
}


def make_detector_tuples(records: Iterable[DimensionRecord]) -> dict[int, tuple[str, int, str, str]]:
    """Make tuples with the same entries as DETECTOR_TUPLES from an iterable
    of detector dimension records.

    Parameters
    ----------
    records : `~collections.abc.Iterable` [ `.dimensions.DimensionRecord` ]
        Detector dimension records.

    Returns
    -------
    tuples : `dict` [ `int`, `tuple` ]
        Dictionary mapping detector ID to tuples with the same fields as the
        ``DETECTOR_TUPLES`` constant in this file.
    """
    return {record.id: (record.instrument, record.id, record.full_name, record.purpose) for record in records}


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.

    Subclasses must set `data_dir` and implement `make_butler`.
    """

    data_dir: ClassVar[str]
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with the data used in the tests
        below.

        Parameters
        ----------
        *args : `str`
            Names of the files to pass to `load_data`.

        Returns
        -------
        butler : `Butler`
            Butler to use for tests.
        """
        raise NotImplementedError()

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``, which should
        be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        once the Registry exists.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Name of the test data file, relative to `data_dir`.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def check_detector_records(
        self,
        results: DimensionRecordQueryResults,
        ids: Sequence[int] = (1, 2, 3, 4),
        ordered: bool = False,
        messages: Iterable[str] = (),
        doomed: bool = False,
        has_postprocessing: bool = False,
    ) -> None:
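        """Run a standard battery of assertions on a query for detector
        records.

        Parameters
        ----------
        results : `DimensionRecordQueryResults`
            Query results to check.
        ids : `~collections.abc.Sequence` [ `int` ], optional
            IDs of the detectors expected in the results.
        ordered : `bool`, optional
            Whether the results must appear in the same order as ``ids``.
        messages : `~collections.abc.Iterable` [ `str` ], optional
            Diagnostic messages expected from `explain_no_results`.
        doomed : `bool`, optional
            Whether the query can be recognized as returning no results
            before it is executed.
        has_postprocessing : `bool`, optional
            Whether the query requires Python-side filtering after the SQL
            query is run.
        """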
        self.assertEqual(results.element.name, "detector")
        self.assertEqual(results.dimensions, results.dimensions.universe["detector"].minimal_group)
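        # When postprocessing is present, an exact count would require
        # actually executing the query and discarding rows, so count() is
        # expected to raise unless discard=True is passed or an inexact
        # count is requested.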
        if has_postprocessing and not doomed:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertGreaterEqual(results.count(discard=False, exact=False), len(ids))
            with self.assertRaises(RuntimeError):
                results.count()
        else:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertEqual(results.count(discard=False), len(ids))
            self.assertEqual(results.count(discard=True, exact=False), len(ids))
            self.assertEqual(results.count(discard=False, exact=False), len(ids))
        self.assertEqual(results.any(), bool(ids))
        if not doomed:
            self.assertTrue(results.any(exact=False, execute=False))
            with self.assertRaises(RuntimeError):
                results.any(exact=True, execute=False)
        else:
            self.assertFalse(results.any(exact=False, execute=False))
            self.assertFalse(results.any(exact=True, execute=False))
        self.assertCountEqual(results.explain_no_results(), list(messages))
        expected = [DETECTOR_TUPLES[i] for i in ids]
        queried = list(make_detector_tuples(results).values())
        if ordered:
            self.assertEqual(queried, expected)
        else:
            self.assertCountEqual(queried, expected)

    def test_simple_record_query(self) -> None:
        """Test query-system basics with simple queries for dimension
        records.

        This includes tests for order_by, limit, and where expressions, but
        only for cases where there are no datasets, dimension projections, or
        spatial/temporal overlaps.
        """
        butler = self.make_butler("base.yaml")
        with butler._query() as query:
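            # The expression factory provides proxy objects for dimensions
            # and their fields, used below to construct 'where' and
            # 'order_by' expressions.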
            _x = query.expression_factory
            results = query.dimension_records("detector")
            self.check_detector_records(results)
            self.check_detector_records(results.order_by("detector"), ordered=True)
            self.check_detector_records(
                results.order_by(_x.detector.full_name.desc), [4, 3, 2, 1], ordered=True
            )
            self.check_detector_records(results.order_by("detector").limit(2), [1, 2], ordered=True)
            self.check_detector_records(results.where(_x.detector.raft == "B", instrument="Cam1"), [3, 4])

    def test_implied_union_record_query(self) -> None:
        """Test queries for a dimension ('band') that uses "implied union"
        storage, in which its values are the union of the values for it in
        another dimension (physical_filter) that implies it.
        """
        butler = self.make_butler("base.yaml")
        band = butler.dimensions["band"]
        self.assertEqual(band.implied_union_target, butler.dimensions["physical_filter"])
        with butler._query() as query:
            self.assertCountEqual(
                list(query.dimension_records("band")),
                [band.RecordClass(name="g"), band.RecordClass(name="r")],
            )
            self.assertCountEqual(
                list(query.where(physical_filter="Cam1-R1", instrument="Cam1").dimension_records("band")),
                [band.RecordClass(name="r")],
            )

    def test_dataset_constrained_record_query(self) -> None:
        """Test a query for dimension records constrained by the existence of
        datasets of a particular type.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        butler.registry.registerCollection("empty", CollectionType.RUN)
        butler.registry.registerCollection("chain", CollectionType.CHAINED)
        butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"])
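        # The chained collection will search 'imported_g', 'empty', and
        # 'imported_r', in that order.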
        with butler._query() as query:
            # No collections here or in the defaults is an error.
            with self.assertRaises(NoDefaultCollectionError):
                query.join_dataset_search("bias").dimension_records("detector").any()
        butler.registry.defaults = RegistryDefaults(collections=["chain"])
        with butler._query() as query:
            _x = query.expression_factory
            # Simplest case: this collection only has the first 3 detectors.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"]).dimension_records("detector"),
                [1, 2, 3],
            )
            # Together these collections have two biases for two of the
            # detectors, but this shouldn't cause duplicate results.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g", "imported_r"]).dimension_records(
                    "detector"
                ),
            )
            # Again we've got the potential for duplicates due to multiple
            # datasets with the same data ID, and this time we force the
            # deduplication to happen outside the dataset-search subquery by
            # adding a WHERE filter on a dataset column.  We also use the
            # defaulted collection ('chain') rather than passing one
            # explicitly.
            self.check_detector_records(
                query.join_dataset_search("bias")
                .where(
                    _x.any(
                        _x.all(_x["bias"].collection == "imported_g", _x.detector.raft == "B"),
                        _x.all(_x["bias"].collection == "imported_r", _x.detector.raft == "A"),
                    ),
                    instrument="Cam1",
                )
                .dimension_records("detector"),
                [2, 3],
            )
            # Flats have dimensions (physical_filter and band) that we would
            # not normally include in a query for detector records.  This
            # also should not cause duplicates.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"]).dimension_records("detector"),
                [2, 3, 4],
            )
            # No results, but for reasons we can't determine before we run
            # the query.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"])
                .where(_x.band == "r")
                .dimension_records("detector"),
                [],
            )
            # No results, and we can diagnose why before we run the query.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["empty"]).dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets of type 'bias' in collection 'empty'.",
                ],
                doomed=True,
            )
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .where(instrument="Cam2")
                .dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets with instrument='Cam2' in collection 'imported_g'.",
                ],
                doomed=True,
            )

    def test_spatial_overlaps(self) -> None:
        """Test queries for dimension records with spatial overlaps.

        Run tests/data/registry/spatial.py to plot the various regions used
        in this test.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Set default governor data ID values both to test that code path and
        # to keep us from having to repeat them in every 'where' call below.
        butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")
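        # htm7 is the skypix system through which spatial joins are mediated
        # here; grab its dimension so we can compute pixel regions directly.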
        htm7 = butler.dimensions.skypix_dimensions["htm7"]
        with butler._query() as query:
            _x = query.expression_factory
            # Query for detectors from a particular visit that overlap an
            # explicit region.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                    visit=1,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=True,
            )
            # Query for detectors from a particular visit that overlap an
            # htm7 ID.  This is basically the same query as the last one, but
            # expressed as a spatial join, and we can recognize that
            # postprocessing is not needed (in the last case the
            # postprocessing did nothing, but we couldn't tell that in
            # advance because the query didn't know the region came from
            # htm7).
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Repeat the last query but with the spatial join implicit rather
            # than explicit.
            self.check_detector_records(
                query.where(
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Query for the detectors from any visit that overlap a region:
            # this gets contributions from multiple visits, and would have
            # duplicates if we didn't get rid of them via GROUP BY.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=True,
            )
            # Once again we rewrite the region-constraint query as a spatial
            # join, which drops the postprocessing.  This join has to be
            # explicit because `visit` no longer gets into the query
            # dimensions any other way, and without it `detector` is not
            # spatial.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=False,
            )
            # Query for detectors from any visit that overlap a patch.  This
            # requires joining visit_detector_region to htm7 and htm7 to
            # patch, and then some postprocessing.  We want to make sure
            # there are no duplicates from a detector and patch both
            # overlapping multiple htm7 tiles (which affects detectors 1 and
            # 2) and that postprocessing filters out detector 4, which has
            # one htm7 tile in common with the patch but does not actually
            # overlap it.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.patch.region),
                    tract=0,
                    patch=4,
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Query for that patch's region and express the previous query as
            # a region constraint instead of a spatial join.
            (patch_record,) = query.where(tract=0, patch=4).dimension_records("patch")
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Combine postprocessing with order_by and limit.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                )
                .dimension_records("detector")
                .order_by(_x.detector.desc)
                .limit(2),
                [3, 2],
                has_postprocessing=True,
            )
            # Try a case where there are some records before postprocessing
            # but none afterwards.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                    detector=4,
                ).dimension_records("detector"),
                [],
                has_postprocessing=True,
            )

    def test_common_skypix_overlaps(self) -> None:
        """Test spatial overlap queries that return htm7 records."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Insert some datasets that use a skypix dimension, since some
        # queries are only possible if a superset of the skypix IDs is in
        # the query already.
        cat1 = DatasetType("cat1", dimensions=butler.dimensions.conform(["htm7"]), storageClass="ArrowTable")
        butler.registry.registerDatasetType(cat1)
        butler.registry.registerCollection("refcats", CollectionType.RUN)
        butler.registry.insertDatasets(cat1, [{"htm7": i} for i in range(253952, 253968)], run="refcats")
        with butler._query() as query:
            _x = query.expression_factory
            # Explicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.htm7.region.overlaps(_x.patch.region), skymap="SkyMap1", tract=0, patch=4
                    ).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Implicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Constraint on the patch region (with the query not knowing it
            # corresponds to that patch).
            (patch,) = query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("patch")
            self.assertCountEqual(
                [
                    record.id
                    for record in query.join_dataset_search("cat1", collections=["refcats"])
                    .where(_x.htm7.region.overlaps(patch.region))
                    .dimension_records("htm7")
                ],
                [253954, 253955],
            )

    def test_data_coordinate_upload(self) -> None:
        """Test queries for dimension records with a data coordinate upload."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            # Query with a data ID upload that has an irrelevant row (there's
            # no data with "Cam2").
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam2", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that directly contains duplicates,
            # which should not appear in the results.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that
            # could also introduce duplicates if we're not careful.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that are
            # used in a constraint.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [3],
            )
            # Query with a data ID upload that must be spatially joined to
            # the other dimensions.  This join is added automatically.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=2, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [2, 3, 4],
                has_postprocessing=True,
            )
            # Query with a data ID upload that embeds a spatial relationship.
            # This prevents automatic creation of a spatial join.  To make
            # the test more interesting, the spatial relationship embedded in
            # these data IDs is nonsense: it includes combinations that do
            # not overlap, while leaving out combinations that do overlap.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=1,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=2,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=3,
                            instrument="Cam1",
                            visit=1,
                            detector=3,
                            universe=butler.dimensions,
                        ),
                    ]
                )
                .where(skymap="SkyMap1", tract=1, patch=1)
                .dimension_records("detector"),
                [1, 2],
            )
            # Query with an empty data ID upload (not a useful thing to do,
            # but a way to probe edge-case behavior).
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.make_empty(universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 2, 3, 4],
            )

    def test_data_coordinate_upload_force_temp_table(self) -> None:
        """Test queries for dimension records with a data coordinate upload
        that is so big it has to go into a temporary table rather than being
        included directly in the query via bind parameters (by making the
        row-count threshold for using a temporary table tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
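            # Lower the limit so even this three-row upload exceeds it and is
            # routed through a temporary table instead of inline bind
            # parameters.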
            query._driver._constant_rows_limit = 2
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3, 4],
            )

    def test_materialization(self) -> None:
        """Test querying for dimension records against a materialized
        previous query.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            # Simple case where the materialization has just the dimensions
            # we need for the rest of the query.
            self.check_detector_records(
                query.where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # This materialization has extra dimensions that could cause
            # duplicates if we don't SELECT DISTINCT them away.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector"])
                .where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # Materialize a spatial join, which should prevent the creation
            # of a spatial join in the downstream query.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector", "tract"]).materialize()
                # The patch constraint here should do nothing, because only
                # the spatial join from the materialization should exist.
                # The behavior is surprising no matter what here, and the
                # recommendation to users is to add an explicit overlap
                # expression any time it's not obvious what the default is.
                .where(skymap="SkyMap1", tract=0, instrument="Cam1", visit=2, patch=5).dimension_records(
                    "detector"
                ),
                [1, 2],
                has_postprocessing=True,
            )
            # Materialize with a dataset join.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .materialize(datasets=["bias"])
                .dimension_records("detector"),
                [1, 2, 3],
            )

    def test_timespan_results(self) -> None:
        """Test returning dimension records that include timespans."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            self.assertCountEqual(
                [
                    (record.id, record.timespan.begin, record.timespan.end)
                    for record in query.dimension_records("visit")
                ],
                [
                    (
                        1,
                        astropy.time.Time("2021-09-09T03:00:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:01:00", format="isot", scale="tai"),
                    ),
                    (
                        2,
                        astropy.time.Time("2021-09-09T03:02:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:03:00", format="isot", scale="tai"),
                    ),
                ],
            )

    def test_direct_driver_paging(self) -> None:
        """Test queries for dimension records that require multiple result
        pages (by making the raw page size tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml")
        # Basic test where pages should be transparent.
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
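            # Shrink the page size so the four detector rows span multiple
            # result pages.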
            query._driver._raw_page_size = 2
            self.check_detector_records(
                query.dimension_records("detector"),
                [1, 2, 3, 4],
            )
        # Test that it's an error to continue query iteration after closing
        # the context manager.
        with butler._query() as query:
            assert isinstance(query._driver, DirectQueryDriver)
            query._driver._raw_page_size = 2
            iterator = iter(query.dimension_records("detector"))
            next(iterator)
        with self.assertRaisesRegex(RuntimeError, "Cannot continue query result iteration"):
            list(iterator)

    def test_column_expressions(self) -> None:
        """Test queries with a wide variety of column expressions."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        butler.registry.defaults = RegistryDefaults(instrument="Cam1")
        with butler._query() as query:
            _x = query.expression_factory
            self.check_detector_records(
                query.where(_x.not_(_x.detector != 2)).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector + 1).dimension_records("detector"),
                [1],
            )
            self.check_detector_records(
                query.where(-_x.detector == -3).dimension_records("detector"),
                [3],
            )
            self.check_detector_records(
                query.where(_x.detector == 1, _x.detector == 2).dimension_records("detector"),
                [],
                messages=["'where' expression requires both detector=2 and detector=1."],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.overlaps(
                            Timespan(
                                begin=astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                                end=None,
                            )
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.not_(
                            _x.visit.timespan.end
                            < astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.begin
                        > astropy.time.Time("2021-09-09T03:01:30", format="isot", scale="tai")
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        (_x.visit.exposure_time + -(5.0 * _x.visit.zenith_angle)) > 0.0
                    ).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time - 5.0 >= 50.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.id % 2 != 0).dimension_records("visit")],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.zenith_angle / 5.0 <= 1.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.timespan.is_null).dimension_records("visit")],
                [],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time.is_null).dimension_records("visit")
                ],
                [],
            )
            self.check_detector_records(
                query.where(_x.detector.in_iterable([1, 3, 4])).dimension_records("detector"),
                [1, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=2, stop=None)).dimension_records("detector"),
                [2, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=3)).dimension_records("detector"),
                [1, 2],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=None, step=2)).dimension_records("detector"),
                [1, 3],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=2)).dimension_records("detector"),
                [1],
            )
            # This is a complex way to write a much simpler query ("where
            # detector.raft == 'A'"), but it tests code paths that would
            # otherwise require a lot more test setup.
            self.check_detector_records(
                query.where(
                    _x.detector.in_query(_x.detector, query.where(_x.detector.raft == "A"))
                ).dimension_records("detector"),
                [1, 2],
            )
            # It is an error to reference tract without skymap in a WHERE
            # clause.
            with self.assertRaises(InvalidQueryError):
                list(query.where(_x.tract == 4).dimension_records("patch"))
832 list(query.where(_x.tract == 4).dimension_records("patch"))