# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()

import os
import unittest
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from typing import ClassVar

import astropy.time

from .._butler import Butler
from .._dataset_type import DatasetType
from .._exceptions import InvalidQueryError
from .._timespan import Timespan
from ..dimensions import DataCoordinate, DimensionRecord
from ..direct_query_driver import DirectQueryDriver
from ..queries import DimensionRecordQueryResults
from ..registry import CollectionType, NoDefaultCollectionError, RegistryDefaults
from ..registry.sql_registry import SqlRegistry
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

# Simplified tuples of the detector records we'll frequently be querying for.
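# Each tuple holds (instrument, detector ID, full_name, purpose), matching
# the mapping produced by make_detector_tuples below.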
DETECTOR_TUPLES = {
    1: ("Cam1", 1, "Aa", "SCIENCE"),
    2: ("Cam1", 2, "Ab", "SCIENCE"),
    3: ("Cam1", 3, "Ba", "SCIENCE"),
    4: ("Cam1", 4, "Bb", "WAVEFRONT"),
}


def make_detector_tuples(records: Iterable[DimensionRecord]) -> dict[int, tuple[str, int, str, str]]:
    """Make tuples with the same entries as DETECTOR_TUPLES from an iterable of
    detector dimension records.

    Parameters
    ----------
    records : `~collections.abc.Iterable` [ `.dimensions.DimensionRecord` ]
        Detector dimension records.

    Returns
    -------
    tuples : `dict` [ `int`, `tuple` ]
        Dictionary mapping detector ID to tuples with the same fields as the
        ``DETECTOR_TUPLES`` constant in this file.
    """
    return {record.id: (record.instrument, record.id, record.full_name, record.purpose) for record in records}


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: ClassVar[str]
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with data used in the tests below.

        Parameters
        ----------
        *args : str
            Names of the files to pass to `load_data`.

        Returns
        -------
        butler : `Butler`
            Butler to use for tests.
        """
        raise NotImplementedError()
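    # A minimal concrete subclass might look like the sketch below.  How the
    # empty test repository is created is hypothetical here (it varies by
    # backend and test environment); only ``data_dir`` and ``make_butler``
    # are required:
    #
    #     class DirectButlerQueryTests(ButlerQueryTests, unittest.TestCase):
    #         data_dir = "tests/data/registry"  # hypothetical location
    #
    #         def make_butler(self, *args: str) -> Butler:
    #             butler = ...  # create an empty test repository here
    #             for filename in args:
    #                 # load_data needs the repo's SqlRegistry.
    #                 self.load_data(..., filename)
    #             return butler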

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        after the Registry has been created.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Location of test data.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def check_detector_records(
        self,
        results: DimensionRecordQueryResults,
        ids: Sequence[int] = (1, 2, 3, 4),
        ordered: bool = False,
        messages: Iterable[str] = (),
        doomed: bool = False,
        has_postprocessing: bool = False,
    ) -> None:
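        """Check that a query's detector records match expectations.

        Parameters
        ----------
        results : `DimensionRecordQueryResults`
            Query results to check.
        ids : `~collections.abc.Sequence` [ `int` ], optional
            Detector IDs expected in the results.
        ordered : `bool`, optional
            If `True`, require the results to appear in the order given by
            ``ids``.
        messages : `~collections.abc.Iterable` [ `str` ], optional
            Diagnostic messages expected from ``results.explain_no_results``.
        doomed : `bool`, optional
            If `True`, the query is expected to be recognized as returning no
            results without being executed.
        has_postprocessing : `bool`, optional
            If `True`, the query involves Python-side postprocessing, which
            limits which count operations are exact.
        """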
        self.assertEqual(results.element.name, "detector")
        self.assertEqual(results.dimensions, results.dimensions.universe["detector"].minimal_group)
        if has_postprocessing and not doomed:
            # With postprocessing, an exact count is only available if we are
            # allowed to discard the result rows; plain count() raises.
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertGreaterEqual(results.count(discard=False, exact=False), len(ids))
            with self.assertRaisesRegex(InvalidQueryError, "^Cannot count query rows"):
                results.count()
        else:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertEqual(results.count(discard=False), len(ids))
            self.assertEqual(results.count(discard=True, exact=False), len(ids))
            self.assertEqual(results.count(discard=False, exact=False), len(ids))
        self.assertEqual(results.any(), bool(ids))
        # Without executing the query, any() can only be exact when the query
        # is already known to be doomed.
        if not doomed:
            self.assertTrue(results.any(exact=False, execute=False))
            with self.assertRaisesRegex(InvalidQueryError, "^Cannot obtain exact"):
                results.any(exact=True, execute=False)
        else:
            self.assertFalse(results.any(exact=False, execute=False))
            self.assertFalse(results.any(exact=True, execute=False))
        self.assertCountEqual(results.explain_no_results(), list(messages))
        expected = [DETECTOR_TUPLES[i] for i in ids]
        queried = list(make_detector_tuples(results).values())
        if ordered:
            self.assertEqual(queried, expected)
        else:
            self.assertCountEqual(queried, expected)

    def test_simple_record_query(self) -> None:
        """Test query-system basics with simple queries for dimension
        records.

        This includes tests for order_by, limit, and where expressions, but
        only for cases where there are no datasets, dimension projections,
        or spatial/temporal overlaps.
        """
        butler = self.make_butler("base.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            results = query.dimension_records("detector")
            self.check_detector_records(results)
            self.check_detector_records(results.order_by("detector"), ordered=True)
            self.check_detector_records(
                results.order_by(_x.detector.full_name.desc), [4, 3, 2, 1], ordered=True
            )
            self.check_detector_records(results.order_by("detector").limit(2), [1, 2], ordered=True)
            self.check_detector_records(results.where(_x.detector.raft == "B", instrument="Cam1"), [3, 4])

    def test_implied_union_record_query(self) -> None:
        """Test queries for a dimension ('band') that uses "implied union"
        storage, in which its values are the union of the values for it in
        another dimension (physical_filter) that implies it.
        """
        butler = self.make_butler("base.yaml")
        band = butler.dimensions["band"]
        self.assertEqual(band.implied_union_target, butler.dimensions["physical_filter"])
        with butler._query() as query:
            self.assertCountEqual(
                list(query.dimension_records("band")),
                [band.RecordClass(name="g"), band.RecordClass(name="r")],
            )
            self.assertCountEqual(
                list(query.where(physical_filter="Cam1-R1", instrument="Cam1").dimension_records("band")),
                [band.RecordClass(name="r")],
            )

    def test_dataset_constrained_record_query(self) -> None:
        """Test a query for dimension records constrained by the existence of
        datasets of a particular type.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        butler.registry.registerCollection("empty", CollectionType.RUN)
        butler.registry.registerCollection("chain", CollectionType.CHAINED)
        butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"])
        with butler._query() as query:
            # No collections here or in defaults is an error.
            with self.assertRaises(NoDefaultCollectionError):
                query.join_dataset_search("bias").dimension_records("detector").any()
        butler.registry.defaults = RegistryDefaults(collections=["chain"])
        with butler._query() as query:
            _x = query.expression_factory
            # Simplest case: this collection only has the first 3 detectors.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"]).dimension_records("detector"),
                [1, 2, 3],
            )
            # Together these collections have two biases for two of the
            # detectors, but this shouldn't cause duplicate results.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g", "imported_r"]).dimension_records(
                    "detector"
                ),
            )
            # Again we've got the potential for duplicates due to multiple
            # datasets with the same data ID, and this time we force the
            # deduplication to happen outside the dataset-search subquery by
            # adding a WHERE filter on a dataset column.  We also use the
            # defaulted collection ('chain') to supply the collection.
            self.check_detector_records(
                query.join_dataset_search("bias")
                .where(
                    _x.any(
                        _x.all(_x["bias"].collection == "imported_g", _x.detector.raft == "B"),
                        _x.all(_x["bias"].collection == "imported_r", _x.detector.raft == "A"),
                    ),
                    instrument="Cam1",
                )
                .dimension_records("detector"),
                [2, 3],
            )
            # Flats have dimensions (physical_filter and band) that we would
            # not normally include in a query for detector records.  This also
            # should not cause duplicates.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"]).dimension_records("detector"),
                [2, 3, 4],
            )
            # No results, but for reasons we can't determine before we run the
            # query.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"])
                .where(_x.band == "r")
                .dimension_records("detector"),
                [],
            )
            # No results, and we can diagnose why before we run the query.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["empty"]).dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets of type 'bias' in collection 'empty'.",
                ],
                doomed=True,
            )
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .where(instrument="Cam2")
                .dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets with instrument='Cam2' in collection 'imported_g'.",
                ],
                doomed=True,
            )

    def test_spatial_overlaps(self) -> None:
        """Test queries for dimension records with spatial overlaps.

        Run tests/data/registry/spatial.py to plot the various regions used in
        this test.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Set default governor data ID values both to test that code path and
        # to keep us from having to repeat them in every 'where' call below.
        butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")
        htm7 = butler.dimensions.skypix_dimensions["htm7"]
        with butler._query() as query:
            _x = query.expression_factory
            # Query for detectors from a particular visit that overlap an
            # explicit region.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                    visit=1,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=True,
            )
            # Query for detectors from a particular visit that overlap an htm7
            # ID.  This is basically the same query as the last one, but
            # expressed as a spatial join, and we can recognize that
            # postprocessing is not needed (in the last case the
            # postprocessing did nothing, but we couldn't tell that in advance
            # because the query didn't know the region came from htm7).
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Repeat the last query but with the spatial join implicit rather
            # than explicit.
            self.check_detector_records(
                query.where(
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Query for the detectors from any visit that overlap a region:
            # this gets contributions from multiple visits, and would have
            # duplicates if we didn't get rid of them via GROUP BY.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=True,
            )
            # Once again we rewrite the region-constraint query as a spatial
            # join, which drops the postprocessing.  This join has to be
            # explicit because `visit` no longer gets into the query
            # dimensions some other way, and without it `detector` is not
            # spatial.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=False,
            )
            # Query for detectors from any visit that overlap a patch.  This
            # requires joining visit_detector_region to htm7 and htm7 to
            # patch, and then some postprocessing.  We want to make sure there
            # are no duplicates from a detector and patch both overlapping
            # multiple htm7 tiles (which affects detectors 1 and 2) and that
            # postprocessing filters out detector 4, which has one htm7 tile
            # in common with the patch but does not actually overlap it.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.patch.region),
                    tract=0,
                    patch=4,
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Query for that patch's region and express the previous query as
            # a region-constraint instead of a spatial join.
            (patch_record,) = query.where(tract=0, patch=4).dimension_records("patch")
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Combine postprocessing with order_by and limit.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                )
                .dimension_records("detector")
                .order_by(_x.detector.desc)
                .limit(2),
                [3, 2],
                has_postprocessing=True,
            )
            # Try a case where there are some records before postprocessing
            # but none afterwards.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                    detector=4,
                ).dimension_records("detector"),
                [],
                has_postprocessing=True,
            )

    def test_common_skypix_overlaps(self) -> None:
        """Test spatial overlap queries that return htm7 records."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Insert some datasets that use a skypix dimension, since some queries
        # are only possible if a superset of the skypix IDs is already in the
        # query.
        cat1 = DatasetType("cat1", dimensions=butler.dimensions.conform(["htm7"]), storageClass="ArrowTable")
        butler.registry.registerDatasetType(cat1)
        butler.registry.registerCollection("refcats", CollectionType.RUN)
        butler.registry.insertDatasets(cat1, [{"htm7": i} for i in range(253952, 253968)], run="refcats")
        with butler._query() as query:
            _x = query.expression_factory
            # Explicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.htm7.region.overlaps(_x.patch.region), skymap="SkyMap1", tract=0, patch=4
                    ).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Implicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Constraint on the patch region (with the query not knowing it
            # corresponds to that patch).
            (patch,) = query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("patch")
            self.assertCountEqual(
                [
                    record.id
                    for record in query.join_dataset_search("cat1", collections=["refcats"])
                    .where(_x.htm7.region.overlaps(patch.region))
                    .dimension_records("htm7")
                ],
                [253954, 253955],
            )

    def test_data_coordinate_upload(self) -> None:
        """Test queries for dimension records with a data coordinate upload."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            # Query with a data ID upload that has an irrelevant row (there's
            # no data with "Cam2").
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam2", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that directly contains duplicates,
            # which should not appear in the results.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that could
            # also introduce duplicates if we're not careful.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that are
            # used in a constraint.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [3],
            )
            # Query with a data ID upload that must be spatially joined to
            # the other dimensions.  This join is added automatically.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=2, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [2, 3, 4],
                has_postprocessing=True,
            )
            # Query with a data ID upload that embeds a spatial relationship.
            # This prevents automatic creation of a spatial join.  To make the
            # test more interesting, the spatial relationship embedded in
            # these data IDs is nonsense: it includes combinations that do not
            # overlap, while leaving out combinations that do overlap.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=1,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=2,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=3,
                            instrument="Cam1",
                            visit=1,
                            detector=3,
                            universe=butler.dimensions,
                        ),
                    ]
                )
                .where(skymap="SkyMap1", tract=1, patch=1)
                .dimension_records("detector"),
                [1, 2],
            )
            # Query with an empty data ID upload (not a useful thing to do,
            # but a way to probe edge-case behavior).
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.make_empty(universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 2, 3, 4],
            )

    def test_data_coordinate_upload_force_temp_table(self) -> None:
        """Test queries for dimension records with a data coordinate upload
        that is so big it has to go into a temporary table rather than be
        included directly in the query via bind params (by making the
        threshold for creating a temporary table tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
            query._driver._constant_rows_limit = 2
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3, 4],
            )

    def test_materialization(self) -> None:
        """Test querying for dimension records against a materialized previous
        query.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            # Simple case where the materialization has just the dimensions
            # we need for the rest of the query.
            self.check_detector_records(
                query.where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # This materialization has extra dimensions that could cause
            # duplicates if we don't SELECT DISTINCT them away.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector"])
                .where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # Materialize a spatial join, which should prevent the creation
            # of a spatial join in the downstream query.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector", "tract"]).materialize()
                # The patch constraint here should do nothing, because only
                # the spatial join from the materialization should exist.  The
                # behavior is surprising no matter what here, and the
                # recommendation to users is to add an explicit overlap
                # expression any time it's not obvious what the default is.
                .where(skymap="SkyMap1", tract=0, instrument="Cam1", visit=2, patch=5).dimension_records(
                    "detector"
                ),
                [1, 2],
                has_postprocessing=True,
            )
            # Materialize with a dataset join.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .materialize(datasets=["bias"])
                .dimension_records("detector"),
                [1, 2, 3],
            )

    def test_timespan_results(self) -> None:
        """Test returning dimension records that include timespans."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            self.assertCountEqual(
                [
                    (record.id, record.timespan.begin, record.timespan.end)
                    for record in query.dimension_records("visit")
                ],
                [
                    (
                        1,
                        astropy.time.Time("2021-09-09T03:00:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:01:00", format="isot", scale="tai"),
                    ),
                    (
                        2,
                        astropy.time.Time("2021-09-09T03:02:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:03:00", format="isot", scale="tai"),
                    ),
                ],
            )

    def test_direct_driver_paging(self) -> None:
        """Test queries for dimension records that require multiple result
        pages (by making the raw page size tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml")
        # Basic test where pages should be transparent.
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
            query._driver._raw_page_size = 2
            self.check_detector_records(
                query.dimension_records("detector"),
                [1, 2, 3, 4],
            )
        # Test that it's an error to continue query iteration after closing
        # the context manager.
        with butler._query() as query:
            assert isinstance(query._driver, DirectQueryDriver)
            query._driver._raw_page_size = 2
            iterator = iter(query.dimension_records("detector"))
            next(iterator)
        with self.assertRaisesRegex(RuntimeError, "Cannot continue query result iteration"):
            list(iterator)

    def test_column_expressions(self) -> None:
        """Test queries with a wide variety of column expressions."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        butler.registry.defaults = RegistryDefaults(instrument="Cam1")
        with butler._query() as query:
            _x = query.expression_factory
            self.check_detector_records(
                query.where(_x.not_(_x.detector != 2)).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector + 1).dimension_records("detector"),
                [1],
            )
            self.check_detector_records(
                query.where(-_x.detector == -3).dimension_records("detector"),
                [3],
            )
            self.check_detector_records(
                query.where(_x.detector == 1, _x.detector == 2).dimension_records("detector"),
                [],
                messages=["'where' expression requires both detector=2 and detector=1."],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.overlaps(
                            Timespan(
                                begin=astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                                end=None,
                            )
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.not_(
                            _x.visit.timespan.end
                            < astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.begin
                        > astropy.time.Time("2021-09-09T03:01:30", format="isot", scale="tai")
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        (_x.visit.exposure_time + -(5.0 * _x.visit.zenith_angle)) > 0.0
                    ).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time - 5.0 >= 50.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.id % 2 != 0).dimension_records("visit")],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.zenith_angle / 5.0 <= 1.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.timespan.is_null).dimension_records("visit")],
                [],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time.is_null).dimension_records("visit")
                ],
                [],
            )
            self.check_detector_records(
                query.where(_x.detector.in_iterable([1, 3, 4])).dimension_records("detector"),
                [1, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=2, stop=None)).dimension_records("detector"),
                [2, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=3)).dimension_records("detector"),
                [1, 2],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=None, step=2)).dimension_records("detector"),
                [1, 3],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=2)).dimension_records("detector"),
                [1],
            )
            # This is a complex way to write a much simpler query ("where
            # detector.raft == 'A'"), but it tests code paths that would
            # otherwise require a lot more test setup.
            self.check_detector_records(
                query.where(
                    _x.detector.in_query(_x.detector, query.where(_x.detector.raft == "A"))
                ).dimension_records("detector"),
                [1, 2],
            )
            # Error to reference tract without skymap in a WHERE clause.
            with self.assertRaises(InvalidQueryError):
                list(query.where(_x.tract == 4).dimension_records("patch"))