Coverage for python / lsst / daf / butler / tests / butler_queries.py: 8%

834 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-06 08:30 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = () 

31 

32import unittest 

33import unittest.mock 

34from abc import ABC, abstractmethod 

35from collections.abc import Iterable, Sequence 

36from operator import attrgetter 

37from uuid import UUID 

38 

39import astropy.coordinates 

40import astropy.time 

41from numpy import int64 

42 

43from lsst.sphgeom import LonLat, Region 

44 

45from .._butler import Butler 

46from .._collection_type import CollectionType 

47from .._dataset_ref import DatasetRef 

48from .._dataset_type import DatasetType 

49from .._exceptions import ( 

50 EmptyQueryResultError, 

51 InvalidQueryError, 

52 MissingCollectionError, 

53 MissingDatasetTypeError, 

54) 

55from .._timespan import Timespan 

56from ..dimensions import DataCoordinate, DimensionRecord 

57from ..direct_query_driver import DirectQueryDriver 

58from ..queries import DimensionRecordQueryResults, Query 

59from ..queries.tree import Predicate 

60from ..registry import NoDefaultCollectionError, RegistryDefaults 

61from .utils import TestCaseMixin 

62 

# Expected detector records for instrument "Cam1", keyed by detector ID and
# reduced to (instrument, id, full_name, purpose) tuples.
DETECTOR_TUPLES = {
    detector_id: ("Cam1", detector_id, full_name, purpose)
    for detector_id, (full_name, purpose) in enumerate(
        [("Aa", "SCIENCE"), ("Ab", "SCIENCE"), ("Ba", "SCIENCE"), ("Bb", "WAVEFRONT")],
        start=1,
    )
}

70 

71 

def make_detector_tuples(records: Iterable[DimensionRecord]) -> dict[int, tuple[str, int, str, str]]:
    """Reduce detector dimension records to ``DETECTOR_TUPLES``-style tuples.

    Parameters
    ----------
    records : `~collections.abc.Iterable` [ `.dimensions.DimensionRecord` ]
        Detector dimension records.

    Returns
    -------
    tuples : `dict` [ `int`, `tuple` ]
        Mapping from detector ID to an ``(instrument, id, full_name,
        purpose)`` tuple, i.e. the same layout as the values of the
        ``DETECTOR_TUPLES`` constant in this file.  A record whose ID repeats
        an earlier one replaces the earlier entry.
    """
    tuples: dict[int, tuple[str, int, str, str]] = {}
    for record in records:
        tuples[record.id] = (record.instrument, record.id, record.full_name, record.purpose)
    return tuples

88 

89 

90class ButlerQueryTests(ABC, TestCaseMixin): 

91 """Base class for unit tests that test `lsst.daf.butler.Butler.query` 

92 implementations. 

93 """ 

94 

@abstractmethod
def make_butler(self, *args: str) -> Butler:
    """Create a Butler holding the data queried by the tests below.

    Parameters
    ----------
    *args : `str`
        Names of the files to pass to `load_data`.

    Returns
    -------
    butler : `Butler`
        Butler to use for tests.
    """
    raise NotImplementedError

110 

def load_data(self, butler: Butler, filename: str) -> None:
    """Import registry test data into a butler.

    The data is read from
    ``resource://lsst.daf.butler/tests/registry_data/<filename>``, which
    should be a YAML import/export file, and is imported without a
    datastore.

    This method should be called from implementations of `make_butler`
    where the Registry should exist.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to import into.
    filename : `str`
        Name of the test data file within the ``registry_data`` resources.
    """
    data_uri = f"resource://lsst.daf.butler/tests/registry_data/{filename}"
    butler.import_(filename=data_uri, without_datastore=True)

129 

def check_detector_records(
    self,
    results: DimensionRecordQueryResults,
    ids: Sequence[int] = (1, 2, 3, 4),
    ordered: bool = False,
    messages: Iterable[str] = (),
    doomed: bool = False,
    has_postprocessing: bool = False,
) -> None:
    """Run the standard battery of checks on a detector-record query result.

    Parameters
    ----------
    results : `DimensionRecordQueryResults`
        Query results expected to hold ``detector`` records.
    ids : `~collections.abc.Sequence` [ `int` ], optional
        Detector IDs the query is expected to return.
    ordered : `bool`, optional
        Whether the returned rows must follow the order of ``ids``.
    messages : `~collections.abc.Iterable` [ `str` ], optional
        Messages ``results.explain_no_results()`` is expected to return, in
        any order.
    doomed : `bool`, optional
        Whether ``results.any(execute=False)`` is expected to report no rows
        rather than refuse to give an exact answer.
    has_postprocessing : `bool`, optional
        Whether an exact count that keeps all rows is expected to be
        rejected with `InvalidQueryError`.  Ignored when ``doomed`` is set.
    """
    n_expected = len(ids)
    self.assertEqual(results.element.name, "detector")
    self.assertEqual(results.dimensions, results.dimensions.universe["detector"].minimal_group)

    # Counting with discard=True must be exact in every configuration.
    self.assertEqual(results.count(discard=True), n_expected)
    if doomed or not has_postprocessing:
        self.assertEqual(results.count(discard=False), n_expected)
        self.assertEqual(results.count(discard=True, exact=False), n_expected)
        self.assertEqual(results.count(discard=False, exact=False), n_expected)
    else:
        # Only an upper bound is available without discarding rows.
        self.assertGreaterEqual(results.count(discard=False, exact=False), n_expected)
        with self.assertRaisesRegex(InvalidQueryError, "^Cannot count query rows"):
            results.count()

    self.assertEqual(results.any(), bool(ids))
    if doomed:
        self.assertFalse(results.any(exact=False, execute=False))
        self.assertFalse(results.any(exact=True, execute=False))
    else:
        self.assertTrue(results.any(exact=False, execute=False))
        with self.assertRaisesRegex(InvalidQueryError, "^Cannot obtain exact"):
            results.any(exact=True, execute=False)

    self.assertCountEqual(results.explain_no_results(), list(messages))
    self.check_detector_records_returned(list(results), ids=ids, ordered=ordered)

161 

def check_detector_records_returned(
    self,
    results: list[DimensionRecord],
    ids: Sequence[int] = (1, 2, 3, 4),
    ordered: bool = False,
) -> None:
    """Compare returned detector records against ``DETECTOR_TUPLES``.

    Parameters
    ----------
    results : `list` [ `DimensionRecord` ]
        Detector records returned by a query.
    ids : `~collections.abc.Sequence` [ `int` ], optional
        Detector IDs the query is expected to have returned.
    ordered : `bool`, optional
        If `True`, the records must appear in the same order as ``ids``;
        otherwise only the contents are compared.
    """
    expected = [DETECTOR_TUPLES[i] for i in ids]
    tuples_by_id = make_detector_tuples(results)
    # make_detector_tuples keys its output by detector ID, so a detector
    # returned more than once would collapse into one entry and the
    # comparisons below could not notice it.  Check for that explicitly.
    self.assertEqual(len(tuples_by_id), len(results), "Query returned duplicate detector records.")
    queried = list(tuples_by_id.values())
    if ordered:
        self.assertEqual(queried, expected)
    else:
        self.assertCountEqual(queried, expected)

174 

def test_simple_record_query(self) -> None:
    """Exercise the basics of the query system with dimension-record queries.

    ``order_by``, ``limit`` and ``where`` are all covered, but only in
    queries that involve no datasets, dimension projections, or
    spatial/temporal overlaps.
    """
    butler = self.make_butler("base.yaml")
    with butler.query() as query:
        x = query.expression_factory
        detectors = query.dimension_records("detector")

        # No constraints at all.
        self.check_detector_records(detectors)
        self.check_detector_records_returned(butler.query_dimension_records("detector"))
        self.assertEqual(len(butler.query_dimension_records("detector", limit=0)), 0)

        # Sorting by the dimension's own key.
        self.check_detector_records(detectors.order_by("detector"), ordered=True)
        by_detector = butler.query_dimension_records("detector", order_by="detector")
        self.check_detector_records_returned(by_detector, ordered=True)

        # Reverse sorting by a record field.
        self.check_detector_records(
            detectors.order_by(x.detector.full_name.desc), [4, 3, 2, 1], ordered=True
        )
        by_name_desc = butler.query_dimension_records("detector", order_by="-full_name")
        self.check_detector_records_returned(by_name_desc, ids=[4, 3, 2, 1], ordered=True)

        # Sorting combined with a limit.
        self.check_detector_records(detectors.order_by("detector").limit(2), [1, 2], ordered=True)
        first_two = butler.query_dimension_records("detector", limit=2, order_by="detector")
        self.check_detector_records_returned(first_two, ids=[1, 2], ordered=True)

        # A negative limit is expected to log a warning that more records
        # are available.
        with self.assertLogs("lsst.daf.butler", level="WARNING") as log_cm:
            last_two = butler.query_dimension_records("detector", limit=-2, order_by="-detector")
            self.check_detector_records_returned(last_two, ids=[4, 3], ordered=True)
        self.assertIn("More dimension records are available", log_cm.output[0])

        # Equality constraint, first as an expression object and then as a
        # string expression with a bind value.
        self.check_detector_records(detectors.where(x.detector.raft == "B", instrument="Cam1"), [3, 4])
        raft_b = butler.query_dimension_records(
            "detector", where="detector.raft = :R", bind={"R": "B"}, instrument="Cam1"
        )
        self.check_detector_records_returned(raft_b, ids=[3, 4])

        # Glob patterns on a string field.
        for pattern, matching in (("B?", [3, 4]), ("*a", [1, 3])):
            self.check_detector_records(
                detectors.where(x.detector.full_name.glob(pattern), instrument="Cam1"), matching
            )

        # Test incorrect type for glob() parameter.
        with self.assertRaises(InvalidQueryError):
            detectors.where(x.detector.full_name.glob(1), instrument="Cam1")  # type: ignore[arg-type]

232 

def test_simple_data_coordinate_query(self) -> None:
    """Exercise simple data ID queries via ``query.data_ids`` and
    ``butler.query_data_ids``, including ``order_by`` and ``limit``.
    """
    butler = self.make_butler("base.yaml")

    universe = butler.dimensions
    all_detectors = [
        DataCoordinate.standardize({"instrument": "Cam1", "detector": detector}, universe=universe)
        for detector in (1, 2, 3, 4)
    ]
    # Detectors 3 and 4: the rows expected from a descending sort that is
    # limited to two results (compared without regard to order below).
    last_two = all_detectors[2:]

    with butler.query() as query:
        # Test empty query
        empty = DataCoordinate.make_empty(butler.dimensions)
        self.assertCountEqual(list(query.data_ids([])), [empty])
        self.assertCountEqual(butler.query_data_ids([]), [empty])

        # Test query for a single dimension
        unconstrained = query.data_ids(["detector"])
        self.assertCountEqual(list(unconstrained), all_detectors)

        # Limit.
        limited = query.data_ids(["detector"]).order_by("-detector").limit(2)
        self.assertCountEqual(list(limited), last_two)

    # The same checks through the convenience method.
    self.assertCountEqual(butler.query_data_ids("detector"), all_detectors)
    self.assertCountEqual(butler.query_data_ids("detector", order_by="-detector", limit=2), last_two)

    # A negative limit is expected to log a warning that more data IDs are
    # available.
    with self.assertLogs("lsst.daf.butler", level="WARNING") as log_cm:
        data_ids = butler.query_data_ids("detector", order_by="-detector", limit=-2)
    self.assertCountEqual(data_ids, last_two)
    self.assertIn("More data IDs are available", log_cm.output[0])

    self.assertEqual(len(butler.query_data_ids("detector", limit=0)), 0)

270 

def test_simple_dataset_query(self) -> None:
    """Exercise simple dataset queries via ``query.datasets`` and
    ``butler.query_datasets``.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")
    first_id = UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22")
    second_id = UUID("51352db4-a47a-447c-b12d-a50b206b17cd")

    with butler.query() as query:
        refs_q = list(query.datasets("bias", "imported_g").order_by("detector"))
        refs_simple = butler.query_datasets("bias", "imported_g", order_by="detector")
        self.assertCountEqual(refs_q, refs_simple)

    # Both interfaces must return the same three biases, sorted by detector.
    for refs in (refs_q, refs_simple):
        self.assertEqual(len(refs), 3)
        self.assertEqual(refs[0].id, first_id)
        self.assertEqual(refs[1].id, second_id)
        for detector, ref in enumerate(refs, 1):
            self.assertEqual(ref.datasetType.name, "bias")
            self.assertEqual(ref.dataId["instrument"], "Cam1")
            self.assertEqual(ref.dataId["detector"], detector)
            self.assertEqual(ref.run, "imported_g")

    # Try again with limit.
    with butler.query() as query:
        refs_q = list(query.datasets("bias", "imported_g").order_by("detector").limit(2))
        refs_simple = butler.query_datasets("bias", "imported_g", order_by="detector", limit=2)
        self.assertCountEqual(refs_q, refs_simple)
        self.assertEqual(len(refs_q), 2)
        self.assertEqual(refs_q[0].id, first_id)
        self.assertEqual(refs_q[1].id, second_id)

    # limit=0 means test the query but don't return anything and
    # don't complain.
    refs_simple = butler.query_datasets("bias", "imported_g", limit=0, explain=True)
    self.assertEqual(len(refs_simple), 0)

    # Explicitly run with no restrictions.
    refs_simple = butler.query_datasets("bias", collections="*", find_first=False, limit=None)
    self.assertEqual(len(refs_simple), 6)

    # Now limit the number of results and look for a warning.
    with self.assertLogs("lsst.daf.butler", level="WARNING") as log_cm:
        refs_simple = butler.query_datasets("bias", collections="*", find_first=False, limit=-4)
    self.assertEqual(len(refs_simple), 4)
    self.assertIn("More datasets are available", log_cm.output[0])

    # A wildcard collection is rejected for find-first searches; without
    # find-first the query is reported as empty.
    with self.assertRaises(InvalidQueryError) as wildcard_cm:
        butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=True)
    self.assertIn("Can not use wildcards", str(wildcard_cm.exception))
    with self.assertRaises(EmptyQueryResultError) as empty_cm:
        butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=False)
    self.assertIn("doomed", str(empty_cm.exception))

    # Test for a regression of an issue where "band" was not being included
    # in the data ID, despite being one of the dimensions in the "flat"
    # dataset type.
    #
    # "band" is implied by "physical_filter", so it's technically not a
    # 'required' dimension.  However, the contract of query_datasets is
    # that hasFull() should be true, so implied dimensions must be
    # included.
    flats = butler.query_datasets("flat", "imported_r", where="detector = 2", instrument="Cam1")
    self.assertEqual(len(flats), 1)
    flat = flats[0]
    self.assertTrue(flat.dataId.hasFull())
    self.assertEqual(flat.datasetType.name, "flat")
    self.assertEqual(flat.dataId["instrument"], "Cam1")
    self.assertEqual(flat.dataId["detector"], 2)
    self.assertEqual(flat.dataId["physical_filter"], "Cam1-R1")
    self.assertEqual(flat.dataId["band"], "r")

336 

def test_general_query(self) -> None:
    """Test ``Query.general`` and the result objects it returns."""
    butler = self.make_butler("base.yaml", "datasets.yaml")
    detector_dims = butler.dimensions["detector"].minimal_group

    def detector_row(detector: int) -> dict[str, object]:
        # Expected row when full_name and purpose accompany the data ID.
        _, _, full_name, purpose = DETECTOR_TUPLES[detector]
        return {
            "instrument": "Cam1",
            "detector": detector,
            "detector.full_name": full_name,
            "detector.purpose": purpose,
        }

    # Do simple dimension queries.
    with butler.query() as query:
        query = query.join_dimensions(detector_dims)
        rows = list(query.general(detector_dims).order_by("detector"))
        self.assertEqual(rows, [{"instrument": "Cam1", "detector": detector} for detector in (1, 2, 3, 4)])

        # Descending purpose puts the WAVEFRONT detector first; full_name
        # orders the rest.
        rows = list(
            query.general(detector_dims, "detector.full_name", "purpose").order_by(
                "-detector.purpose", "full_name"
            )
        )
        self.assertEqual(rows, [detector_row(detector) for detector in (4, 1, 2, 3)])

        rows = list(
            query.general(detector_dims, "detector.full_name", "purpose").where(
                "instrument = 'Cam1' AND purpose = 'WAVEFRONT'"
            )
        )
        self.assertEqual(rows, [detector_row(4)])

        result = query.general(detector_dims, dimension_fields={"detector": {"full_name"}})
        self.assertEqual({row["detector.full_name"] for row in result}, {"Aa", "Ab", "Ba", "Bb"})

    # Use "flat" whose dimension group includes implied dimension.
    flat = butler.get_dataset_type("flat")
    flat_dims = butler.dimensions.conform(["detector", "physical_filter"])

    # Do simple dataset queries in RUN collection.
    with butler.query() as query:
        query = query.join_dataset_search("flat", "imported_g")
        # This just returns data IDs.
        rows = list(query.general(flat_dims).order_by("detector"))
        self.assertEqual(
            rows,
            [
                {"instrument": "Cam1", "detector": detector, "physical_filter": "Cam1-G", "band": "g"}
                for detector in (2, 3, 4)
            ],
        )

        result = query.general(flat_dims, dataset_fields={"flat": ...}, find_first=True).order_by(
            "detector"
        )
        dataset_ids = {row["flat.dataset_id"] for row in result}
        self.assertEqual(
            dataset_ids,
            {
                UUID("60c8a65c-7290-4c38-b1de-e3b1cdcf872d"),
                UUID("84239e7f-c41f-46d5-97b9-a27976b98ceb"),
                UUID("fd51bce1-2848-49d6-a378-f8a122f5139a"),
            },
        )

        # Check what iter_tuples() returns
        row_tuples = list(result.iter_tuples(flat))
        self.assertEqual(len(row_tuples), 3)
        for row_tuple in row_tuples:
            self.assertEqual(len(row_tuple.refs), 1)
            ref = row_tuple.refs[0]
            self.assertEqual(ref.datasetType, flat)
            self.assertTrue(ref.dataId.hasFull())
            self.assertFalse(ref.dataId.hasRecords())
            self.assertTrue(row_tuple.data_id.hasFull())
            self.assertFalse(row_tuple.data_id.hasRecords())
            self.assertEqual(row_tuple.data_id.dimensions, flat_dims)
            self.assertEqual(row_tuple.raw_row["flat.run"], "imported_g")

    flat1, flat2, flat3 = (row_tuple.refs[0] for row_tuple in row_tuples)

    # Query datasets CALIBRATION/TAGGED collections.
    butler.registry.registerCollection("tagged", CollectionType.TAGGED)
    butler.registry.registerCollection("calib", CollectionType.CALIBRATION)

    # Add two refs to tagged collection.
    butler.registry.associate("tagged", [flat1, flat2])

    # Certify some calibs.
    t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
    t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
    t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
    certifications = [
        (flat1, Timespan(t1, t2)),
        (flat3, Timespan(t2, t3)),
        (flat1, Timespan(t3, None)),
        (flat2, Timespan.makeEmpty()),
    ]
    for ref, validity in certifications:
        butler.registry.certify("calib", [ref], validity)
    calib_timespans = {validity for _, validity in certifications}

    dataset_columns = ("flat.dataset_id", "flat.run", "flat.collection", "flat.timespan")

    # Query tagged collection.
    with butler.query() as query:
        query = query.join_dataset_search("flat", ["tagged"])

        result = query.general(flat_dims, *dataset_columns[:3], find_first=False)
        row_tuples = list(result.iter_tuples(flat))
        self.assertEqual(len(row_tuples), 2)
        self.assertEqual({row_tuple.refs[0] for row_tuple in row_tuples}, {flat1, flat2})
        self.assertEqual({row_tuple.raw_row["flat.collection"] for row_tuple in row_tuples}, {"tagged"})

    # Query calib collection.
    with butler.query() as query:
        query = query.join_dataset_search("flat", ["calib"])
        result = query.general(flat_dims, *dataset_columns, find_first=False)
        row_tuples = list(result.iter_tuples(flat))
        self.assertEqual(len(row_tuples), 4)
        self.assertEqual({row_tuple.refs[0] for row_tuple in row_tuples}, {flat1, flat2, flat3})
        self.assertEqual({row_tuple.raw_row["flat.collection"] for row_tuple in row_tuples}, {"calib"})
        self.assertEqual({row_tuple.raw_row["flat.timespan"] for row_tuple in row_tuples}, calib_timespans)

    # Query both tagged and calib collection.
    with butler.query() as query:
        query = query.join_dataset_search("flat", ["tagged", "calib"])
        result = query.general(flat_dims, *dataset_columns, find_first=False)
        row_tuples = list(result.iter_tuples(flat))
        self.assertEqual(len(row_tuples), 6)
        self.assertEqual(
            {row_tuple.raw_row["flat.collection"] for row_tuple in row_tuples}, {"calib", "tagged"}
        )
        # In addition to the certified timespans, an unbounded timespan is
        # expected among the rows.
        self.assertEqual(
            {row_tuple.raw_row["flat.timespan"] for row_tuple in row_tuples},
            calib_timespans | {Timespan(None, None)},
        )

    # Include dimension records into query.
    with butler.query() as query:
        query = query.join_dimensions(detector_dims)
        result = query.general(detector_dims).order_by("detector")
        rows = list(result.with_dimension_records())
        self.assertEqual(
            rows[0],
            {
                "instrument": "Cam1",
                "detector": 1,
                "instrument.visit_max": 1024,
                "instrument.visit_system": 1,
                "instrument.exposure_max": 512,
                "instrument.detector_max": 4,
                "instrument.class_name": "lsst.pipe.base.Instrument",
                "detector.full_name": "Aa",
                "detector.name_in_raft": "a",
                "detector.raft": "A",
                "detector.purpose": "SCIENCE",
            },
        )

    # DataIds should come with records.
    with butler.query() as query:
        query = query.join_dataset_search("flat", "imported_g")
        result = query.general(flat_dims, dataset_fields={"flat": ...}, find_first=True).order_by(
            "detector"
        )
        result = result.with_dimension_records()
        row_tuples = list(result.iter_tuples(flat))
        self.assertEqual(len(row_tuples), 3)
        for row_tuple in row_tuples:
            self.assertTrue(row_tuple.data_id.hasRecords())
            self.assertEqual(len(row_tuple.refs), 1)
            self.assertTrue(row_tuple.refs[0].dataId.hasRecords())

568 

def test_query_ingest_date(self) -> None:
    """Test that general queries can return and filter on ``ingest_date``."""
    # Taken before the butler is populated, so it serves as a lower bound
    # for the comparisons below.
    before_ingest = astropy.time.Time.now()
    butler = self.make_butler("base.yaml", "datasets.yaml")
    dimensions = butler.dimensions.conform(["detector", "physical_filter"])

    # Check that returned type of ingest_date is astropy Time, must work
    # for schema versions 1 and 2 of datasets manager.
    with butler.query() as query:
        query = query.join_dataset_search("flat", "imported_g")
        rows = list(query.general(dimensions, dataset_fields={"flat": {"ingest_date"}}, find_first=False))
        self.assertEqual(len(rows), 3)
        for row in rows:
            self.assertIsInstance(row["flat.ingest_date"], astropy.time.Time)

    # Check that WHERE accepts astropy time
    with butler.query() as query:
        query = query.join_dataset_search("flat", "imported_g")
        bind = {"before_ingest": before_ingest}

        ingested_earlier = query.where("flat.ingest_date < :before_ingest", bind=bind)
        self.assertEqual(len(list(ingested_earlier.general(dimensions))), 0)

        ingested_since = query.where("flat.ingest_date >= :before_ingest", bind=bind)
        self.assertEqual(len(list(ingested_since.general(dimensions))), 3)

        # Same with a time in string literal.
        ingested_earlier = query.where(f"flat.ingest_date < T'mjd/{before_ingest.tai.mjd}'")
        self.assertEqual(len(list(ingested_earlier.general(dimensions))), 0)

597 

def test_implied_union_record_query(self) -> None:
    """Test queries for a dimension ('band') that uses "implied union"
    storage, in which its values are the union of the values for it in
    another dimension (physical_filter) that implies it.
    """
    butler = self.make_butler("base.yaml")
    band = butler.dimensions["band"]
    self.assertEqual(band.implied_union_target, butler.dimensions["physical_filter"])

    g_band = band.RecordClass(name="g")
    r_band = band.RecordClass(name="r")
    r_filter = {"physical_filter": "Cam1-R1", "instrument": "Cam1"}

    with butler.query() as query:
        # All bands, then only the band implied by one physical filter.
        self.assertCountEqual(list(query.dimension_records("band")), [g_band, r_band])
        self.assertCountEqual(list(query.where(**r_filter).dimension_records("band")), [r_band])

        # The same two checks through the convenience method.
        self.assertCountEqual(butler.query_dimension_records("band"), [g_band, r_band])
        self.assertCountEqual(butler.query_dimension_records("band", **r_filter), [r_band])

623 

def test_dataset_constrained_record_query(self) -> None:
    """Test dimension-record queries that are constrained by the existence
    of datasets of a particular type.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")
    butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
    butler.collections.register("empty", CollectionType.RUN)
    butler.collections.register("chain", CollectionType.CHAINED)
    butler.collections.redefine_chain("chain", ["imported_g", "empty", "imported_r"])

    # No collections here or in defaults is an error.
    with butler.query() as query:
        with self.assertRaises(NoDefaultCollectionError):
            query.join_dataset_search("bias").dimension_records("detector").any()

    butler.registry.defaults = RegistryDefaults(collections=["chain"])
    with butler.query() as query:
        x = query.expression_factory

        # Simplest case: this collection only has the first 3 detectors.
        bias_g = query.join_dataset_search("bias", collections=["imported_g"])
        self.check_detector_records(bias_g.dimension_records("detector"), [1, 2, 3])

        # Together these collections have two biases for two of the
        # detectors, but this shouldn't cause duplicate results.
        bias_g_r = query.join_dataset_search("bias", collections=["imported_g", "imported_r"])
        self.check_detector_records(bias_g_r.dimension_records("detector"))

        # Again we've got the potential for duplicates due to multiple
        # datasets with the same data ID, and this time we force the
        # deduplication to happen outside the dataset-search subquery by
        # adding a WHERE filter on a dataset column.  We also use the
        # defaulted collection ('chain') to supply the collection.
        per_collection_raft = x.any(
            x.all(x["bias"].collection == "imported_g", x.detector.raft == "B"),
            x.all(x["bias"].collection == "imported_r", x.detector.raft == "A"),
        )
        bias_chain = query.join_dataset_search("bias").where(per_collection_raft, instrument="Cam1")
        self.check_detector_records(bias_chain.dimension_records("detector"), [2, 3])

        # Flats have dimensions (physical_filter and band) we would
        # normally include in query for detector records.  This also should
        # not cause duplicates.
        flat_g = query.join_dataset_search("flat", collections=["imported_g"])
        self.check_detector_records(flat_g.dimension_records("detector"), [2, 3, 4])

        # No results, but for reasons we can't determine before we run the
        # query.
        flat_g_band_r = query.join_dataset_search("flat", collections=["imported_g"]).where(x.band == "r")
        self.check_detector_records(flat_g_band_r.dimension_records("detector"), [])

        # No results, and we can diagnose why before we run the query.
        bias_empty = query.join_dataset_search("bias", collections=["empty"])
        self.check_detector_records(
            bias_empty.dimension_records("detector"),
            [],
            messages=[
                "Search for dataset type 'bias' in ['empty'] is doomed to fail.",
                "No datasets of type 'bias' in collection 'empty'.",
            ],
            doomed=True,
        )
        bias_cam2 = query.join_dataset_search("bias", collections=["imported_g"]).where(instrument="Cam2")
        self.check_detector_records(
            bias_cam2.dimension_records("detector"),
            [],
            messages=[
                "Search for dataset type 'bias' in ['imported_g'] is doomed to fail.",
                "No datasets with instrument='Cam2' in collection 'imported_g'.",
            ],
            doomed=True,
        )

705 

def test_duplicate_overlaps(self) -> None:
    """Test the query option that allows duplicate rows in queries that use
    skypix overlaps.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")
    butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")

    def tract_patch_pairs(query: Query) -> list[tuple[int, int]]:
        # (tract, patch) of every patch data ID joined to visit 1, detector 1.
        data_ids = list(query.data_ids(["visit", "detector", "patch"]).where(visit=1, detector=1))
        return [(data_id["tract"], data_id["patch"]) for data_id in data_ids]

    with butler.query() as query:
        # By default each overlapping patch is reported once.
        self.assertCountEqual(tract_patch_pairs(query), [(0, 0), (0, 2)])

        # With the option switched on, each patch shows up twice.
        query._allow_duplicate_overlaps = True
        self.assertCountEqual(tract_patch_pairs(query), [(0, 0), (0, 0), (0, 2), (0, 2)])

724 

725 def test_spatial_overlaps(self) -> None: 

726 """Test queries for dimension records with spatial overlaps. 

727 

728 Run tests/data/registry/spatial.py to plot the various regions used in 

729 this test. 

730 """ 

731 butler = self.make_butler("base.yaml", "spatial.yaml") 

732 # Set default governor data ID values both to test that code path and 

733 # to keep us from having to repeat them in every 'where' call below. 

734 butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1") 

735 htm7 = butler.dimensions.skypix_dimensions["htm7"] 

736 with butler.query() as query: 

737 _x = query.expression_factory 

738 # Query for detectors from a particular visit that overlap an 

739 # explicit region. 

740 self.check_detector_records( 

741 query.where( 

742 _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)), 

743 visit=1, 

744 ).dimension_records("detector"), 

745 [1, 3, 4], 

746 has_postprocessing=True, 

747 ) 

748 self.check_detector_records_returned( 

749 butler.query_dimension_records( 

750 "detector", 

751 where="visit_detector_region.region OVERLAPS :region", 

752 bind={"region": htm7.pixelization.pixel(253954)}, 

753 visit=1, 

754 ), 

755 ids=[1, 3, 4], 

756 ) 

757 # Query for detectors from a particular visit that overlap an htm7 

758 # ID. This is basically the same query as the last one, but 

759 # expressed as a spatial join, and we can recognize that 

760 # postprocessing is not needed (while in the last case it did 

761 # nothing, but we couldn't tell that in advance because the query 

762 # didn't know the region came from htm7). 

763 self.check_detector_records( 

764 query.where( 

765 _x.visit_detector_region.region.overlaps(_x.htm7.region), 

766 visit=1, 

767 htm7=253954, 

768 ).dimension_records("detector"), 

769 [1, 3, 4], 

770 has_postprocessing=False, 

771 ) 

772 # Repeat the last query but with the spatial join implicit rather 

773 # than explicit. 

774 self.check_detector_records( 

775 query.where( 

776 visit=1, 

777 htm7=253954, 

778 ).dimension_records("detector"), 

779 [1, 3, 4], 

780 has_postprocessing=False, 

781 ) 

782 self.check_detector_records_returned( 

783 butler.query_dimension_records( 

784 "detector", 

785 visit=1, 

786 htm7=253954, 

787 ), 

788 ids=[1, 3, 4], 

789 ) 

790 # Query for the detectors from any visit that overlap a region: 

791 # this gets contributions from multiple visits, and would have 

792 # duplicates if we didn't get rid of them via GROUP BY. 

793 self.check_detector_records( 

794 query.where( 

795 _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)), 

796 ).dimension_records("detector"), 

797 [1, 2, 3, 4], 

798 has_postprocessing=True, 

799 ) 

800 self.check_detector_records_returned( 

801 butler.query_dimension_records( 

802 "detector", 

803 where="visit_detector_region.region OVERLAPS :region", 

804 bind={"region": htm7.pixelization.pixel(253954)}, 

805 ), 

806 ids=[1, 2, 3, 4], 

807 ) 

808 # Once again we rewrite the region-constraint query as a spatial 

809 # join, which drops the postprocessing. This join has to be 

810 # explicit because `visit` no longer gets into the query dimensions 

811 # some other way, and without it `detector` is not spatial. 

812 self.check_detector_records( 

813 query.where( 

814 _x.visit_detector_region.region.overlaps(_x.htm7.region), 

815 htm7=253954, 

816 ).dimension_records("detector"), 

817 [1, 2, 3, 4], 

818 has_postprocessing=False, 

819 ) 

820 # Query for detectors from any visit that overlap a patch. This 

821 # requires joining visit_detector_region to htm7 and htm7 to patch, 

822 # and then some postprocessing. We want to make sure there are no 

823 # duplicates from a detector and patch both overlapping multiple 

824 # htm7 tiles (which affects detectors 1 and 2) and that 

825 # postprocessing filters out detector 4, which has one htm7 tile in 

826 # common with the patch but does not actually overlap it. 

827 self.check_detector_records( 

828 query.where( 

829 _x.visit_detector_region.region.overlaps(_x.patch.region), 

830 tract=0, 

831 patch=4, 

832 ).dimension_records("detector"), 

833 [1, 2, 3], 

834 has_postprocessing=True, 

835 ) 

836 # Same as above, but with a materialization. 

837 self.check_detector_records( 

838 query.where( 

839 _x.visit_detector_region.region.overlaps(_x.patch.region), 

840 tract=0, 

841 patch=4, 

842 ) 

843 .materialize(dimensions=["detector"]) 

844 .dimension_records("detector"), 

845 [1, 2, 3], 

846 has_postprocessing=True, 

847 ) 

848 # Query for that patch's region and express the previous query as 

849 # a region-constraint instead of a spatial join. 

850 (patch_record,) = query.where(tract=0, patch=4).dimension_records("patch") 

851 self.check_detector_records( 

852 query.where( 

853 _x.visit_detector_region.region.overlaps(patch_record.region), 

854 ).dimension_records("detector"), 

855 [1, 2, 3], 

856 has_postprocessing=True, 

857 ) 

858 self.check_detector_records_returned( 

859 butler.query_dimension_records( 

860 "detector", 

861 where="visit_detector_region.region OVERLAPS :region", 

862 bind={"region": patch_record.region}, 

863 ), 

864 ids=[1, 2, 3], 

865 ) 

866 # Query for detectors where a patch/visit+detector overlap is 

867 # satisfied, in the case where there are no rows with an overlap, 

868 # but the union of the patch regions overlaps the union of the 

869 # visit+detector regions. 

870 self.check_detector_records( 

871 query.where( 

872 _x.visit_detector_region.region.overlaps(_x.patch.region), 

873 _x.any( 

874 _x.all(_x.tract == 1, _x.visit == 1), 

875 _x.all(_x.tract == 0, _x.patch == 0, _x.visit == 2), 

876 ), 

877 ).dimension_records("detector"), 

878 [], 

879 has_postprocessing=True, 

880 ) 

881 # Combine postprocessing with order_by and limit. 

882 self.check_detector_records( 

883 query.where( 

884 _x.visit_detector_region.region.overlaps(patch_record.region), 

885 ) 

886 .dimension_records("detector") 

887 .order_by(_x.detector.desc) 

888 .limit(2), 

889 [3, 2], 

890 has_postprocessing=True, 

891 ) 

892 self.check_detector_records_returned( 

893 butler.query_dimension_records( 

894 "detector", 

895 where="visit_detector_region.region OVERLAPS :region", 

896 bind={"region": patch_record.region}, 

897 order_by="-detector", 

898 limit=2, 

899 ), 

900 ids=[3, 2], 

901 ) 

902 # Try a case where there are some records before postprocessing but 

903 # none afterwards. 

904 self.check_detector_records( 

905 query.where( 

906 _x.visit_detector_region.region.overlaps(patch_record.region), 

907 detector=4, 

908 ).dimension_records("detector"), 

909 [], 

910 has_postprocessing=True, 

911 ) 

912 self.check_detector_records_returned( 

913 butler.query_dimension_records( 

914 "detector", 

915 where="visit_detector_region.region OVERLAPS :region", 

916 bind={"region": patch_record.region}, 

917 detector=4, 

918 explain=False, 

919 ), 

920 ids=[], 

921 ) 

922 # Check spatial queries using points instead of regions. 

923 # This (ra, dec) is a point in the center of the region for visit 

924 # 1, detector 3. 

925 ra = 0.25209391431545386 # degrees 

926 dec = 0.9269112711026793 # degrees 

927 

928 def _check_visit_id(query: Query) -> None: 

929 result = list(query.data_ids(["visit", "detector"])) 

930 self.assertEqual(len(result), 1) 

931 id = result[0] 

932 self.assertEqual(id["visit"], 1) 

933 self.assertEqual(id["detector"], 3) 

934 

935 # Basic POINT() syntax. 

936 _check_visit_id(query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, {dec})")) 

937 _check_visit_id(query.where(f"POINT({ra}, {dec}) OVERLAPS visit_detector_region.region")) 

938 

939 # dec of 1 is close enough to still be in the region, and tests 

940 # conversion of integer to float. 

941 _check_visit_id(query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, 1)")) 

942 

943 # Negative values are allowed for dec, since it's defined as -90 to 

944 # 90. Tract 1, patch 4 slightly overlaps some negative dec values. 

945 result = list(query.where("patch.region OVERLAPS POINT(0.335, -0.000000001)").data_ids(["patch"])) 

946 self.assertEqual(len(result), 1) 

947 id = result[0] 

948 self.assertEqual(id["patch"], 4) 

949 self.assertEqual(id["tract"], 1) 

950 # Out of bounds dec values are not allowed. 

951 with self.assertRaisesRegex(ValueError, "invalid latitude angle"): 

952 list(query.where("patch.region OVERLAPS POINT(0.335, -91)").data_ids(["patch"])) 

953 

954 # Negative ra values are allowed. 

955 _check_visit_id(query.where(f"POINT({ra - 360}, {dec}) OVERLAPS visit_detector_region.region")) 

956 

957 # Substitute ra and dec values via bind instead of literals in the 

958 # string. 

959 _check_visit_id( 

960 query.where( 

961 "visit_detector_region.region OVERLAPS POINT(:ra, :dec)", bind={"ra": ra, "dec": dec} 

962 ) 

963 ) 

964 

965 # Bind in a point object instead of specifying ra/dec separately. 

966 _check_visit_id( 

967 query.where( 

968 "visit_detector_region.region OVERLAPS :my_point", 

969 bind={"my_point": LonLat.fromDegrees(ra, dec)}, 

970 ) 

971 ) 

972 _check_visit_id( 

973 query.where( 

974 "visit_detector_region.region OVERLAPS :my_point", 

975 bind={"my_point": astropy.coordinates.SkyCoord(ra, dec, frame="icrs", unit="deg")}, 

976 ) 

977 ) 

978 # Make sure alternative coordinate frames in astropy SkyCoord are 

979 # handled. 

980 _check_visit_id( 

981 query.where( 

982 "visit_detector_region.region OVERLAPS :my_point", 

983 bind={ 

984 "my_point": astropy.coordinates.SkyCoord( 

985 ra, dec, frame="icrs", unit="deg" 

986 ).transform_to("galactic") 

987 }, 

988 ) 

989 ) 

990 

991 # Compare against literal values using ExpressionFactory. 

992 _check_visit_id( 

993 query.where(_x.visit_detector_region.region.overlaps(LonLat.fromDegrees(ra, dec))) 

994 ) 

995 _check_visit_id( 

996 query.where( 

997 _x.visit_detector_region.region.overlaps( 

998 astropy.coordinates.SkyCoord(ra, dec, frame="icrs", unit="deg") 

999 ) 

1000 ) 

1001 ) 

1002 

1003 # Check errors for invalid syntax. 

1004 with self.assertRaisesRegex( 

1005 InvalidQueryError, r"Expression 'visit.id' in POINT\(\) is not a literal number." 

1006 ): 

1007 query.where(f"visit_detector_region.region OVERLAPS POINT(visit.id, {dec})") 

1008 with self.assertRaisesRegex( 

1009 InvalidQueryError, r"Expression ''not-a-number'' in POINT\(\) is not a literal number." 

1010 ): 

1011 query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, 'not-a-number')") 

1012 

1013 # astropy's SkyCoord can be array-valued, but we expect only a 

1014 # single point. 

1015 array_point = astropy.coordinates.SkyCoord( 

1016 ra=[10, 11, 12, 13], dec=[41, -5, 42, 0], unit="deg", frame="icrs" 

1017 ) 

1018 with self.assertRaisesRegex(ValueError, "Astropy SkyCoord contained an array of points"): 

1019 query.where( 

1020 "visit_detector_region.region OVERLAPS :my_point", 

1021 bind={"my_point": array_point}, 

1022 ) 

1023 

def test_auto_spatial_joins(self) -> None:
    """Test when automatic spatial joins are and are not added.

    A dataset type whose dimensions cross spatial families
    ({visit, detector} and {tract}) is joined into the query, first with
    exactly its own dimensions and then with an extra ``patch`` dimension.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")
    # Default governor data ID values exercise that code path and save us
    # from repeating them in every constraint below.
    butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")
    # Every {visit, detector} that gets a dataset with tract=0, mapped to
    # the patches expected once the query also asks for `patch`.
    patches_by_visit_detector = {
        (1, 1): [0, 2],
        (1, 2): [0, 1, 2, 3],
        (1, 3): [2, 4],
        (1, 4): [2, 3],
        (2, 1): [4],
        (2, 2): [4, 5],
    }
    run = "run1"
    cat = DatasetType(
        "cat",
        dimensions=butler.dimensions.conform(["visit", "detector", "tract"]),
        storageClass="ArrowTable",
    )
    butler.registry.registerDatasetType(cat)
    butler.collections.register(run)
    butler.registry.insertDatasets(
        cat,
        [
            {
                "instrument": "Cam1",
                "skymap": "SkyMap1",
                "visit": visit,
                "detector": detector,
                "tract": 0,
            }
            for visit, detector in patches_by_visit_detector
        ],
        run=run,
    )
    with butler.query() as query:
        # Asking for just the dataset type's dimensions should not need a
        # spatial join: the relationship is assumed to be embedded in the
        # rows of the dataset search.
        tract_level = query.join_dataset_search(cat, run).data_ids(["visit", "detector", "tract"])
        # No explicit spatial join means no postprocessing, so an exact
        # count with discard=False is possible.
        self.assertEqual(tract_level.count(exact=True, discard=False), 6)
        self.assertCountEqual(
            [(row["visit"], row["detector"], row["tract"]) for row in tract_level],
            [(visit, detector, 0) for visit, detector in patches_by_visit_detector],
        )
        # Adding a dimension that wants a more precise spatial join should
        # produce that more precise join.
        patch_level = query.join_dataset_search(cat, run).data_ids(["visit", "detector", "tract", "patch"])
        self.assertCountEqual(
            [(row["visit"], row["detector"], row["tract"], row["patch"]) for row in patch_level],
            [
                (visit, detector, 0, patch)
                for (visit, detector), patches in patches_by_visit_detector.items()
                for patch in patches
            ],
        )

1094 

def test_common_skypix_overlaps(self) -> None:
    """Test spatial overlap queries whose results are htm7 records."""
    butler = self.make_butler("base.yaml", "spatial.yaml")
    # Some queries are only possible if a superset of the skypix IDs is
    # already in the query, so insert datasets that use a skypix
    # dimension.
    cat1 = DatasetType(
        "cat1",
        dimensions=butler.dimensions.conform(["htm7"]),
        storageClass="ArrowTable",
    )
    butler.registry.registerDatasetType(cat1)
    butler.registry.registerCollection("refcats", CollectionType.RUN)
    htm7_data_ids = [{"htm7": pixel} for pixel in range(253952, 253968)]
    butler.registry.insertDatasets(cat1, htm7_data_ids, run="refcats")
    patch_data_id = {"skymap": "SkyMap1", "tract": 0, "patch": 4}
    with butler.query() as query:
        x = query.expression_factory
        # Explicit join to patch.
        explicit_join = query.where(
            x.htm7.region.overlaps(x.patch.region),
            **patch_data_id,
        ).dimension_records("htm7")
        self.assertCountEqual(
            [record.id for record in explicit_join],
            [253954, 253955],
        )
        # Implicit join to patch.
        implicit_join = query.where(**patch_data_id).dimension_records("htm7")
        self.assertCountEqual(
            [record.id for record in implicit_join],
            [253954, 253955],
        )
        simple_records = butler.query_dimension_records("htm7", **patch_data_id)
        self.assertCountEqual(
            [record.id for record in simple_records],
            [253954, 253955],
        )
        # Constraint on the patch region, with the query not knowing that
        # the region corresponds to that patch.
        (patch_record,) = query.where(**patch_data_id).dimension_records("patch")
        region_constrained = (
            query.join_dataset_search("cat1", collections=["refcats"])
            .where(x.htm7.region.overlaps(patch_record.region))
            .dimension_records("htm7")
        )
        self.assertCountEqual(
            [record.id for record in region_constrained],
            [253954, 253955],
        )

1144 

def test_spatial_constraint_queries(self) -> None:
    """Test constraining one spatial dimension through another.

    The constraint (data ID or ``where`` string) names one spatial
    dimension while the query result columns hold a different one.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")
    visit_constraint = {"instrument": "Cam1", "visit": 1}
    with butler.query() as query:
        # Here the 'tract' region is needed for postprocessing, to compare
        # against the visit region, but not in the resulting data ID.
        tracts_from_query = [
            data_id["tract"] for data_id in query.data_ids(["tract"]).where(visit_constraint)
        ]
        self.assertCountEqual([0], tracts_from_query)
        tracts_from_butler = [
            data_id["tract"] for data_id in butler.query_data_ids(["tract"], instrument="Cam1", visit=1)
        ]
        self.assertCountEqual([0], tracts_from_butler)

        # Here the 'tract' region is needed in postprocessing AND is also
        # returned in the result rows.
        region_hex = (
            "7022408b0df0feef3f20378b0df0fe6f3fe23d8b0df0fe8f3ff1d8af0460ffef3f"
            "efcfaf0460ff6f3f75e0830388ff873f31aaeb0730ffef3fb0a5eb0730ff7f3f65"
            "bdf00564ff873f31aaeb0730ffef3fb1aeeb0730ff7f3f65bdf00564ff873f3e1c"
            "2f0fe0feef3f6e57630b28ff873fef52630b28ff873f911ade5e30fdef3f2d9626"
            "47e4fd873f0d952647e4fd973f553df64a80fdef3fd438f64a80fd7f3f20af3838"
            "20fe973f58462440b0fdef3f573d2440b0fd6f3fe2351b3044fe973f22408b0df0"
            "feef3f20378b0df0fe6f3f61428b0df0fe8f3f"
        )
        records_from_query = query.dimension_records("tract").where(visit_constraint)
        self.assertEqual(
            [(0, region_hex)],
            [(record.id, record.region.encode().hex()) for record in records_from_query],
        )
        records_from_butler = butler.query_dimension_records("tract", instrument="Cam1", visit=1)
        self.assertEqual(
            [(0, region_hex)],
            [(record.id, record.region.encode().hex()) for record in records_from_butler],
        )

1195 

def test_data_coordinate_upload(self) -> None:
    """Test dimension-record queries joined to a data coordinate upload."""
    butler = self.make_butler("base.yaml", "spatial.yaml")
    universe = butler.dimensions
    standardize = DataCoordinate.standardize
    with butler.query() as query:
        # Upload with an irrelevant row (there's no data with "Cam2").
        with_irrelevant_row = [
            standardize(instrument="Cam1", detector=1, universe=universe),
            standardize(instrument="Cam1", detector=3, universe=universe),
            standardize(instrument="Cam2", detector=4, universe=universe),
        ]
        self.check_detector_records(
            query.join_data_coordinates(with_irrelevant_row).dimension_records("detector"),
            [1, 3],
        )
        # Upload that directly contains duplicates, which should not
        # appear in the results.
        with_duplicates = [
            standardize(instrument="Cam1", detector=detector, universe=universe)
            for detector in (1, 3, 3)
        ]
        self.check_detector_records(
            query.join_data_coordinates(with_duplicates).dimension_records("detector"),
            [1, 3],
        )
        # Upload with extra dimensions that could also introduce
        # duplicates if we're not careful.
        with_visits = [
            standardize(instrument="Cam1", visit=visit, detector=detector, universe=universe)
            for visit, detector in ((1, 1), (2, 3), (1, 3))
        ]
        self.check_detector_records(
            query.join_data_coordinates(with_visits).dimension_records("detector"),
            [1, 3],
        )
        # The same extra dimensions, this time used in a constraint.
        constrained_by_visit = query.join_data_coordinates(with_visits).where(instrument="Cam1", visit=2)
        self.check_detector_records(
            constrained_by_visit.dimension_records("detector"),
            [3],
        )
        # Upload that must be spatially joined to the other dimensions.
        # This join is added automatically.
        patches = [
            standardize(skymap="SkyMap1", tract=1, patch=patch, universe=universe)
            for patch in (1, 2, 3)
        ]
        spatially_joined = query.join_data_coordinates(patches).where(instrument="Cam1", visit=2)
        self.check_detector_records(
            spatially_joined.dimension_records("detector"),
            [2, 3, 4],
            has_postprocessing=True,
        )
        # Upload that embeds a spatial relationship, which prevents
        # automatic creation of a spatial join. To make the test more
        # interesting, the embedded relationship is nonsense: it includes
        # combinations that do not overlap, while leaving out combinations
        # that do overlap.
        nonsense_overlaps = [
            standardize(
                skymap="SkyMap1",
                tract=1,
                patch=patch,
                instrument="Cam1",
                visit=1,
                detector=detector,
                universe=universe,
            )
            for patch, detector in ((1, 1), (1, 2), (3, 3))
        ]
        embedded = query.join_data_coordinates(nonsense_overlaps).where(skymap="SkyMap1", tract=1, patch=1)
        self.check_detector_records(
            embedded.dimension_records("detector"),
            [1, 2],
        )
        # Upload holding only an empty data ID (not a useful thing to do,
        # but a way to probe edge-case behavior).
        only_empty = [DataCoordinate.make_empty(universe=universe)]
        self.check_detector_records(
            query.join_data_coordinates(only_empty).dimension_records("detector"),
            [1, 2, 3, 4],
        )

1334 

def test_data_coordinate_upload_force_temp_table(self) -> None:
    """Test a data coordinate upload that must go into a temporary table.

    The threshold for making a temporary table is made tiny, so the upload
    is too big to be included directly in the query via bind params.

    This test assumes a DirectQueryDriver and is automatically skipped when
    some other driver is found.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")
    with butler.query() as query:
        driver = query._driver
        if not isinstance(driver, DirectQueryDriver):
            raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
        # Three uploaded rows against a limit of two.
        driver._constant_rows_limit = 2
        data_coordinates = [
            DataCoordinate.standardize(instrument="Cam1", detector=detector, universe=butler.dimensions)
            for detector in (1, 3, 4)
        ]
        uploaded = query.join_data_coordinates(data_coordinates)
        self.check_detector_records(uploaded.dimension_records("detector"), [1, 3, 4])

        # Make sure it can fall back to a VALUES clause if temporary tables
        # are not supported by the DB.
        with unittest.mock.patch.object(driver.db, "_allow_temporary_tables", False):
            uploaded = query.join_data_coordinates(data_coordinates)
            self.check_detector_records(uploaded.dimension_records("detector"), [1, 3, 4])

1366 

def test_materialization(self) -> None:
    """Test dimension-record queries run against a materialized query."""
    butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")
    with butler.query() as query:
        x = query.expression_factory
        # Simple case: the materialization has just the dimensions needed
        # by the rest of the query.
        raft_a_only = query.where(x.detector.raft == "A", instrument="Cam1").materialize()
        self.check_detector_records(raft_a_only.dimension_records("detector"), [1, 2])
        # Extra dimensions in the materialization could cause duplicates
        # if we don't SELECT DISTINCT them away.
        raft_a_with_visits = (
            query.join_dimensions(["visit", "detector"])
            .where(x.detector.raft == "A", instrument="Cam1")
            .materialize()
        )
        self.check_detector_records(raft_a_with_visits.dimension_records("detector"), [1, 2])
        # A materialized spatial join should prevent the creation of a
        # spatial join in the downstream query.
        spatial = query.join_dimensions(["visit", "detector", "tract"]).materialize()
        self.check_detector_records(
            spatial.where(skymap="SkyMap1", tract=0, instrument="Cam1", visit=2).dimension_records(
                "detector"
            ),
            [1, 2],
            has_postprocessing=True,
        )
        # Materialize with a dataset join.
        with_bias = query.join_dataset_search("bias", collections=["imported_g"]).materialize(
            datasets=["bias"]
        )
        self.check_detector_records(with_bias.dimension_records("detector"), [1, 2, 3])

1408 

def test_materialization_find_first(self) -> None:
    """Test find_first dataset queries run against a materialized query."""
    butler = self.make_butler("ci_hsc-subset.yaml", "ci_hsc-subset-skymap.yaml")

    run = "HSC/runs/ci_hsc/20240806T180642Z"
    extra_run = "HSC/runs/ci_hsc/20240806T180642Z-extra"

    # Duplicate a few existing datasets into a second run.
    data_ids = [ref.dataId for ref in butler.query_datasets("calexp", run, limit=3)]
    butler.collections.register(extra_run)
    butler.registry.insertDatasets("calexp", data_ids, extra_run)

    collections = [run, extra_run, "skymaps"]
    dimensions = [
        "instrument",
        "physical_filter",
        "band",
        "visit",
        "detector",
        "day_obs",
        "skymap",
        "tract",
    ]
    with butler.query() as query:
        materialized = (
            query.join_dimensions(dimensions)
            .join_dataset_search("skyMap", collections)
            .join_dataset_search("calexp", collections)
            .where({}, "instrument='HSC' AND skymap='discrete/ci_hsc'", bind=None)
            .materialize()
        )
        # The rows themselves are discarded; each result set is only
        # iterated to completion.
        for dataset_type in ("skyMap", "calexp"):
            _ = list(materialized.datasets(dataset_type, collections))

1445 

def test_materialization_no_results(self) -> None:
    """Test dataset queries against an empty materialized table."""
    butler = self.make_butler("ci_hsc-subset.yaml", "ci_hsc-subset-skymap.yaml")

    collections = ["HSC/runs/ci_hsc/20240806T180642Z"]
    expected_message = "No datasets of type 'nothing'"

    # Register a dataset type but do not add any datasets.
    empty_type = DatasetType("nothing", ["visit", "detector"], "int", universe=butler.dimensions)
    butler.registry.registerDatasetType(empty_type)

    dimensions = [
        "instrument",
        "physical_filter",
        "band",
        "visit",
        "detector",
        "day_obs",
        "skymap",
        "tract",
    ]
    with butler.query() as query:
        joined = (
            query.join_dimensions(dimensions)
            .join_dataset_search("calexp", collections)
            .join_dataset_search("nothing", collections)
            .where({}, "instrument='HSC' AND skymap='discrete/ci_hsc'", bind=None)
        )
        # The diagnostics should name the empty dataset type before
        # materialization...
        self.assertIn(expected_message, "\n".join(joined.explain_no_results()))

        # ...and again for results obtained from the materialized query.
        result = joined.materialize().datasets("nothing")
        self.assertFalse(result.any())
        self.assertIn(expected_message, "\n".join(result.explain_no_results()))

1482 

def test_timespan_results(self) -> None:
    """Test that returned dimension records carry their timespans."""

    def tai(isot: str) -> astropy.time.Time:
        # Build a TAI time from an ISO-T formatted string.
        return astropy.time.Time(isot, format="isot", scale="tai")

    butler = self.make_butler("base.yaml", "spatial.yaml")
    with butler.query() as query:
        query_results = list(query.dimension_records("visit"))
    simple_results = butler.query_dimension_records("visit")
    expected = [
        (1, tai("2021-09-09T03:00:00"), tai("2021-09-09T03:01:00")),
        (2, tai("2021-09-09T03:02:00"), tai("2021-09-09T03:03:00")),
    ]
    # Both query interfaces should report the same visits and bounds.
    for records in (query_results, simple_results):
        found = [(record.id, record.timespan.begin, record.timespan.end) for record in records]
        self.assertCountEqual(found, expected)

1505 

def test_direct_driver_paging(self) -> None:
    """Test dimension-record queries whose results span multiple pages.

    The page size is made tiny for DirectQueryDriver. For
    RemoteQueryDriver, we can't manipulate the page size, so this just
    checks that the driver context manager logic is executing.
    """
    butler = self.make_butler("base.yaml")
    # Basic test where pages should be transparent.
    with butler.query() as query:
        driver = query._driver
        if isinstance(driver, DirectQueryDriver):
            driver._raw_page_size = 2
        self.check_detector_records(query.dimension_records("detector"), [1, 2, 3, 4])
    # Continuing query iteration after the context manager has closed
    # must be an error.
    with butler.query() as query:
        driver = query._driver
        if isinstance(driver, DirectQueryDriver):
            driver._raw_page_size = 2
        record_iter = iter(query.dimension_records("detector"))
        # Pull one record so that iteration has started before closing.
        next(record_iter)
    with self.assertRaisesRegex(RuntimeError, "Cannot continue query result iteration"):
        list(record_iter)

1531 

1532 def test_column_expressions(self) -> None: 

1533 """Test queries with a wide variant of column expressions.""" 

1534 butler = self.make_butler("base.yaml", "spatial.yaml") 

1535 butler.registry.defaults = RegistryDefaults(instrument="Cam1") 

1536 with butler.query() as query: 

1537 _x = query.expression_factory 

1538 self.check_detector_records( 

1539 query.where(_x.not_(_x.detector != 2)).dimension_records("detector"), 

1540 [2], 

1541 ) 

1542 self.check_detector_records_returned( 

1543 butler.query_dimension_records("detector", where="NOT (detector != 2)"), 

1544 [2], 

1545 ) 

1546 self.check_detector_records( 

1547 # Empty string expression should evaluate to True. 

1548 query.where(_x.detector == 2, "").dimension_records("detector"), 

1549 [2], 

1550 ) 

1551 self.check_detector_records( 

1552 query.where(_x.literal(2) == _x.detector).dimension_records("detector"), 

1553 [2], 

1554 ) 

1555 self.check_detector_records( 

1556 query.where(_x.literal(2) == _x.detector + 1).dimension_records("detector"), 

1557 [1], 

1558 ) 

1559 self.check_detector_records( 

1560 query.where(-_x.detector == -3).dimension_records("detector"), 

1561 [3], 

1562 ) 

1563 self.check_detector_records( 

1564 query.where(_x.detector == 1, _x.detector == 2).dimension_records("detector"), 

1565 [], 

1566 messages=["'where' expression requires both detector=2 and detector=1."], 

1567 ) 

1568 self.assertCountEqual( 

1569 [ 

1570 record.id 

1571 for record in query.where( 

1572 # Datetime equal to the "begin" of the timespan. 

1573 _x.visit.timespan.overlaps( 

1574 astropy.time.Time("2021-09-09T03:00:00", format="isot", scale="tai") 

1575 ) 

1576 ).dimension_records("visit") 

1577 ], 

1578 # Timespan begin bound is inclusive, so the record should 

1579 # match. 

1580 [1], 

1581 ) 

1582 self.assertCountEqual( 

1583 [ 

1584 record.id 

1585 for record in query.where( 

1586 # Datetime equal to the "end" of the timespan. 

1587 _x.visit.timespan.overlaps( 

1588 astropy.time.Time("2021-09-09T03:01:00", format="isot", scale="tai") 

1589 ) 

1590 ).dimension_records("visit") 

1591 ], 

1592 # Timespan end bound is exclusive, so we should get no records. 

1593 [], 

1594 ) 

1595 self.assertCountEqual( 

1596 [ 

1597 record.id 

1598 for record in query.where( 

1599 # In the middle of the timespan. 

1600 _x.visit.timespan.overlaps( 

1601 astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai") 

1602 ) 

1603 ).dimension_records("visit") 

1604 ], 

1605 [2], 

1606 ) 

1607 self.assertCountEqual( 

1608 [ 

1609 record.id 

1610 for record in butler.query_dimension_records( 

1611 # In the middle of the timespan. 

1612 "visit", 

1613 where="visit.timespan OVERLAPS(:ts)", 

1614 bind={"ts": astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai")}, 

1615 ) 

1616 ], 

1617 [2], 

1618 ) 

1619 self.assertCountEqual( 

1620 [ 

1621 record.id 

1622 for record in query.where( 

1623 _x.visit.timespan.overlaps( 

1624 Timespan( 

1625 begin=astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"), 

1626 end=None, 

1627 ) 

1628 ) 

1629 ).dimension_records("visit") 

1630 ], 

1631 [2], 

1632 ) 

1633 self.assertCountEqual( 

1634 [ 

1635 record.id 

1636 for record in query.where( 

1637 _x.not_( 

1638 _x.visit.timespan.end 

1639 < astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"), 

1640 ) 

1641 ).dimension_records("visit") 

1642 ], 

1643 [2], 

1644 ) 

1645 self.assertCountEqual( 

1646 [ 

1647 record.id 

1648 for record in query.where( 

1649 _x.visit.timespan.begin 

1650 > astropy.time.Time("2021-09-09T03:01:30", format="isot", scale="tai") 

1651 ).dimension_records("visit") 

1652 ], 

1653 [2], 

1654 ) 

1655 self.assertCountEqual( 

1656 [ 

1657 record.id 

1658 for record in query.where( 

1659 (_x.visit.exposure_time + -(5.0 * _x.visit.zenith_angle)) > 0.0 

1660 ).dimension_records("visit") 

1661 ], 

1662 [1], 

1663 ) 

1664 self.assertCountEqual( 

1665 [ 

1666 record.id 

1667 for record in query.where(_x.visit.exposure_time - 5.0 >= 50.0).dimension_records("visit") 

1668 ], 

1669 [1], 

1670 ) 

1671 self.assertCountEqual( 

1672 [record.id for record in query.where(_x.visit.id % 2 != 0).dimension_records("visit")], 

1673 [1], 

1674 ) 

1675 self.assertCountEqual( 

1676 [ 

1677 record.id 

1678 for record in query.where(_x.visit.zenith_angle / 5.0 <= 1.0).dimension_records("visit") 

1679 ], 

1680 [1], 

1681 ) 

1682 self.assertCountEqual( 

1683 [record.id for record in query.where(_x.visit.timespan.is_null).dimension_records("visit")], 

1684 [], 

1685 ) 

1686 self.assertCountEqual( 

1687 [ 

1688 record.id 

1689 for record in query.where(_x.visit.exposure_time.is_null).dimension_records("visit") 

1690 ], 

1691 [], 

1692 ) 

1693 

1694 # Allow comparison of float columns with int literals 

1695 self.assertCountEqual( 

1696 [record.id for record in query.where("visit.exposure_time > 50").dimension_records("visit")], 

1697 [1], 

1698 ) 

1699 self.assertCountEqual( 

1700 [record.id for record in query.where(_x.visit.exposure_time > 50).dimension_records("visit")], 

1701 [1], 

1702 ) 

1703 

1704 self.check_detector_records( 

1705 query.where(_x.detector.in_iterable([1, 3, 4])).dimension_records("detector"), 

1706 [1, 3, 4], 

1707 ) 

1708 self.check_detector_records_returned( 

1709 butler.query_dimension_records( 

1710 "detector", where="detector IN (:det)", bind={"det": [1, 3, 4]} 

1711 ), 

1712 [1, 3, 4], 

1713 ) 

1714 self.check_detector_records( 

1715 query.where(_x.detector.in_range(start=2, stop=None)).dimension_records("detector"), 

1716 [2, 3, 4], 

1717 ) 

1718 self.check_detector_records( 

1719 query.where(_x.detector.in_range(start=1, stop=3)).dimension_records("detector"), 

1720 [1, 2], 

1721 ) 

1722 self.check_detector_records( 

1723 query.where(_x.detector.in_range(start=1, stop=None, step=2)).dimension_records("detector"), 

1724 [1, 3], 

1725 ) 

1726 self.check_detector_records( 

1727 query.where(_x.detector.in_range(start=1, stop=2)).dimension_records("detector"), 

1728 [1], 

1729 ) 

1730 # This is a complex way to write a much simpler query ("where 

1731 # detector.raft == 'A'"), but it tests code paths that would 

1732 # otherwise require a lot more test setup. 

1733 self.check_detector_records( 

1734 query.where( 

1735 _x.detector.in_query(_x.detector, query.where(_x.detector.raft == "A")) 

1736 ).dimension_records("detector"), 

1737 [1, 2], 

1738 ) 

1739 # Error to reference tract without skymap in a WHERE clause. 

1740 with self.assertRaises(InvalidQueryError): 

1741 list(query.where(_x.tract == 4).dimension_records("patch")) 

1742 

def test_boolean_columns(self) -> None:
    """Exercise boolean dimension columns in query constraints.

    Exposure records whose ``can_see_sky`` value is `True`, `False`, or
    `None` are inserted, and the test then checks which of them are
    selected by string ``where`` expressions and by ``expression_factory``
    predicates.
    """
    # Exposure is the only dimension that has boolean columns, and this set
    # of data has most of the pre-requisites for exposure set up.
    butler = self.make_butler("base.yaml", "spatial.yaml")
    butler.registry.insertDimensionData("group", {"instrument": "Cam1", "name": "1"})

    true_id = 1000
    false_id_1 = 2001
    false_id_2 = 2002
    null_id = 3000

    # Fields shared by every exposure record inserted below.
    shared_fields = {
        "instrument": "Cam1",
        "physical_filter": "Cam1-R1",
        "group": "1",
        "day_obs": 20210909,
    }
    exposure_fields = (
        {"id": true_id, "obs_id": "true-1", "can_see_sky": True},
        {"id": false_id_1, "obs_id": "false-1", "can_see_sky": False, "observation_type": "science"},
        {"id": false_id_2, "obs_id": "false-2", "can_see_sky": False, "observation_type": None},
        {"id": null_id, "obs_id": "null-1", "can_see_sky": None},
    )
    for fields in exposure_fields:
        butler.registry.insertDimensionData("exposure", shared_fields | fields)

    # Go through the registry interface to cover the old query system, too.
    # This can be removed once the old query system is removed.
    def ids_via_registry(where: str) -> list[int]:
        matches = butler.registry.queryDimensionRecords("exposure", where=where, instrument="Cam1")
        return _get_exposure_ids_from_dimension_records(matches)

    def ids_via_simple_query(where: str) -> list[int]:
        matches = butler.query_dimension_records("exposure", where=where, instrument="Cam1")
        return _get_exposure_ids_from_dimension_records(matches)

    def ids_via_query(where: str) -> list[int]:
        with butler.query() as query:
            matches = query.dimension_records("exposure").where(where, instrument="Cam1")
            return _get_exposure_ids_from_dimension_records(matches)

    # Test boolean columns in the `where` string syntax, once per interface.
    runners = {
        "registry": ids_via_registry,
        "new-query": ids_via_query,
        "simple": ids_via_simple_query,
    }
    for label, run_where in runners.items():
        with self.subTest(label):
            # A boolean column is usable standalone as an expression.
            self.assertCountEqual(run_where("exposure.can_see_sky"), [true_id])

            # NOT finds the false values. The NOT of NULL is NULL,
            # consistent with SQL semantics, so the record with a NULL
            # can_see_sky is not included here.
            self.assertCountEqual(run_where("NOT exposure.can_see_sky"), [false_id_1, false_id_2])

            # The bare column must compose correctly with other
            # expressions.
            self.assertCountEqual(
                run_where("exposure.can_see_sky OR exposure = 2001"), [true_id, false_id_1]
            )

    # Find nulls and non-nulls.
    #
    # This is run only against the new query system. It appears that the
    # `= NULL` syntax never had test coverage in the old query system and
    # doesn't work for any column types. Not worth fixing since we are
    # dropping that code soon.
    nulls = [null_id]
    non_nulls = [true_id, false_id_1, false_id_2]
    for expression, expected in (
        ("exposure.can_see_sky = NULL", nulls),
        ("exposure.can_see_sky != NULL", non_nulls),
        ("NULL = exposure.can_see_sky", nulls),
        ("NULL != exposure.can_see_sky", non_nulls),
    ):
        self.assertCountEqual(ids_via_query(expression), expected)

    # You can't do a NULL check on an arbitrary boolean predicate.
    with self.assertRaises(InvalidQueryError):
        ids_via_query("NULL = (exposure.can_see_sky AND exposure = 2001)")

    # Check null finding for non-boolean columns, too.
    for expression, expected in (
        ("exposure.observation_type = NULL AND NOT exposure.can_see_sky", [false_id_2]),
        ("exposure.observation_type != NULL AND NOT exposure.can_see_sky", [false_id_1]),
        ("NULL = exposure.observation_type AND NOT exposure.can_see_sky", [false_id_2]),
        ("NULL != exposure.observation_type AND NOT exposure.can_see_sky", [false_id_1]),
    ):
        self.assertEqual(ids_via_query(expression), expected)

    # Test boolean columns in ExpressionFactory.
    with butler.query() as query:
        x = query.expression_factory

        def ids_matching(constraint: Predicate) -> list[int]:
            matches = query.dimension_records("exposure").where(constraint, instrument="Cam1")
            return _get_exposure_ids_from_dimension_records(matches)

        can_see_sky = x.exposure.can_see_sky

        # A boolean column is usable standalone as a Predicate.
        self.assertCountEqual(ids_matching(can_see_sky.as_boolean()), [true_id])

        # logical_not() finds the false values; as with the string syntax,
        # the record with a NULL can_see_sky is not included.
        self.assertCountEqual(
            ids_matching(can_see_sky.as_boolean().logical_not()), [false_id_1, false_id_2]
        )

        # Searching for nulls works.
        self.assertCountEqual(ids_matching(can_see_sky.is_null), [null_id])

        # Attempting to use operators that only apply to non-boolean types
        # is an error.
        with self.assertRaisesRegex(
            InvalidQueryError,
            r"Boolean expression 'exposure.can_see_sky' can't be used directly in other expressions."
            r" Call the 'as_boolean\(\)' method to convert it to a Predicate instead.",
        ):
            x.exposure.can_see_sky == 1

        # Non-boolean types can't be converted directly to Predicate.
        with self.assertRaisesRegex(
            InvalidQueryError,
            r"Expression 'exposure.observation_type' with type 'string' can't be used directly"
            r" as a boolean value.",
        ):
            x.exposure.observation_type.as_boolean()

1875 

def test_dataset_region_queries(self) -> None:
    """Test spatial ``OVERLAPS`` constraints on dataset queries.

    Each case constrains ``calexp`` datasets by a region, supplied either
    as a bound `Region` or written inline in the expression, and checks the
    number of datasets returned by both the query-context interface and
    `Butler.query_datasets`.
    """
    # Import data to play with.
    butler = self.make_butler("base.yaml", "ci_hsc-subset.yaml")
    run = "HSC/runs/ci_hsc/20240806T180642Z"

    with butler.query() as query:
        # Everything in the run, sorted by data coordinate.
        everything = query.datasets("calexp", collections=run)
        refs = sorted(everything.with_dimension_records(), key=attrgetter("dataId"))
        self.assertEqual(len(refs), 33)

    # Use a region from the first visit.
    first_visit_region = refs[0].dataId.visit.region  # type: ignore

    # Get a visit detector region from the first ref.
    with butler.query() as query:
        data_id = refs[0].dataId.mapping
        records = list(query.dimension_records("visit_detector_region").where(**data_id))  # type: ignore
        self.assertEqual(len(records), 1)

    # Each case is (region text, pass it as a bind value?, expected count).
    cases = (
        ("CIRCLE 320. -0.25 10.", True, 33),  # Match everything.
        ("CIRCLE 321.0 -0.4 0.01", True, 1),  # Should be small region on 1 detector.
        ("CIRCLE 321.1 -0.35 0.02", True, 2),
        ("CIRCLE 321.1 -0.48 0.05", True, 1),  # Center off the region.
        ("CIRCLE 321.0 -0.5 0.01", True, 0),  # No overlap.
        (first_visit_region.to_ivoa_pos(), True, 33),  # Visit region overlaps everything.
        (records[0].region.to_ivoa_pos(), True, 17),  # Some overlap.
        ("CIRCLE(320., -0.25, 10.)", False, 33),  # Match everything.
        ("CIRCLE(321.0, -0.4, 0.01)", False, 1),  # Should be small region on 1 detector.
        ("CIRCLE(321.0, -0.5, 0.01)", False, 0),  # No overlap.
        ("BOX(320, -0.25, 5, 5)", False, 33),  # Match everything.
        ("BOX(321.0, -0.4, 0.01, 0.01)", False, 1),  # Should be small region on 1 detector.
        ("BOX(321.0, -0.5, 0.01, 0.01)", False, 0),  # No overlap.
        ("POLYGON(320, -10, 320, 10, 340, 10, 340, -10)", False, 33),  # Match everything.
        ("POLYGON(320.99, -0.401, 320.99, -0.399, 321.01, -0.399, 321.01, -0.401)", False, 1),
        ("POLYGON(320.99, -0.501, 320.99, -0.499, 321.01, -0.499, 321.01, -0.501)", False, 0),
        ("REGION('CIRCLE 320. -0.25 10.')", False, 33),  # Match everything.
        ("REGION('RANGE 310 330 -10 10')", False, 33),  # Match everything.
        ("REGION('RANGE 320.99 321.01 -0.401 -0.399')", False, 1),  # Small region on 1 detector.
        ("REGION('POLYGON 320.99 -0.501 320.99 -0.499 321.01 -0.499 321.01 -0.501')", False, 0),
    )
    for pos, use_bind, count in cases:
        # Start from the inline form and switch to a bound Region on request.
        overlap_where = f"visit_detector_region.region OVERLAPS {pos}"
        bind = {}
        if use_bind:
            overlap_where = "visit_detector_region.region OVERLAPS :POS"
            bind = {"POS": Region.from_ivoa_pos(pos)}

        with butler.query() as query:
            constrained = query.datasets("calexp", collections=run).where(
                f"instrument = 'HSC' AND {overlap_where}", bind=bind
            )
            refs = list(constrained)
            self.assertEqual(len(refs), count, f"POS={pos} REFS={refs}")

        # The convenience interface must return the same datasets.
        simple_refs = butler.query_datasets(
            "calexp",
            collections=run,
            instrument="HSC",
            where=overlap_where,
            bind=bind,
            explain=False,
        )
        self.assertCountEqual(refs, simple_refs)

1941 

def test_dataset_time_queries(self) -> None:
    """Test temporal ``OVERLAPS`` constraints on dataset queries.

    ``calexp`` datasets are constrained on ``visit.timespan`` using a single
    bound time, a pair of bound times, and a bound `Timespan`, and the
    number of datasets returned is checked for each.
    """
    # Import data to play with.
    butler = self.make_butler("base.yaml", "ci_hsc-subset.yaml")

    def tai(isot: str) -> astropy.time.Time:
        # All times in this test are ISO-format strings on the TAI scale.
        return astropy.time.Time(isot, scale="tai", format="isot")

    # Some times from the test data.
    v_903334_pre = tai("2013-01-01T12:00:00")
    v_903334_mid = tai("2013-06-17T13:29:20")
    v_904014_pre = tai("2013-11-01T12:00:00")
    v_904014_post = tai("2013-12-21T12:00:00")

    with butler.query() as query:
        run = "HSC/runs/ci_hsc/20240806T180642Z"
        calexps = query.datasets("calexp", collections=run)

        # Use a time during the middle of a visit.
        during_visit = list(
            calexps.where(
                "instrument = 'HSC' and visit.timespan OVERLAPS(:ts)", bind={"ts": v_903334_mid}
            )
        )
        self.assertEqual(len(during_visit), 4)

        # Timespan covering first half of the data.
        first_half = list(
            calexps.where(
                "instrument = 'HSC' and visit.timespan OVERLAPS(:t1, :t2)",
                bind={"t1": v_903334_pre, "t2": v_904014_pre},
            )
        )
        self.assertEqual(len(first_half), 17)

        # Query using a timespan object.
        window = Timespan(v_904014_pre, v_904014_post)
        within_window = list(
            calexps.where("instrument = 'HSC' and visit.timespan OVERLAPS(:ts)", bind={"ts": window})
        )
        self.assertEqual(len(within_window), 16)

1976 

def test_calibration_join_queries(self) -> None:
    """Test using the 'general' query result type to join observations to
    calibration datasets temporally.

    General results are needed because the test wants calibration
    DatasetRefs together with data IDs that include the observation
    identifiers (which are not part of the calibration dataset dimensions).
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")

    # Five timestamps, one hour apart.
    t1, t2, t3, t4, t5 = (
        astropy.time.Time(f"2020-01-01T0{hour}:00:00", format="isot", scale="tai") for hour in range(1, 6)
    )

    # Insert some exposure records with timespans between each sequential
    # pair of those, along with the day_obs and group records they reference.
    butler.registry.insertDimensionData(
        "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
    )
    butler.registry.insertDimensionData(
        "group", *({"instrument": "Cam1", "name": f"group{n}"} for n in range(4))
    )
    exposure_records = [
        {
            "instrument": "Cam1",
            "id": exposure_id,
            "group": f"group{exposure_id}",
            "obs_id": obs_id,
            "physical_filter": "Cam1-G",
            "day_obs": 20200101,
            "timespan": Timespan(begin, end),
        }
        for exposure_id, (obs_id, begin, end) in enumerate(
            [("zero", t1, t2), ("one", t2, t3), ("two", t3, t4), ("three", t4, t5)]
        )
    ]
    butler.registry.insertDimensionData("exposure", *exposure_records)

    # Get references to the datasets we imported.
    bias = butler.get_dataset_type("bias")
    bias2a = butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g")
    assert bias2a is not None
    bias3a = butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g")
    assert bias3a is not None
    bias2b = butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r")
    assert bias2b is not None
    bias3b = butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r")
    assert bias3b is not None

    # Register the main calibration collection we'll be working with, then
    # certify 2a over [t2, t4), 3a over [t1, t3), and 2b and 3b together
    # over [t4, ∞).
    collection = "Cam1/calibs"
    butler.collections.register(collection, type=CollectionType.CALIBRATION)
    butler.registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
    butler.registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
    butler.registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

    base_data_id = DataCoordinate.standardize(instrument="Cam1", universe=butler.dimensions)

    def data_id_for(detector: int, exposure: int) -> DataCoordinate:
        return DataCoordinate.standardize(base_data_id, detector=detector, exposure=exposure)

    # The (data ID, bias) combinations both temporal-join queries must yield.
    expected_pairs = [
        (data_id_for(detector=2, exposure=1), bias2a),
        (data_id_for(detector=2, exposure=2), bias2a),
        (data_id_for(detector=3, exposure=0), bias3a),
        (data_id_for(detector=3, exposure=1), bias3a),
        (data_id_for(detector=2, exposure=3), bias2b),
        (data_id_for(detector=3, exposure=3), bias3b),
    ]

    # Query for (bias, exposure, detector) combinations.
    with butler.query() as base_query:
        x = base_query.expression_factory
        joined = base_query.join_dimensions(["exposure"]).join_dataset_search("bias", [collection])

        # Query for all calibs with an explicit temporal join.
        explicit_join = joined.where(
            x["bias"].timespan.overlaps(x.exposure.timespan), base_data_id
        ).general(
            butler.dimensions.conform(["exposure", "detector"]),
            dataset_fields={"bias": ...},
            find_first=True,
        )
        self.assertCountEqual(
            [(data_id, refs[0]) for data_id, refs, _ in explicit_join.iter_tuples(bias)],
            expected_pairs,
        )

        # Query for all calibs with the temporal join implicit and the
        # dimensions given as an incomplete list (detector is added by
        # the dataset results).
        implicit_join = joined.where(base_data_id).general(
            ["exposure"], dataset_fields={"bias": ...}, find_first=True
        )
        self.assertCountEqual(
            [(data_id, refs[0]) for data_id, refs, _ in implicit_join.iter_tuples(bias)],
            expected_pairs,
        )

    # Query with an explicit timespan, first with no calibration collection
    # at all and then with a RUN collection ahead of a CALIBRATION
    # collection that would also match. The first should succeed because
    # the timespan for the dataset_tags tables is logically unbounded, not
    # Null; in the second the RUN collection should win.
    for collections in (["imported_g"], ["imported_g", collection]):
        with butler.query() as query:
            timespan_column = query.expression_factory["bias"].timespan
            overlapping = (
                query.datasets("bias", collections=collections)
                .where(instrument="Cam1", detector=2)
                .where(timespan_column.overlaps(Timespan(begin=t1, end=t2)))
            )
            refs = list(overlapping)
            self.assertEqual([ref.id for ref in refs], [bias2a.id])

    # Query in multiple collections, with one of the collections being a
    # calibration collection. This triggers special cases related to
    # timespan columns in the query code.
    first_uuid = "51352db4-a47a-447c-b12d-a50b206b17cd"
    second_uuid = "87f3e68d-258d-41b7-8ea5-edf3557ccb30"
    refs = butler.query_datasets(
        "bias",
        collections=[collection, "imported_g"],
        where="instrument = 'Cam1' and detector = 2",
        find_first=False,
    )
    self.assertEqual(sorted(str(ref.id) for ref in refs), [first_uuid, second_uuid])

    with butler.query() as query:
        query = query.join_dataset_search("bias", [collection, "imported_g"])
        query = query.where("instrument = 'Cam1' and detector = 2")

        # Dataset ID should be de-duplicated with no collection/timespan
        # column present.
        results = list(
            query.general(
                ["detector"],
                dataset_fields={"bias": {"dataset_id"}},
                find_first=False,
            ).iter_tuples()
        )
        self.assertEqual(
            sorted(str(row.raw_row["bias.dataset_id"]) for row in results),
            [first_uuid, second_uuid],
        )

        # We should have one row for each timespan associated with each
        # dataset ID. The extra copy of "51352..." comes from the run
        # collection, with a timespan of None.
        results = list(
            query.general(
                ["detector"],
                dataset_fields={"bias": {"dataset_id", "timespan"}},
                find_first=False,
            ).iter_tuples()
        )
        self.assertEqual(
            sorted(str(row.raw_row["bias.dataset_id"]) for row in results),
            [first_uuid, first_uuid, second_uuid],
        )

        # We should have one row for each collection associated with each
        # dataset ID.
        results = list(
            query.general(
                ["detector"],
                dataset_fields={"bias": {"dataset_id", "collection"}},
                find_first=False,
            ).iter_tuples()
        )
        self.assertEqual(
            sorted(
                (str(row.raw_row["bias.dataset_id"]), row.raw_row["bias.collection"]) for row in results
            ),
            [
                (first_uuid, "Cam1/calibs"),
                (first_uuid, "imported_g"),
                (second_uuid, "Cam1/calibs"),
            ],
        )

2211 

def test_collection_query_info(self) -> None:
    """Test the dataset types reported by ``collections.query_info`` when a
    summary is requested, with and without ``summary_datasets``.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")

    # Each case is (extra keyword arguments, expected dataset type names).
    cases = (
        ({}, ["flat", "bias"]),
        ({"summary_datasets": ["flat"]}, ["flat"]),
    )
    for extra_arguments, expected_types in cases:
        info = butler.collections.query_info("imported_g", include_summary=True, **extra_arguments)
        self.assertEqual(len(info), 1)
        dataset_types = info[0].dataset_types
        assert dataset_types is not None
        self.assertCountEqual(dataset_types, expected_types)

2226 

def test_dataset_queries(self) -> None:
    """Test dataset queries that involve dataset fields (``run``,
    ``collection``, ``ingest_date``) against a single run collection.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")

    # Need a dataset with some spatial information to trigger aggregate
    # value logic in queries.
    dataset_type = DatasetType("dt", ["visit", "detector"], "int", universe=butler.dimensions)
    butler.registry.registerDatasetType(dataset_type)
    butler.collections.register("run1")
    butler.registry.insertDatasets("dt", [{"instrument": "Cam1", "visit": 1, "detector": 1}], "run1")

    spatial_where = "instrument='Cam1' and skymap='SkyMap1' and visit=1 and tract=0"

    # Tests for a regression of DM-46340, where invalid SQL would be
    # generated when the list of collections is a single run collection and
    # there is region-postprocessing logic involved. This was due to
    # missing type information associated with the "run" dataset field.
    result = butler.query_datasets("dt", "run1", where=spatial_where, with_dimension_records=True)
    first_ref = result[0]
    self.assertEqual(first_ref.dataId, {"instrument": "Cam1", "visit": 1, "detector": 1})
    self.assertEqual(first_ref.run, "run1")

    # A similar issue to the "run" issue above was occurring with the
    # 'collection' dataset field.
    with butler.query() as query:
        general_results = (
            query.join_dataset_search("dt", "run1")
            .where(spatial_where)
            .general(
                dimensions=["visit", "detector"],
                dataset_fields={"dt": {"collection"}},
                find_first=True,
            )
        )
        rows = list(general_results)
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]["visit"], 1)
        self.assertEqual(rows[0]["dt.collection"], "run1")

    # Test that dataset fields like ingest_date can be used in the 'where'
    # clause. Each case is (where, extra keyword arguments, expected count);
    # the cases expected to match nothing pass explain=False.
    ingest_date_cases = (
        ("ingest_date > T'2000-01-01'", {}, 1),
        ("ingest_date < T'2000-01-01'", {"explain": False}, 0),
        ("ingest_date OVERLAPS (T'2000-01-01', T'2099-01-01')", {}, 1),
        ("(T'2000-01-01', T'2099-01-01') OVERLAPS ingest_date", {}, 1),
        ("(T'2000-01-01', T'2001-01-01') OVERLAPS ingest_date", {"explain": False}, 0),
    )
    for where, extra_arguments, expected_count in ingest_date_cases:
        result = butler.query_datasets("dt", "run1", where=where, **extra_arguments)
        self.assertEqual(len(result), expected_count)

2285 

def test_multiple_instrument_queries(self) -> None:
    """Test that queries naming more than one instrument are not rejected
    as having governor dimension ambiguities.
    """
    butler = self.make_butler("base.yaml")
    butler.registry.insertDimensionData("instrument", {"name": "Cam2"})

    # Plain OR over two instruments.
    detector_ids = butler.query_data_ids(["detector"], where="instrument='Cam1' OR instrument='Cam2'")
    cam1_detectors = [
        DataCoordinate.standardize(instrument="Cam1", detector=n, universe=butler.dimensions)
        for n in range(1, 5)
    ]
    self.assertCountEqual(detector_ids, cam1_detectors)

    # OR over two instruments combined with a spatial constraint.
    # No visits in this test dataset means no result, but the point of
    # the test is just that the query can be constructed at all.
    overlapping_ids = butler.query_data_ids(
        ["detector"],
        where="(instrument='Cam1' OR instrument='Cam2') AND visit.region OVERLAPS :region",
        bind={"region": Region.from_ivoa_pos("CIRCLE 320. -0.25 10.")},
        explain=False,
    )
    self.assertCountEqual(overlapping_ids, [])

    # A different detector constraint paired with each instrument.
    instrument_ids = butler.query_data_ids(
        ["instrument"],
        where="(instrument='Cam1' AND detector=2) OR (instrument='Cam2' AND detector=500)",
        explain=False,
    )
    cam1_only = [DataCoordinate.standardize(instrument="Cam1", universe=butler.dimensions)]
    self.assertCountEqual(instrument_ids, cam1_only)

2318 

def test_default_data_id(self) -> None:
    """Test how queries behave with and without a default instrument set
    through ``butler.registry.defaults``.
    """
    butler = self.make_butler("base.yaml")
    butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
    butler.registry.insertDimensionData(
        "physical_filter", {"instrument": "Cam2", "name": "Cam2-G", "band": "g"}
    )

    def physical_filter_names(**constraint: str) -> list[str]:
        # Names of the physical_filter records matching the constraint.
        records = butler.query_dimension_records("physical_filter", **constraint)
        return [record.name for record in records]

    def bands_where(where: str) -> list[str]:
        # Band values of the data IDs matching the expression.
        return [data_id["band"] for data_id in butler.query_data_ids(["band"], where=where)]

    # With no default data ID, queries should return results for all
    # instruments.
    self.assertCountEqual(physical_filter_names(), ["Cam1-G", "Cam1-R1", "Cam1-R2", "Cam2-G"])
    self.assertCountEqual(physical_filter_names(where="band='g'"), ["Cam1-G", "Cam2-G"])

    # When there is no default data ID and a where clause references
    # something depending on instrument, it throws an error as a
    # sanity check.
    # In this case, 'instrument' is not part of the dimensions returned by
    # the query, so there is extra logic needed to detect the need for the
    # default data ID.
    with self.assertRaisesRegex(
        InvalidQueryError,
        "Query 'where' expression references a dimension dependent on instrument"
        " without constraining it directly.",
    ):
        butler.query_data_ids(["band"], where="physical_filter='Cam1-G'")

    # Override the default data ID to specify a default instrument for
    # subsequent tests.
    butler.registry.defaults = RegistryDefaults(instrument="Cam1")

    # When a where clause references something depending on instrument, use
    # the default data ID to constrain the instrument.
    # In this case, 'instrument' is not part of the dimensions returned by
    # the query, so there is extra logic needed to detect the need for the
    # default data ID.
    self.assertEqual(bands_where("physical_filter='Cam1-G'"), ["g"])
    # Default data ID instrument=Cam1 does not match Cam2, so there are no
    # results.
    data_ids = butler.query_data_ids(["band"], where="physical_filter='Cam2-G'", explain=False)
    self.assertEqual(data_ids, [])
    # Overriding the default lets us get the results.
    self.assertEqual(bands_where("instrument='Cam2' and physical_filter='Cam2-G'"), ["g"])

    # Query for a dimension that depends on instrument should pull in the
    # default data ID instrument="Cam1" to constrain results.
    self.assertCountEqual(physical_filter_names(), ["Cam1-G", "Cam1-R1", "Cam1-R2"])

    # Query for a dimension that depends on instrument should pull in the
    # default data ID instrument="Cam1" to constrain results, if the where
    # clause does not explicitly specify instrument.
    self.assertEqual(physical_filter_names(where="band='g'"), ["Cam1-G"])

    # Queries that specify instrument explicitly in the where clause
    # should ignore the default data ID.
    self.assertCountEqual(physical_filter_names(where="instrument='Cam2'"), ["Cam2-G"])
    self.assertCountEqual(physical_filter_names(where="instrument IN ('Cam2')"), ["Cam2-G"])

2390 

def test_unusual_column_literals(self) -> None:
    """Check that numpy ``int64`` scalars are accepted as integer literals
    in data IDs, bind values, and expression-factory comparisons.
    """
    butler = self.make_butler("base.yaml")

    # numpy integers are common in user code, so each query entry point
    # below is handed an ``int64`` instead of a plain ``int``.

    # As a data ID value.
    via_data_id = butler.query_dimension_records(
        "detector", data_id={"instrument": "Cam1", "detector": int64(1)}
    )
    self.assertEqual([record.full_name for record in via_data_id], ["Aa"])

    # As a bind value referenced from the ``where`` string.
    via_bind = butler.query_dimension_records(
        "detector", where="instrument='Cam1' and detector=:an_integer", bind={"an_integer": int64(2)}
    )
    self.assertEqual([record.full_name for record in via_bind], ["Ab"])

    # As the right-hand side of an expression-factory comparison.
    with butler.query() as query:
        x = query.expression_factory
        detector_query = query.dimension_records("detector").where(
            x.instrument == "Cam1", x.detector == int64(3)
        )
        via_expression = list(detector_query)
        self.assertEqual([record.full_name for record in via_expression], ["Ba"])

2414 

def test_query_all_datasets(self) -> None:
    """Exercise ``Butler.query_all_datasets`` with collection and dataset
    type wildcards, data ID / ``where`` / bind constraints, find-first
    searches, limits, and registry defaults.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")

    # Returned refs must carry the expected type, data ID, run and UUID.
    refs = butler.query_all_datasets("imported_r", where="detector = 2", instrument="Cam1")
    refs.sort(key=attrgetter("datasetType.name"))
    self.assertEqual(len(refs), 2)
    bias, flat = refs
    self.assertEqual(bias.datasetType.name, "bias")
    self.assertEqual(bias.dataId["instrument"], "Cam1")
    self.assertEqual(bias.dataId["detector"], 2)
    self.assertEqual(bias.run, "imported_r")
    self.assertEqual(bias.id, UUID("87f3e68d-258d-41b7-8ea5-edf3557ccb30"))
    self.assertEqual(flat.datasetType.name, "flat")
    self.assertEqual(flat.dataId["instrument"], "Cam1")
    self.assertEqual(flat.dataId["detector"], 2)
    self.assertEqual(flat.dataId["physical_filter"], "Cam1-R1")
    self.assertEqual(flat.dataId["band"], "r")
    self.assertEqual(flat.run, "imported_r")
    self.assertEqual(flat.id, UUID("c1296796-56c5-4acf-9b49-40d920c6f840"))

    # A collection wildcard without find-first returns every dataset.
    everything = butler.query_all_datasets("*", find_first=False)
    self.assertEqual(len(everything), 13)

    # The same two detector-1 datasets are expected whether the constraint
    # is given as a data ID or as a bind value.
    detector_1_ids = ("d0bb04cd-d697-4a83-ba53-cdfcd58e3a0c", "e15ab039-bc8b-4135-87c5-90902a7c0b22")
    by_data_id = butler.query_all_datasets(
        "*", data_id={"detector": 1, "instrument": "Cam1"}, find_first=False
    )
    self.assertCountEqual(detector_1_ids, _ref_uuids(by_data_id))

    by_bind = butler.query_all_datasets(
        "*", where="detector=:my_bind and instrument='Cam1'", bind={"my_bind": 1}, find_first=False
    )
    self.assertCountEqual(detector_1_ids, _ref_uuids(by_bind))

    # find_first needs an ordered collection list, so a wildcard is
    # rejected.
    with self.assertRaisesRegex(InvalidQueryError, "Can not use wildcards"):
        butler.query_all_datasets("*")

    # With a chained collection, find_first walks the chain in order.
    butler.collections.register("chain", CollectionType.CHAINED)
    butler.collections.redefine_chain("chain", ["imported_g", "imported_r"])
    from_chain = butler.query_all_datasets(
        "chain", where="detector=2 and instrument = 'Cam1'", find_first=True
    )
    self.assertCountEqual(
        _ref_uuids(from_chain),
        [
            "51352db4-a47a-447c-b12d-a50b206b17cd",  # imported_g bias
            "60c8a65c-7290-4c38-b1de-e3b1cdcf872d",  # imported_g flat
            "c1296796-56c5-4acf-9b49-40d920c6f840",  # imported_r flat
            # imported_r also holds a detector=2 bias, but the dataset
            # with the same data ID in imported_g hides it.
        ],
    )

    # Collection glob patterns are honored.
    from_glob = butler.query_all_datasets("*g", where="detector=1 and instrument = 'Cam1'", find_first=False)
    self.assertEqual(_ref_uuids(from_glob), ["e15ab039-bc8b-4135-87c5-90902a7c0b22"])

    # An explicitly named collection that does not exist is an error...
    with self.assertRaises(MissingCollectionError):
        butler.query_all_datasets("nonexistent")
    # ...but a collection wildcard that matches nothing is just empty.
    no_match = butler.query_all_datasets("nonexistent*", find_first=False)
    self.assertEqual(no_match, [])

    # Dataset type glob patterns are honored.
    b_types = butler.query_all_datasets(
        "*", name="b*", where="detector=1 and instrument = 'Cam1'", find_first=False
    )
    self.assertEqual(_ref_uuids(b_types), ["e15ab039-bc8b-4135-87c5-90902a7c0b22"])

    # Unknown dataset types raise, both as explicit names and as a
    # pattern that matches nothing.
    for missing_name in (["notfound", "flat"], "notfound*"):
        with self.assertRaises(MissingDatasetTypeError):
            butler.query_all_datasets("chain", name=missing_name)

    # A limit of 3 falls on a dataset type boundary; a limit of 4 falls
    # in the middle of a dataset type.
    for limit in (3, 4):
        with self.subTest(limit=limit):
            limited = butler.query_all_datasets("imported_g", limit=limit)
            self.assertEqual(len(limited), limit)
            # A negative limit is expected to return the same number of
            # refs and log a warning mentioning the requested limit.
            with self.assertLogs(level="WARNING") as log:
                limited = butler.query_all_datasets("imported_g", limit=-limit)
                self.assertEqual(len(limited), limit)
                self.assertIn("requested limit", log.output[0])

    nothing = butler.query_all_datasets("imported_g", limit=0)
    self.assertEqual(len(nothing), 0)

    # 'where' terms that do not apply to every dataset type behave the
    # same way they do in query_datasets.
    mixed = butler.query_all_datasets(
        "*", where="detector = 2 and band = 'g' and instrument = 'Cam1'", find_first=False
    )
    self.assertCountEqual(
        _ref_uuids(mixed),
        [
            # bias has no 'band' dimension
            "51352db4-a47a-447c-b12d-a50b206b17cd",
            "87f3e68d-258d-41b7-8ea5-edf3557ccb30",
            # flat has 'band', so it is filtered on it
            "60c8a65c-7290-4c38-b1de-e3b1cdcf872d",
        ],
    )

    # Registry default collections and data ID are applied.
    butler.registry.defaults = RegistryDefaults(collections="imported_g")
    with_defaults = butler.query_all_datasets(where="detector = 2")
    self.assertCountEqual(
        _ref_uuids(with_defaults),
        ["51352db4-a47a-447c-b12d-a50b206b17cd", "60c8a65c-7290-4c38-b1de-e3b1cdcf872d"],
    )

2538 

def test_irrelevant_governor_constraints(self) -> None:
    """Verify that a constraint on a governor dimension unrelated to the
    queried dataset type does not break dataset queries.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")
    butler.registry.insertDimensionData("instrument", {"name": "Cam2"})

    # One dataset type keyed on detector, one keyed on tract, each with a
    # single dataset in the same run.
    detector_type = DatasetType("a", {"detector"}, "StructuredDataDict", universe=butler.dimensions)
    tract_type = DatasetType("b", {"tract"}, "StructuredDataDict", universe=butler.dimensions)
    for dataset_type in (detector_type, tract_type):
        butler.registry.registerDatasetType(dataset_type)
    run = "run1"
    butler.collections.register(run)
    [detector_ref] = butler.registry.insertDatasets(
        detector_type, [{"instrument": "Cam1", "detector": 2}], run=run
    )
    [tract_ref] = butler.registry.insertDatasets(tract_type, [{"skymap": "SkyMap1", "tract": 1}], run=run)

    # Sanity check of the setup: unconstrained queries find each dataset.
    self.assertEqual(butler.query_datasets("a", collections=run), [detector_ref])
    self.assertEqual(butler.query_datasets("b", collections=run), [tract_ref])

    # Irrelevant constraint, where the collection holds a dataset of the
    # other type that is consistent with the constraint.
    self.assertEqual(butler.query_datasets("a", collections=run, skymap="SkyMap1"), [detector_ref])
    # Irrelevant constraint, where the collection holds a dataset of the
    # other type that is inconsistent with the constraint.
    self.assertEqual(butler.query_datasets("b", collections=run, instrument="Cam2"), [tract_ref])

2563 

def test_inferred_primary_key(self) -> None:
    """Check that an unqualified primary-key reference in a ``where``
    string has its dimension inferred from the query context.
    """
    butler = self.make_butler("base.yaml")
    # ``id`` names no dimension; the result must match the query that
    # spells out ``detector=2``.
    via_bare_id = butler.query_dimension_records("detector", instrument="Cam1", where="id=2")
    via_detector_kwarg = butler.query_dimension_records("detector", instrument="Cam1", detector=2)
    self.assertEqual(via_bare_id, via_detector_kwarg)

2573 

def test_glob_expression(self) -> None:
    """Test GLOB() function in user expressions."""
    butler = self.make_butler("base.yaml")

    # (column, glob pattern, expected number of Cam1 detector records).
    cases = (
        ("full_name", "*", 4),
        ("full_name", "\\*", 0),
        ("full_name", "A*", 2),
        ("full_name", "A?", 2),
        ("full_name", "??", 4),
        ("full_name", "*a", 2),
        ("full_name", "A[ab]", 0),
        ("purpose", "*EN?E", 3),
        ("purpose", "\\*CIENC\\*", 0),
        ("full_name", "%", 0),
        ("full_name", "__", 0),
        ("full_name", "a", 0),
        ("full_name", "", 0),
    )

    for column, pattern, count in cases:
        # Run each row as its own sub-test so that a failure names the
        # offending column/pattern and the remaining rows still execute.
        with self.subTest(column=column, pattern=pattern):
            # Pattern as a literal string.
            records = butler.query_dimension_records(
                "detector", instrument="Cam1", where=f"GLOB({column}, '{pattern}')", explain=False
            )
            self.assertEqual(len(records), count)

            # Check that bind works with pattern.
            records = butler.query_dimension_records(
                "detector",
                instrument="Cam1",
                where=f"GLOB({column}, :pattern)",
                explain=False,
                bind={"pattern": pattern},
            )
            self.assertEqual(len(records), count)

    # Check that glob works on dimension itself, not just metadata.
    records = butler.query_dimension_records(
        "detector", where="GLOB(instrument, '?a*1') AND GLOB(full_name, '*')"
    )
    self.assertEqual(len(records), 4)

    # Check exceptions.
    with self.assertRaisesRegex(InvalidQueryError, "first argument must be a string column"):
        butler.query_dimension_records("detector", instrument="Cam1", where="GLOB(detector, '*')")

    # This fails at parser level because parser expects string literal.
    with self.assertRaisesRegex(InvalidQueryError, "Failed to parse expression"):
        butler.query_dimension_records("detector", instrument="Cam1", where="GLOB(full_name, full_name)")

2624 

def test_dataset_id_queries(self) -> None:
    """Check ``where`` constraints on ``dataset_id``, with UUIDs supplied
    both as bind values and as ``UUID('...')`` literals.
    """
    butler = self.make_butler("base.yaml", "datasets.yaml")

    single_id = UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22")
    several_ids = {
        UUID("87f3e68d-258d-41b7-8ea5-edf3557ccb30"),
        UUID("dc0ef017-dc94-4118-b431-d65b1ef89a5f"),
        UUID("e255067d-dcc5-4f39-9824-0baa5817d3e5"),
    }

    # Equality against a bound UUID.
    found = butler.query_datasets(
        "bias", "imported_g", instrument="Cam1", where="dataset_id = :ID", bind={"ID": single_id}
    )
    self.assertEqual({ref.id for ref in found}, {single_id})

    # IN against a bound collection of UUIDs, with the column qualified
    # by dataset type name.
    found = butler.query_datasets(
        "bias", "imported_r", instrument="Cam1", where="bias.dataset_id IN (:IDS)", bind={"IDS": several_ids}
    )
    self.assertEqual({ref.id for ref in found}, several_ids)

    # Equality against a UUID literal written in the expression itself.
    found = butler.query_datasets(
        "bias",
        "imported_g",
        instrument="Cam1",
        where="dataset_id = UUID('e15ab039-bc8b-4135-87c5-90902a7c0b22')",
    )
    self.assertEqual({ref.id for ref in found}, {single_id})

    # IN against a list of UUID literals.
    literal_in_clause = (
        "bias.dataset_id IN ("
        "UUID('87f3e68d-258d-41b7-8ea5-edf3557ccb30'), "
        "UUID('dc0ef017-dc94-4118-b431-d65b1ef89a5f'), "
        "UUID('e255067d-dcc5-4f39-9824-0baa5817d3e5')"
        ")"
    )
    found = butler.query_datasets("bias", "imported_r", instrument="Cam1", where=literal_in_clause)
    self.assertEqual({ref.id for ref in found}, several_ids)

2675 

2676 

def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
    """Return the ``exposure`` data ID value of each record, in input order.

    Parameters
    ----------
    dimension_records : `~collections.abc.Iterable` [ `DimensionRecord` ]
        Records whose ``dataId`` mapping contains an ``"exposure"`` key.

    Returns
    -------
    exposure_ids : `list` [ `int` ]
        One exposure ID per input record.

    Raises
    ------
    AssertionError
        Raised if an exposure value is not an `int`.
    """
    exposure_ids: list[int] = []
    for record in dimension_records:
        # Named ``exposure_id`` rather than ``id`` so the builtin is not
        # shadowed.
        exposure_id = record.dataId["exposure"]
        # Narrows the data ID value type for static checkers and fails the
        # calling test early on unexpected data.
        assert isinstance(exposure_id, int)
        exposure_ids.append(exposure_id)

    return exposure_ids

2685 

2686 

def _ref_uuids(refs: list[DatasetRef]) -> list[str]:
    """Return ``str(ref.id)`` for every ref, preserving input order."""
    dataset_ids = map(attrgetter("id"), refs)
    return list(map(str, dataset_ids))