Coverage for python/lsst/daf/butler/tests/butler_query.py: 6%

560 statements

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ButlerQueryTests"]

import itertools
import os
import re
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, cast

import astropy.time
import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from .._dataset_type import DatasetType
from .._exceptions import EmptyQueryResultError
from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from ..registry._collection_type import CollectionType
from ..registry._exceptions import (
    DataIdValueError,
    DatasetTypeError,
    DatasetTypeExpressionError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

if TYPE_CHECKING:
    from .._butler import Butler
    from .._dataset_ref import DatasetRef
    from .._query_results import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
    from ..dimensions import DimensionGroup, DimensionRecord
    from ..registry.sql_registry import SqlRegistry


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: str
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make Butler instance populated with data used in the tests below.

        Parameters
        ----------
        *args : str
            Names of the files to pass to `load_data`.
        """
        raise NotImplementedError()
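
    # A concrete subclass might implement `make_butler` along the following
    # lines (an illustrative sketch only; `make_empty_test_repo` is a
    # hypothetical helper standing in for whatever the test harness uses to
    # create an empty repository and its SqlRegistry):
    #
    #     def make_butler(self, *args: str) -> Butler:
    #         butler, registry = self.make_empty_test_repo()
    #         for filename in args:
    #             self.load_data(registry, filename)
    #         self.make_bias_collection(registry)
    #         return butler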

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        where the Registry should exist.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Location of test data.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def make_bias_collection(self, registry: SqlRegistry) -> None:
        """Make "biases" collection containing only bias datasets.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.

        Notes
        -----
        The default test dataset has two collections, each with both flats
        and biases. This adds a new collection for biases, but only if the
        "imported_g" collection exists (usually loaded from datasets.yaml).

        This method should be called from implementations of `make_butler`
        where the Registry should exist.
        """
        try:
            registry.getCollectionType("imported_g")
        except MissingCollectionError:
            return
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
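        # After this runs, the "biases" TAGGED collection holds the bias
        # datasets from "imported_g" (detectors 1-3 in the default test
        # data), while flats remain only in the original import collections.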

    def test_query_data_ids_convenience(self) -> None:
        """Basic test for `Butler.query_data_ids` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> list[DataCoordinate]:
            """Call query_data_ids with some default arguments."""
            return butler._query_data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(all(data_id.hasFull() for data_id in result))
        self.assertFalse(any(data_id.hasRecords() for data_id in result))

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit", order_by="-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit", order_by=("-band",), limit=1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit", order_by=("-band",), limit=1, offset=1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-band", offset=1000)

        # Empty result but suppress exception.
        result = _do_query("visit", order_by="-band", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        # Empty result; this will raise an exception.
        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-band", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_data_ids(self) -> None:
        """Basic test for `Butler.query().data_ids()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            """Call query.data_ids with some default arguments."""
            with butler._query() as query:
                return query.data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(result.has_full())
        self.assertFalse(result.has_records())

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 2)
            self.assertTrue(result.has_records())

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit")
        result = result.order_by("-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_dimension_records_convenience(self) -> None:
        """Basic test for `Butler.query_dimension_records` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> list[DimensionRecord]:
            """Call query_dimension_records with some default arguments."""
            return butler._query_dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(len(result), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit", order_by="-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit", order_by=("-visit",), limit=1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit", order_by=("-visit",), limit=1, offset=1)
        self.assertEqual([record.id for record in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-visit", offset=1000)

        result = _do_query("visit", order_by="-visit", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-visit", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_dimension_records(self) -> None:
        """Basic test for `Butler.query().dimension_records()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            """Call query.dimension_records with some default arguments."""
            with butler._query() as query:
                return query.dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(result.count(), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_datasets_convenience(self) -> None:
        """Basic test for `Butler.query_datasets` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> list[DatasetRef]:
            return butler._query_datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(len(result), 6)
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(len(result), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query(..., collections=..., find_first=False, where=where)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_datasets(self) -> None:
        """Basic test for `Butler.query().datasets()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(result.count(), 6)
        self.assertTrue(result.any())
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        by_type = list(result.by_dataset_type())
        self.assertEqual(len(by_type), 2)
        self.assertEqual(set(item.dataset_type.name for item in by_type), {"bias", "flat"})

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 6)
            for ref in result:
                self.assertTrue(ref.dataId.hasRecords())

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(result.count(), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_result_summaries(self) -> None:
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        # This method was copied almost verbatim from the Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # First query yields two results and involves no postprocessing.
        query1 = _query_data_ids(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = _query_data_ids(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = _query_data_ids(["detector", "physical_filter"])
        queries_and_snippets: list[Any] = [
            (
                # Dataset type name doesn't match any existing dataset types.
                _query_datasets("nonexistent", collections=..., find_first=False),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                _query_datasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=butler.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                    find_first=False,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                _query_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.find_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                _query_datasets("flat", collections=re.compile("potato.+"), find_first=False),
                ["potato"],
            ),
        ]

        with self.assertRaises(MissingDatasetTypeError):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_data_ids(["detector"], datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        with self.assertRaises(MissingDatasetTypeError):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_dimension_records("detector", datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # Passing a dataset type pattern to a data ID query is no longer
        # supported; it now raises instead of yielding results with a
        # deprecation warning.
        with self.assertRaises(DatasetTypeExpressionError):
            _query_data_ids(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be identified
        # by cheap follow-up queries, yielding helpful diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                _query_data_ids(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                _query_dimension_records("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing. The count queries aren't accurate because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = _query_data_ids(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=butler.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = _query_data_ids(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = _query_datasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the potential
        # to remove rows.
        query6 = _query_dimension_records(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = list(query6.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))

    def test_query_results(self) -> None:
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        # This method was copied almost verbatim from the Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bias = butler.get_dataset_type("bias")
        flat = butler.get_dataset_type("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expected_dimensions = butler.dimensions.conform(["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expected_subset_dimensions = butler.dimensions.conform(["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(
                    instrument="Cam1", detector=d, physical_filter=p, universe=butler.dimensions
                )
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            dimensions=expected_dimensions,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            butler.find_dataset(
                flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
            ),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            ref
            for ref in [
                butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
            ]
            if ref is not None
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = _query_data_ids(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.dimensions, expected_dimensions)
        self.assertEqual(set(dataIds), set(expectedDataIds))
        self.assertCountEqual(
            list(
                dataIds.find_datasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
        self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
        self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=False)
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=True)
            ),
            expectedDeduplicatedBiases,
        )

        # Searching for a dataset with dimensions we had projected away
        # restores those dimensions.
        self.assertCountEqual(
            list(subsetDataIds.find_datasets("flat", collections=["imported_r"], find_first=True)),
            expectedFlats,
        )

        # Use a component dataset type.
        self.assertCountEqual(
            [
                ref.makeComponentRef("image")
                for ref in subsetDataIds.find_datasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    find_first=False,
                )
            ],
            [ref.makeComponentRef("image") for ref in expectedAllBiases],
        )

        # Use a named dataset type that does not exist and a dataset type
        # object that does not exist.
        unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")

        # Test both string name and dataset type object.
        tests: tuple[tuple[DatasetType | str, str], ...] = (
            (unknown_type, unknown_type.name),
            (unknown_type.name, unknown_type.name),
        )
        for test_type, test_type_name in tests:
            with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
                list(
                    subsetDataIds.find_datasets(
                        test_type, collections=["imported_r", "imported_g"], find_first=True
                    )
                )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=False
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=True
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.dimensions, expected_dimensions)
            self.assertEqual(set(dataIds), set(expectedDataIds))
            self.assertCountEqual(
                list(
                    dataIds.find_datasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
                self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=False
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=True
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=False
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=True
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def test_query_datasets_deduplication(self) -> None:
        """Test that the findFirst option to query.datasets selects datasets
        from collections in the order given.
        """
        # This method was copied almost verbatim from the Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=False)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_r", "imported_g"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )

    def test_query_data_ids_order_by(self) -> None:
        """Test order_by and limit on result returned by query.data_ids()."""
        # This method was copied almost verbatim from the Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def do_query(
            dimensions: Iterable[str] = ("visit", "tract"), datasets: Any = None, collections: Any = None
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(
                    dimensions,
                    datasets=datasets,
                    collections=collections,
                    instrument="Cam1",
                    skymap="SkyMap1",
                )

        Test = namedtuple(
            "Test",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )
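
        # Each Test row reads: query data IDs for the dimensions named in
        # ``keys``, sort them by ``order_by``, and expect ``result`` as tuples
        # of the ``keys`` values; ``limit`` is (limit[, offset]) when present,
        # and ``datasets``/``collections`` optionally constrain the query.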

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-timespan.begin,timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Materializing a query with ORDER BY applied is not supported
            # and should raise.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass  # pragma: no cover

        # Errors in an ORDER BY name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def test_query_int_range_expressions(self) -> None:
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """

        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )

    def test_query_data_ids_expression_error(self) -> None:
        """Test error checking of 'where' expressions in query.data_ids."""
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            _query_data_ids(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            _query_data_ids(["detector"], where="timespan.end < time", bind=bind)

    def test_query_data_ids_governor_exceptions(self) -> None:
        """Test exceptions raised by query.data_ids for incorrect governors."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        Test = namedtuple(
            "Test",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )
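
        # Each Test row supplies governor-dimension values in one of several
        # ways (keyword arguments, an explicit data ID, or a ``where`` string,
        # optionally with bind values) and records either the expected row
        # count or the exception that unknown values should raise.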

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    ).count()
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                self.assertEqual(query.count(discard=True), test.count)

            # Repeat the same check on a materialized query.
            if test.exception:
                with self.assertRaises(test.exception):
                    query = _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    )
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def test_query_dimension_records_exceptions(self) -> None:
        """Test exceptions raised by query.dimension_records()."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        result = _query_dimension_records("detector")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", data_id={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", instrument="NotCam1")

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", data_id={"instrument": "NotCam1"})

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument='NotCam1'")

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "NotCam1"})

    def test_query_dimension_records_order_by(self) -> None:
        """Test order_by and limit on result returned by
        query.dimension_records().
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        def do_query(
            element: str, datasets: Any = None, collections: Any = None
        ) -> DimensionRecordQueryResults:
            return _query_dimension_records(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "Test",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # Errors in an ORDER BY name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in _query_data_ids(["patch"]).expanded()
            if data_id.region is not None
        }
        skypix_dimension = cast(SkyPixDimension, butler.dimensions["htm11"])
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, butler.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in _query_data_ids(
                    ["patch"],
                    data_id={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system in
        # the dimensions doesn't generate redundant join terms in the query.
        full_data_ids = set(
            _query_data_ids(["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC").expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            tract = data_id.records["tract"]
            visit = data_id.records["visit"]
            htm7 = data_id.records["htm7"]
            assert tract is not None and visit is not None and htm7 is not None
            self.assertFalse(tract.region.isDisjointFrom(htm7.region))
            self.assertFalse(visit.region.isDisjointFrom(htm7.region))

    def test_bind_in_query_datasets(self) -> None:
        """Test that the bind parameter is correctly forwarded in
        query.datasets recursion.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.assertEqual(
            set(_query_datasets("flat", band="r", collections=..., find_first=False)),
            set(
                _query_datasets(
                    "flat", where="band=my_band", bind={"my_band": "r"}, collections=..., find_first=False
                )
            ),
        )

    def test_dataset_constrained_dimension_record_queries(self) -> None:
        """Test that query.dimension_records works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Query for physical_filter dimension records, using a dataset type
        # whose dimensions include physical_filter as well as detector.
        records = _query_dimension_records(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(_query_dimension_records("physical_filter", datasets=..., collections="imported_r"))

    def test_exposure_queries(self) -> None:
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler tests
        is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
        the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
        dimension records as it was focused on providing nontrivial spatial
        overlaps between visit+detector and tract+patch. So in this test we
        need to translate queries that originally used the exposure dimension
        to use the (very similar) visit dimension instead.
        """
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("visit", instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in _query_data_ids(["visit"], instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in _query_data_ids(["detector"], instrument="HSC").order_by("full_name").limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_spatial_join(self) -> None:
        """Test queries that involve spatial overlap joins."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(
            dimensions: DimensionGroup | list[str] | str, **kwargs: Any
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to. We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in butler.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in _query_dimension_records(element.name)
                }
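
        # With the default dimension universe this typically yields two
        # families: one for observation regions (e.g. visit and
        # visit_detector_region) and one for skymap regions (tract and patch).
        # The exact element names depend on the dimension configuration, so
        # this listing is illustrative only.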

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(_query_data_ids(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = butler.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = butler.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(_query_data_ids(dimensions))
            self.assertEqual(expected, queried)