Coverage for python/lsst/daf/butler/tests/butler_query.py: 6%

560 statements  

coverage.py v7.3.2, created at 2023-12-08 10:56 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ButlerQueryTests"]

import itertools
import os
import re
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, cast

import astropy.time
import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from .._dataset_type import DatasetType
from .._exceptions import EmptyQueryResultError
from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from ..registry._collection_type import CollectionType
from ..registry._exceptions import DataIdValueError, DatasetTypeError, MissingCollectionError
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

if TYPE_CHECKING:
    from .._butler import Butler
    from .._dataset_ref import DatasetRef
    from .._query_results import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
    from ..dimensions import DimensionGroup, DimensionRecord
    from ..registry.sql_registry import SqlRegistry

class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: str
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with the data used in the tests
        below.

        Parameters
        ----------
        *args : str
            Names of the files to pass to `load_data`.
        """
        raise NotImplementedError()
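    # Implementations typically create an empty test repository, load each
    # named YAML file with `load_data`, and then call `make_bias_collection`;
    # a fuller (hypothetical) sketch of a concrete subclass appears at the
    # end of this module.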

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        after the Registry has been created.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def make_bias_collection(self, registry: SqlRegistry) -> None:
        """Make a "biases" collection containing only bias datasets.

        The default test dataset has two collections, each with both flats
        and biases. This adds a new collection for biases, but only if the
        "imported_g" collection exists (usually loaded from datasets.yaml).

        This method should be called from implementations of `make_butler`
        after the Registry has been created.
        """
        try:
            registry.getCollectionType("imported_g")
        except MissingCollectionError:
            return
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
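    # Note: a TAGGED collection holds explicit dataset associations, so the
    # ``associate`` call above links the existing bias datasets into
    # "biases" without copying anything; the same refs remain visible in
    # "imported_g".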

    def test_query_data_ids_convenience(self) -> None:
        """Basic test for `Butler.query_data_ids` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> list[DataCoordinate]:
            """Call query_data_ids with some default arguments."""
            return butler._query_data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(all(data_id.hasFull() for data_id in result))
        self.assertFalse(any(data_id.hasRecords() for data_id in result))

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit", order_by="-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit", order_by=("-band",), limit=1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit", order_by=("-band",), limit=1, offset=1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-band", offset=1000)

        # Empty result, but suppress the exception.
        result = _do_query("visit", order_by="-band", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        # Empty result; this will raise an exception.
        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-band", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_data_ids(self) -> None:
        """Basic test for `Butler.query().data_ids()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            """Call query.data_ids with some default arguments."""
            with butler._query() as query:
                return query.data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(result.has_full())
        self.assertFalse(result.has_records())

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 2)
            self.assertTrue(result.has_records())

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit")
        result = result.order_by("-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)
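    # For reference: ``materialize()`` executes the query and stores the rows
    # (in a temporary table) so that follow-up operations don't re-run it,
    # while ``expanded()`` attaches dimension records to each data ID, which
    # is what makes ``has_records()`` true above.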

    def test_query_dimension_records_convenience(self) -> None:
        """Basic test for `Butler.query_dimension_records` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> list[DimensionRecord]:
            """Call query_dimension_records with some default arguments."""
            return butler._query_dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(len(result), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit", order_by="-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit", order_by=("-visit",), limit=1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit", order_by=("-visit",), limit=1, offset=1)
        self.assertEqual([record.id for record in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-visit", offset=1000)

        result = _do_query("visit", order_by="-visit", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-visit", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_dimension_records(self) -> None:
        """Basic test for `Butler.query().dimension_records()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            """Call query.dimension_records with some default arguments."""
            with butler._query() as query:
                return query.dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(result.count(), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_datasets_convenience(self) -> None:
        """Basic test for `Butler.query_datasets` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> list[DatasetRef]:
            return butler._query_datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(len(result), 6)
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(len(result), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query(..., collections=..., find_first=False, where=where)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_datasets(self) -> None:
        """Basic test for `Butler.query().datasets()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(result.count(), 6)
        self.assertTrue(result.any())
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        by_type = list(result.by_parent_dataset_type())
        self.assertEqual(len(by_type), 2)
        self.assertEqual(set(item.parent_dataset_type.name for item in by_type), {"bias", "flat"})

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 6)
            for ref in result:
                self.assertTrue(ref.dataId.hasRecords())

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(result.count(), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)
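    # For reference: in these tests ``...`` (Ellipsis) means "any" - all
    # dataset types or all collections - and ``by_parent_dataset_type()``
    # splits the heterogeneous result stream by parent dataset type.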

    def test_query_result_summaries(self) -> None:
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # First query yields two results, and involves no postprocessing.
        query1 = _query_data_ids(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = _query_data_ids(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = _query_data_ids(["detector", "physical_filter"])
        queries_and_snippets: list[Any] = [
            (
                # Dataset type name doesn't match any existing dataset types.
                _query_datasets("nonexistent", collections=..., find_first=False),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                _query_datasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=butler.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                    find_first=False,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                _query_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.find_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                _query_datasets("flat", collections=re.compile("potato.+"), find_first=False),
                ["potato"],
            ),
        ]
        # The behavior of these additional queries is slated to change in the
        # future, so we also check for deprecation warnings.
        with self.assertWarns(FutureWarning):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_data_ids(["detector"], datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        with self.assertWarns(FutureWarning):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_dimension_records("detector", datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query does yield results, but should also emit a warning
        # because passing dataset type patterns to queryDataIds is
        # deprecated; just look for the warning.
        with self.assertWarns(FutureWarning):
            _query_data_ids(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful
        # diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                _query_data_ids(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                _query_dimension_records("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is
        # filtered out in postprocessing. The count queries aren't accurate
        # because they don't account for duplication that happens due to an
        # internal join against commonSkyPix.
        query3 = _query_data_ids(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=butler.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal,
        # but fixing it is out of scope for this ticket.
        query4 = _query_data_ids(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = _query_datasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in
        # the database. Explaining why it fails involves traversing the
        # relation tree and running a LIMIT 1 query at each level that has
        # the potential to remove rows.
        query6 = _query_dimension_records(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = list(query6.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))
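    # For reference (a summary of the semantics exercised above, assuming the
    # documented behavior of the query-result classes): ``execute=False``
    # permits answering from query structure alone without touching the
    # database, ``exact=False`` allows an answer that ignores postprocessing
    # (so counts may overestimate), and ``discard=True`` lets ``count`` fetch
    # and discard rows when postprocessing makes an exact count impossible
    # otherwise.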

    def test_query_results(self) -> None:
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bias = butler.get_dataset_type("bias")
        flat = butler.get_dataset_type("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expected_dimensions = butler.dimensions.conform(["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expected_subset_dimensions = butler.dimensions.conform(["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(
                    instrument="Cam1", detector=d, physical_filter=p, universe=butler.dimensions
                )
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            dimensions=expected_dimensions,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            butler.find_dataset(
                flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
            ),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            ref
            for ref in [
                butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
            ]
            if ref is not None
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = _query_data_ids(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.dimensions, expected_dimensions)
        self.assertEqual(set(dataIds), set(expectedDataIds))
        self.assertCountEqual(
            list(
                dataIds.find_datasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
        self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
        self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=False)
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=True)
            ),
            expectedDeduplicatedBiases,
        )

        # Searching for a dataset with dimensions we had projected away
        # restores those dimensions.
        self.assertCountEqual(
            list(subsetDataIds.find_datasets("flat", collections=["imported_r"], find_first=True)),
            expectedFlats,
        )

        # Use a component dataset type.
        self.assertCountEqual(
            [
                ref.makeComponentRef("image")
                for ref in subsetDataIds.find_datasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    find_first=False,
                )
            ],
            [ref.makeComponentRef("image") for ref in expectedAllBiases],
        )

        # Use a named dataset type that does not exist and a dataset type
        # object that does not exist.
        unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")

        # Test both string name and dataset type object.
        tests: tuple[tuple[DatasetType | str, str], ...] = (
            (unknown_type, unknown_type.name),
            (unknown_type.name, unknown_type.name),
        )
        for test_type, test_type_name in tests:
            with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
                list(
                    subsetDataIds.find_datasets(
                        test_type, collections=["imported_r", "imported_g"], find_first=True
                    )
                )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=False
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=True
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.dimensions, expected_dimensions)
            self.assertEqual(set(dataIds), set(expectedDataIds))
            self.assertCountEqual(
                list(
                    dataIds.find_datasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
                self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=False
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=True
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=False
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=True
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def test_query_datasets_deduplication(self) -> None:
        """Test that the findFirst option to query.datasets selects datasets
        from collections in the order given.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=False)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_r", "imported_g"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
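    # For reference: with ``find_first=True`` each data ID resolves to the
    # dataset in the first collection, in the order given, that contains it;
    # that is why detectors 2 and 3 switch from imported_g to imported_r when
    # the collection search order is reversed above.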

    def test_query_data_ids_order_by(self) -> None:
        """Test order_by and limit on result returned by query.data_ids()."""
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def do_query(
            dimensions: Iterable[str] = ("visit", "tract"), datasets: Any = None, collections: Any = None
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(
                    dimensions,
                    datasets=datasets,
                    collections=collections,
                    instrument="Cam1",
                    skymap="SkyMap1",
                )

        Test = namedtuple(
            "Test",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-timespan.begin,timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # and materialize
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass  # pragma: no cover

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))
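    # For reference, the ORDER BY grammar exercised above: a leading "-"
    # requests descending order, a bare dimension name sorts by its primary
    # key, and metadata fields may be qualified as "element.field" (e.g.
    # "visit.exposure_time") or "element.timespan.begin" whenever an
    # unqualified name would be ambiguous.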

    def test_query_int_range_expressions(self) -> None:
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )
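    # For reference: "N..M:S" uses an inclusive stop, so it corresponds to
    # Python's range(N, M + 1, S); e.g. "1..4:2" selects {1, 3} and "2..4:2"
    # selects {2, 4}, matching the assertions above.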

    def test_query_data_ids_expression_error(self) -> None:
        """Test error checking of 'where' expressions in query.data_ids."""
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            _query_data_ids(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            _query_data_ids(["detector"], where="timespan.end < time", bind=bind)

    def test_query_data_ids_governor_exceptions(self) -> None:
        """Test exceptions raised by query.data_ids for incorrect governors."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        Test = namedtuple(
            "Test",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    ).count()
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    query = _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    )
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def test_query_dimension_records_exceptions(self) -> None:
        """Test exceptions raised by query.dimension_records()."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        result = _query_dimension_records("detector")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", data_id={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", instrument="NotCam1")

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", data_id={"instrument": "NotCam1"})

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument='NotCam1'")

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "NotCam1"})

    def test_query_dimension_records_order_by(self) -> None:
        """Test order_by and limit on result returned by
        query.dimension_records().
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        def do_query(
            element: str, datasets: Any = None, collections: Any = None
        ) -> DimensionRecordQueryResults:
            return _query_dimension_records(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "Test",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in _query_data_ids(["patch"]).expanded()
            if data_id.region is not None
        }
        skypix_dimension = cast(SkyPixDimension, butler.dimensions["htm11"])
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, butler.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in _query_data_ids(
                    ["patch"],
                    data_id={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system
        # in the dimensions doesn't generate redundant join terms in the
        # query.
        full_data_ids = set(
            _query_data_ids(["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC").expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            tract = data_id.records["tract"]
            visit = data_id.records["visit"]
            htm7 = data_id.records["htm7"]
            assert tract is not None and visit is not None and htm7 is not None
            self.assertFalse(tract.region.isDisjointFrom(htm7.region))
            self.assertFalse(visit.region.isDisjointFrom(htm7.region))
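    # For reference: iterating over an lsst.sphgeom.RangeSet yields
    # (begin, end) pairs of half-open integer intervals, which is why the
    # loops above expand each pair with range(begin, end).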

    def test_bind_in_query_datasets(self) -> None:
        """Test that the bind parameter is correctly forwarded in
        query.datasets recursion.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.assertEqual(
            set(_query_datasets("flat", band="r", collections=..., find_first=False)),
            set(
                _query_datasets(
                    "flat", where="band=my_band", bind={"my_band": "r"}, collections=..., find_first=False
                )
            ),
        )
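    # For reference: ``bind`` maps bare identifiers appearing in ``where``
    # (here ``my_band``) to Python values, so expressions can be
    # parameterized without interpolating literals into the string.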

    def test_dataset_constrained_dimension_record_queries(self) -> None:
        """Test that query.dimension_records works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Query for physical_filter dimension records, using a dataset type
        # that has both physical_filter and detector dimensions.
        records = _query_dimension_records(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(_query_dimension_records("physical_filter", datasets=..., collections="imported_r"))

    def test_exposure_queries(self) -> None:
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler
        tests is the hsc-rc2-subset.yaml export (which is unfortunately
        distinct from the lsst/rc2_subset GitHub repo), but that does not
        have 'exposure' dimension records as it was focused on providing
        nontrivial spatial overlaps between visit+detector and tract+patch.
        So in this test we need to translate queries that originally used
        the exposure dimension to use the (very similar) visit dimension
        instead.
        """
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("visit", instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in _query_data_ids(["visit"], instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in _query_data_ids(["detector"], instrument="HSC").order_by("full_name").limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_spatial_join(self) -> None:
        """Test queries that involve spatial overlap joins."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(
            dimensions: DimensionGroup | list[str] | str, **kwargs: Any
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in butler.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in _query_dimension_records(element.name)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(_query_data_ids(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = butler.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = butler.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(_query_data_ids(dimensions))
            self.assertEqual(expected, queried)
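
# A minimal sketch (not part of the original module) of how a concrete test
# case might be assembled from this suite. The class name, the use of
# tempfile, and the ``_registry`` attribute access are illustrative
# assumptions only, not APIs guaranteed by this module:
#
#     import tempfile
#     import unittest
#
#     class DirectButlerQueryTestCase(ButlerQueryTests, unittest.TestCase):
#         """Run ButlerQueryTests against a direct (non-client/server)
#         Butler backed by a temporary repository."""
#
#         data_dir = os.path.join(os.path.dirname(__file__), "data", "registry")
#
#         def make_butler(self, *args: str) -> Butler:
#             root = self.enterContext(tempfile.TemporaryDirectory())
#             butler = Butler(Butler.makeRepo(root), writeable=True)
#             for filename in args:
#                 self.load_data(butler._registry, filename)
#             self.make_bias_collection(butler._registry)
#             return butler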