# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ButlerQueryTests"]

import itertools
import os
import re
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, cast

import astropy.time
import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from .._dataset_type import DatasetType
from .._exceptions import EmptyQueryResultError
from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from ..registry._collection_type import CollectionType
from ..registry._exceptions import (
    DataIdValueError,
    DatasetTypeError,
    DatasetTypeExpressionError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

if TYPE_CHECKING:
    from .._butler import Butler
    from .._dataset_ref import DatasetRef
    from .._query_results import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
    from ..dimensions import DimensionGroup, DimensionRecord
    from ..registry.sql_registry import SqlRegistry


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
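
    Notes
    -----
    Concrete subclasses must provide a repository and implement `make_butler`.
    The sketch below is illustrative only; ``MyButlerQueryTests`` and the
    ``make_empty_butler`` helper are hypothetical names, not part of this
    API::

        class MyButlerQueryTests(ButlerQueryTests, unittest.TestCase):
            data_dir = "tests/data/registry"

            def make_butler(self, *args: str) -> Butler:
                # Hypothetical helper creating an empty writeable repository.
                butler = self.make_empty_butler()
                for filename in args:
                    self.load_data(butler.registry, filename)
                return butler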
69 """
71 data_dir: str
72 """Root directory containing test data YAML files."""
74 @abstractmethod
75 def make_butler(self, *args: str) -> Butler:
76 """Make Butler instance populated with data used in the tests below.
78 Parameters
79 ----------
80 *args : str
81 Names of the files to pass to `load_data`.
82 """
83 raise NotImplementedError()
85 def load_data(self, registry: SqlRegistry, filename: str) -> None:
86 """Load registry test data from ``data_dir/<filename>``,
87 which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`,
        once the Registry exists.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Location of test data.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
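            # Import registry content only; these tests do not need any
            # datastore artifacts, so no datastore is passed to the backend.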
            backend.load(datastore=None)

    def make_bias_collection(self, registry: SqlRegistry) -> None:
        """Make "biases" collection containing only bias datasets.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.

        Notes
        -----
        The default test dataset has two collections, each containing both
        flats and biases. This adds a new collection with only the biases,
        but only if the "imported_g" collection exists (usually loaded from
        datasets.yaml).

        This method should be called from implementations of `make_butler`,
        once the Registry exists.
120 """
121 try:
122 registry.getCollectionType("imported_g")
123 except MissingCollectionError:
124 return
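        # Use a TAGGED collection so that the existing bias datasets can be
        # associated into it without copying anything.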
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))

    def test_query_data_ids_convenience(self) -> None:
        """Basic test for `Butler.query_data_ids` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> list[DataCoordinate]:
            """Call query_data_ids with some default arguments."""
            return butler._query_data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(all(data_id.hasFull() for data_id in result))
        self.assertFalse(any(data_id.hasRecords() for data_id in result))

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods, some modify original result in place, so build
        # new result for each one.
        result = _do_query("visit", order_by="-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit", order_by=("-band",), limit=1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit", order_by=("-band",), limit=1, offset=1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-band", offset=1000)

        # Empty result but suppress exception.
        result = _do_query("visit", order_by="-band", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        # Empty result, will raise an exception.
        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-band", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_data_ids(self) -> None:
        """Basic test for `Butler.query().data_ids()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            """Call query.data_ids with some default arguments."""
            with butler._query() as query:
                return query.data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(result.has_full())
        self.assertFalse(result.has_records())

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 2)
            self.assertTrue(result.has_records())

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods, some modify original result in place, so build
        # new result for each one.
        result = _do_query("visit")
        result = result.order_by("-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_dimension_records_convenience(self) -> None:
        """Basic test for `Butler.query_dimension_records` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> list[DimensionRecord]:
            """Call query_dimension_records with some default arguments."""
            return butler._query_dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(len(result), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit", order_by="-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit", order_by=("-visit",), limit=1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit", order_by=("-visit",), limit=1, offset=1)
        self.assertEqual([record.id for record in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-visit", offset=1000)

        result = _do_query("visit", order_by="-visit", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-visit", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_dimension_records(self) -> None:
279 """Basic test for `_query_dimension_records` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            """Call query.dimension_records with some default arguments."""
            with butler._query() as query:
                return query.dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(result.count(), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_datasets_convenience(self) -> None:
        """Basic test for `Butler.query_datasets` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> list[DatasetRef]:
            return butler._query_datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(len(result), 6)
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(len(result), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query(..., collections=..., find_first=False, where=where)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_datasets(self) -> None:
346 """Basic test for `_query_datasets` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(result.count(), 6)
        self.assertTrue(result.any())
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        by_type = list(result.by_dataset_type())
        self.assertEqual(len(by_type), 2)
        self.assertEqual(set(item.dataset_type.name for item in by_type), {"bias", "flat"})

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 6)
            for ref in result:
                self.assertTrue(ref.dataId.hasRecords())

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(result.count(), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_result_summaries(self) -> None:
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        # This method was copied almost verbatim from Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # First query yields two results, and involves no postprocessing.
        query1 = _query_data_ids(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = _query_data_ids(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case
        # (and exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = _query_data_ids(["detector", "physical_filter"])
        queries_and_snippets: list[Any] = [
            (
                # Dataset type name doesn't match any existing dataset types.
                _query_datasets("nonexistent", collections=..., find_first=False),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                _query_datasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=butler.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                    find_first=False,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                _query_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.find_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                _query_datasets("flat", collections=re.compile("potato.+"), find_first=False),
                ["potato"],
            ),
        ]

        with self.assertRaises(MissingDatasetTypeError):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_data_ids(["detector"], datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        with self.assertRaises(MissingDatasetTypeError):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_dimension_records("detector", datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # Dataset type patterns are no longer accepted by data ID queries;
        # passing one raises an error rather than emitting a deprecation
        # warning.
        with self.assertRaises(DatasetTypeExpressionError):
            _query_data_ids(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be identified
        # by cheap follow-up queries, yielding helpful diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                _query_data_ids(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                _query_dimension_records("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing. The count queries aren't accurate because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = _query_data_ids(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=butler.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = _query_data_ids(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = _query_datasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the potential
        # to remove rows.
        query6 = _query_dimension_records(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = list(query6.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))

    def test_query_results(self) -> None:
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        # This method was copied almost verbatim from Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bias = butler.get_dataset_type("bias")
        flat = butler.get_dataset_type("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expected_dimensions = butler.dimensions.conform(["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expected_subset_dimensions = butler.dimensions.conform(["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(
                    instrument="Cam1", detector=d, physical_filter=p, universe=butler.dimensions
                )
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            dimensions=expected_dimensions,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            butler.find_dataset(
                flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
            ),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            ref
            for ref in [
                butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
            ]
            if ref is not None
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = _query_data_ids(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.dimensions, expected_dimensions)
        self.assertEqual(set(dataIds), set(expectedDataIds))
        self.assertCountEqual(
            list(
                dataIds.find_datasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
        self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
        self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=False)
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=True)
            ),
            expectedDeduplicatedBiases,
        )

        # Searching for a dataset with dimensions we had projected away
        # restores those dimensions.
        self.assertCountEqual(
            list(subsetDataIds.find_datasets("flat", collections=["imported_r"], find_first=True)),
            expectedFlats,
        )

        # Use a component dataset type.
        self.assertCountEqual(
            [
                ref.makeComponentRef("image")
                for ref in subsetDataIds.find_datasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    find_first=False,
                )
            ],
            [ref.makeComponentRef("image") for ref in expectedAllBiases],
        )

        # Use a named dataset type that does not exist and a dataset type
        # object that does not exist.
        unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")

        # Test both string name and dataset type object.
        tests: tuple[tuple[DatasetType | str, str], ...] = (
            (unknown_type, unknown_type.name),
            (unknown_type.name, unknown_type.name),
        )
        for test_type, test_type_name in tests:
            with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
                list(
                    subsetDataIds.find_datasets(
                        test_type, collections=["imported_r", "imported_g"], find_first=True
                    )
                )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=False
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=True
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.dimensions, expected_dimensions)
            self.assertEqual(set(dataIds), set(expectedDataIds))
            self.assertCountEqual(
                list(
                    dataIds.find_datasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
                self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=False
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=True
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=False
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=True
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def test_query_datasets_deduplication(self) -> None:
844 """Test that the findFirst option to query.datasets selects datasets
845 from collections in the order given".
846 """
        # This method was copied almost verbatim from Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)
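
        # With find_first=False, every matching dataset in every collection
        # is returned, so detectors 2 and 3 appear twice below.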
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=False)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_r", "imported_g"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )

    def test_query_data_ids_order_by(self) -> None:
        """Test order_by and limit on result returned by query.data_ids()."""
        # This method was copied almost verbatim from Registry test class,
        # replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def do_query(
            dimensions: Iterable[str] = ("visit", "tract"), datasets: Any = None, collections: Any = None
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(
                    dimensions,
                    datasets=datasets,
                    collections=collections,
                    instrument="Cam1",
                    skymap="SkyMap1",
                )
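
        # Each case gives the ORDER BY expression, the data ID keys to
        # extract, the expected key tuples in that order, and optional
        # LIMIT and dataset-constraint arguments.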
        Test = namedtuple(
            "Test",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-timespan.begin,timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # and materialize
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass  # pragma: no cover

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def test_query_int_range_expressions(self) -> None:
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )

    def test_query_data_ids_expression_error(self) -> None:
        """Test error checking of 'where' expressions in query.data_ids."""
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            _query_data_ids(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            _query_data_ids(["detector"], where="timespan.end < time", bind=bind)

    def test_query_data_ids_governor_exceptions(self) -> None:
        """Test exceptions raised by query.data_ids for incorrect governors."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)
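
        # Each case gives the dimensions to query plus governor values
        # supplied via kwargs, a data ID, or a where expression, and either
        # the expected row count or the exception a bad value should raise.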
        Test = namedtuple(
            "Test",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    ).count()
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    query = _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    )
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def test_query_dimension_records_exceptions(self) -> None:
        """Test exceptions raised by query.dimension_records()."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        result = _query_dimension_records("detector")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", data_id={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", instrument="NotCam1")

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", data_id={"instrument": "NotCam1"})

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument='NotCam1'")

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "NotCam1"})

    def test_query_dimension_records_order_by(self) -> None:
        """Test order_by and limit on result returned by
        query.dimension_records().
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        def do_query(
            element: str, datasets: Any = None, collections: Any = None
        ) -> DimensionRecordQueryResults:
            return _query_dimension_records(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "Test",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)
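
        # Map each (tract, patch) pair to its region so that the expected
        # overlaps can be computed by brute force below.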
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in _query_data_ids(["patch"]).expanded()
            if data_id.region is not None
        }
        skypix_dimension = cast(SkyPixDimension, butler.dimensions["htm11"])
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, butler.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in _query_data_ids(
                    ["patch"],
                    data_id={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system in
        # the dimensions doesn't generate redundant join terms in the query.
        full_data_ids = set(
            _query_data_ids(["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC").expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            tract = data_id.records["tract"]
            visit = data_id.records["visit"]
            htm7 = data_id.records["htm7"]
            assert tract is not None and visit is not None and htm7 is not None
            self.assertFalse(tract.region.isDisjointFrom(htm7.region))
            self.assertFalse(visit.region.isDisjointFrom(htm7.region))

    def test_bind_in_query_datasets(self) -> None:
        """Test that the bind parameter is correctly forwarded in
        query.datasets recursion.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.assertEqual(
            set(_query_datasets("flat", band="r", collections=..., find_first=False)),
            set(
                _query_datasets(
                    "flat", where="band=my_band", bind={"my_band": "r"}, collections=..., find_first=False
                )
            ),
        )

    def test_dataset_constrained_dimension_record_queries(self) -> None:
        """Test that query.dimension_records works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Query for physical_filter dimension records, using a dataset whose
        # dimensions include detector as well as physical_filter.
        records = _query_dimension_records(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(_query_dimension_records("physical_filter", datasets=..., collections="imported_r"))

    def test_exposure_queries(self) -> None:
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler tests
        is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
        the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
        dimension records as it was focused on providing nontrivial spatial
        overlaps between visit+detector and tract+patch. So in this test we
        need to translate queries that originally used the exposure dimension
        to use the (very similar) visit dimension instead.
        """
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("visit", instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in _query_data_ids(["visit"], instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in _query_data_ids(["detector"], instrument="HSC").order_by("full_name").limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_spatial_join(self) -> None:
        """Test queries that involve spatial overlap joins."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(
            dimensions: DimensionGroup | list[str] | str, **kwargs: Any
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to. We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in butler.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in _query_dimension_records(element.name)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(_query_data_ids(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = butler.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = butler.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(_query_data_ids(dimensions))
            self.assertEqual(expected, queried)