# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ButlerQueryTests"]

import itertools
import os
import re
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, cast

import astropy.time
import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from .._dataset_type import DatasetType
from .._exceptions import EmptyQueryResultError
from ..dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from ..registry._collection_type import CollectionType
from ..registry._exceptions import DataIdValueError, DatasetTypeError, MissingCollectionError
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

if TYPE_CHECKING:
    from .._butler import Butler
    from .._dataset_ref import DatasetRef
    from .._query_results import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
    from ..dimensions import DimensionGroup, DimensionRecord
    from ..registry.sql_registry import SqlRegistry


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: str
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with the data used in the tests
        below.

        Parameters
        ----------
        *args : str
            Names of the files to pass to `load_data`.
        """
        raise NotImplementedError()
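
    # A concrete subclass only has to provide `make_butler`. A minimal sketch
    # of one possible implementation (the repository-creation helper here is
    # hypothetical; the details vary by Butler backend):
    #
    #     def make_butler(self, *args: str) -> Butler:
    #         butler = self.make_empty_butler()  # hypothetical repo setup
    #         for filename in args:
    #             self.load_data(butler._registry, filename)
    #         self.make_bias_collection(butler._registry)
    #         return butler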

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        once the Registry exists.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def make_bias_collection(self, registry: SqlRegistry) -> None:
        """Make a "biases" collection containing only bias datasets.

        The default test dataset has two collections, each holding both flats
        and biases. This adds a new collection for biases, but only if the
        "imported_g" collection exists (usually loaded from datasets.yaml).

        This method should be called from implementations of `make_butler`
        once the Registry exists.
        """
        try:
            registry.getCollectionType("imported_g")
        except MissingCollectionError:
            return
        registry.registerCollection("biases", CollectionType.TAGGED)
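        # `associate` adds the existing bias datasets to the new TAGGED
        # collection by reference; the datasets are not copied and remain in
        # their original RUN collections as well.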
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))

    def test_query_data_ids_convenience(self) -> None:
        """Basic test for `Butler.query_data_ids` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> list[DataCoordinate]:
            """Call query_data_ids with some default arguments."""
            return butler._query_data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(all(data_id.hasFull() for data_id in result))
        self.assertFalse(any(data_id.hasRecords() for data_id in result))

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
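        # `bind` supplies the value of the `filter_name` identifier used in
        # the `where` expression, which keeps literals out of the expression
        # string itself.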
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit", order_by="-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit", order_by=("-band",), limit=1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit", order_by=("-band",), limit=1, offset=1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-band", offset=1000)

        # Empty result but suppress exception.
        result = _do_query("visit", order_by="-band", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        # Empty result, will raise an exception.
        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-band", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_data_ids(self) -> None:
        """Basic test for `Butler.query().data_ids()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            """Call query.data_ids with some default arguments."""
            with butler._query() as query:
                return query.data_ids(dimensions, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertCountEqual(
            [data_id.mapping for data_id in result],
            [
                {"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "visit": 2, "band": "r", "physical_filter": "Cam1-R1"},
            ],
        )

        self.assertTrue(result.has_full())
        self.assertFalse(result.has_records())
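        # materialize() executes the query once into a temporary table, and
        # expanded() attaches dimension records to each data ID, which is why
        # has_records() becomes true below.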
        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 2)
            self.assertTrue(result.has_records())

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(
            [data_id.mapping for data_id in result],
            [{"instrument": "Cam1", "visit": 1, "band": "g", "physical_filter": "Cam1-G"}],
        )

        # Test chained methods; some modify the original result in place, so
        # build a new result for each one.
        result = _do_query("visit")
        result = result.order_by("-band")
        self.assertEqual([data_id["visit"] for data_id in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1)
        self.assertEqual([data_id["visit"] for data_id in result], [2])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1)
        self.assertEqual([data_id["visit"] for data_id in result], [1])

        result = _do_query("visit")
        result = result.order_by("-band").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_dimension_records_convenience(self) -> None:
        """Basic test for `Butler.query_dimension_records` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> list[DimensionRecord]:
            """Call query_dimension_records with some default arguments."""
            return butler._query_dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(len(result), 2)
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(len(result), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit", order_by="-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit", order_by=("-visit",), limit=1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit", order_by=("-visit",), limit=1, offset=1)
        self.assertEqual([record.id for record in result], [1])

        with self.assertRaisesRegex(TypeError, "offset is specified without limit"):
            result = _do_query("visit", order_by="-visit", offset=1000)

        result = _do_query("visit", order_by="-visit", limit=1, offset=1000, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query("visit", order_by="-visit", limit=1, offset=1000)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_dimension_records(self) -> None:
        """Basic test for `Butler.query().dimension_records()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            """Call query.dimension_records with some default arguments."""
            with butler._query() as query:
                return query.dimension_records(element, instrument="Cam1", skymap="SkyMap1", **kwargs)

        result = _do_query("visit")
        self.assertEqual(result.count(), 2)
        self.assertTrue(result.any())
        self.assertEqual(
            set((record.id, record.name, record.physical_filter, record.day_obs) for record in result),
            {(1, "1", "Cam1-G", 20210909), (2, "2", "Cam1-R1", 20210909)},
        )

        # Test user expression.
        where = "physical_filter = filter_name"
        bind = {"filter_name": "Cam1-G"}
        result = _do_query("visit", where=where, bind=bind)
        self.assertEqual(result.count(), 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit")
        self.assertEqual([record.id for record in result], [2, 1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1)
        self.assertEqual([record.id for record in result], [2])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1)
        self.assertEqual([record.id for record in result], [1])

        result = _do_query("visit")
        result = result.order_by("-visit").limit(1, 1000)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_datasets_convenience(self) -> None:
        """Basic test for `Butler.query_datasets` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> list[DatasetRef]:
            return butler._query_datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(len(result), 6)
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(len(result), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, explain=False)
        self.assertFalse(result)

        with self.assertRaises(EmptyQueryResultError) as exc_cm:
            _do_query(..., collections=..., find_first=False, where=where)
        self.assertTrue(exc_cm.exception.reasons)

    def test_query_datasets(self) -> None:
        """Basic test for `Butler.query().datasets()` method."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _do_query(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        result = _do_query(..., collections=["imported_g"])
        self.assertEqual(result.count(), 6)
        self.assertTrue(result.any())
        self.assertCountEqual([ref.dataId["detector"] for ref in result], [1, 2, 3, 2, 3, 4])

        by_type = list(result.by_parent_dataset_type())
        self.assertEqual(len(by_type), 2)
        self.assertEqual(set(item.parent_dataset_type.name for item in by_type), {"bias", "flat"})

        with result.materialize() as materialized:
            result = materialized.expanded()
            self.assertEqual(result.count(), 6)
            for ref in result:
                self.assertTrue(ref.dataId.hasRecords())

        # Test user expression.
        where = "detector IN (detectors) and instrument = instr"
        bind = {"detectors": (2, 3), "instr": "Cam1"}
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertEqual(result.count(), 8)
        self.assertEqual(set(ref.dataId["detector"] for ref in result), {2, 3})

        where = "detector = 1000000 and instrument = 'Cam1'"
        result = _do_query(..., collections=..., find_first=False, where=where, bind=bind)
        self.assertFalse(result.any())
        self.assertGreater(len(list(result.explain_no_results())), 0)

    def test_query_result_summaries(self) -> None:
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # First query yields two results, and involves no postprocessing.
        query1 = _query_data_ids(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
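        # With exact=False these summary methods may take fast-but-inexact
        # shortcuts: count() can overestimate (e.g. by skipping postprocess
        # filtering), and any(execute=False) only reports whether the query
        # is provably empty without actually running it.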
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = _query_data_ids(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = _query_data_ids(["detector", "physical_filter"])
        queries_and_snippets: list[Any] = [
            (
                # Dataset type name doesn't match any existing dataset types.
                _query_datasets("nonexistent", collections=..., find_first=False),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                _query_datasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=butler.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                    find_first=False,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                _query_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.find_datasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                _query_datasets("flat", collections=re.compile("potato.+"), find_first=False),
                ["potato"],
            ),
        ]
        # The behavior of these additional queries is slated to change in the
        # future, so we also check for deprecation warnings.
        with self.assertWarns(FutureWarning):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_data_ids(["detector"], datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        with self.assertWarns(FutureWarning):
            queries_and_snippets.append(
                (
                    # Dataset type name doesn't match any existing dataset
                    # types.
                    _query_dimension_records("detector", datasets=["nonexistent"], collections=...),
                    ["nonexistent"],
                )
            )
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query does yield results, but it should also emit a warning
        # because passing dataset type patterns to queryDataIds is
        # deprecated; just look for the warning.
        with self.assertWarns(FutureWarning):
            _query_data_ids(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful
        # diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                _query_data_ids(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                _query_dimension_records("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is
        # filtered out in postprocessing. The count queries aren't accurate
        # because they don't account for duplication that happens due to an
        # internal join against commonSkyPix.
        query3 = _query_data_ids(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=butler.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = _query_data_ids(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = _query_datasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the
        # potential to remove rows.
        query6 = _query_dimension_records(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = list(query6.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))

    def test_query_results(self) -> None:
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bias = butler.get_dataset_type("bias")
        flat = butler.get_dataset_type("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expected_dimensions = butler.dimensions.conform(["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expected_subset_dimensions = butler.dimensions.conform(["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(
                    instrument="Cam1", detector=d, physical_filter=p, universe=butler.dimensions
                )
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            dimensions=expected_dimensions,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            butler.find_dataset(
                flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
            ),
            butler.find_dataset(
                flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
            ),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            ref
            for ref in [
                butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
            ]
            if ref is not None
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            butler.find_dataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            butler.find_dataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            butler.find_dataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = _query_data_ids(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.dimensions, expected_dimensions)
        self.assertEqual(set(dataIds), set(expectedDataIds))
        self.assertCountEqual(
            list(
                dataIds.find_datasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
        self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
        self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=False)
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.find_datasets(bias, collections=["imported_r", "imported_g"], find_first=True)
            ),
            expectedDeduplicatedBiases,
        )

        # Searching for a dataset with dimensions we had projected away
        # restores those dimensions.
        self.assertCountEqual(
            list(subsetDataIds.find_datasets("flat", collections=["imported_r"], find_first=True)),
            expectedFlats,
        )

        # Use a component dataset type.
        self.assertCountEqual(
            [
                ref.makeComponentRef("image")
                for ref in subsetDataIds.find_datasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    find_first=False,
                )
            ],
            [ref.makeComponentRef("image") for ref in expectedAllBiases],
        )

        # Use a named dataset type that does not exist and a dataset type
        # object that does not exist.
        unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")

        # Test both string name and dataset type object.
        tests: tuple[tuple[DatasetType | str, str], ...] = (
            (unknown_type, unknown_type.name),
            (unknown_type.name, unknown_type.name),
        )
        for test_type, test_type_name in tests:
            with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
                list(
                    subsetDataIds.find_datasets(
                        test_type, collections=["imported_r", "imported_g"], find_first=True
                    )
                )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=False
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.find_datasets(
            bias, collections=["imported_r", "imported_g"], find_first=True
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.dimensions, expected_dimensions)
            self.assertEqual(set(dataIds), set(expectedDataIds))
            self.assertCountEqual(
                list(
                    dataIds.find_datasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.find_datasets(
                        bias, collections=["imported_r", "imported_g"], find_first=True
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.find_datasets(
                bias, collections=["imported_r", "imported_g"], find_first=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
                self.assertEqual(set(subsetDataIds), set(expectedSubsetDataIds))
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=False
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.find_datasets(
                            bias, collections=["imported_r", "imported_g"], find_first=True
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=False
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.find_datasets(
                    bias, collections=["imported_r", "imported_g"], find_first=True
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def test_query_datasets_deduplication(self) -> None:
        """Test that the find_first option to query.datasets selects datasets
        from collections in the order given.
        """
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=False)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
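        # With find_first=True the order of the collections matters: for each
        # data ID the dataset is taken from the first collection in the
        # search path that has one, so swapping "imported_g" and "imported_r"
        # below changes which detector=2 and detector=3 biases are returned.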
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_g", "imported_r"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(_query_datasets("bias", collections=["imported_r", "imported_g"], find_first=True)),
            [
                butler.find_dataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                butler.find_dataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                butler.find_dataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )

    def test_query_data_ids_order_by(self) -> None:
        """Test order_by and limit on result returned by query.data_ids()."""
        # This method was copied almost verbatim from the Registry test
        # class, replacing Registry methods with new Butler methods.
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def do_query(
            dimensions: Iterable[str] = ("visit", "tract"), datasets: Any = None, collections: Any = None
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(
                    dimensions,
                    datasets=datasets,
                    collections=collections,
                    instrument="Cam1",
                    skymap="SkyMap1",
                )

        Test = namedtuple(
            "Test",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )
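        # Each Test case lists the ORDER BY expressions, the data ID keys to
        # extract, and the expected ordered tuples of those keys; when
        # present, `limit` holds the positional arguments for the limit()
        # call (a row limit, optionally followed by an offset).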

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-timespan.begin,timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Materializing a query that has order_by/limit applied is not
            # supported.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass  # pragma: no cover

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def test_query_int_range_expressions(self) -> None:
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(_query_data_ids(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {butler.registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )

    def test_query_data_ids_expression_error(self) -> None:
        """Test error checking of 'where' expressions in query.data_ids."""
        butler = self.make_butler("base.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            _query_data_ids(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            _query_data_ids(["detector"], where="timespan.end < time", bind=bind)

    def test_query_data_ids_governor_exceptions(self) -> None:
        """Test exceptions raised by query.data_ids for incorrect governors."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        Test = namedtuple(
            "Test",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    ).count()
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    query = _query_data_ids(
                        dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                    )
            else:
                query = _query_data_ids(
                    dimensions, data_id=test.dataId, where=test.where, bind=test.bind, **test.kwargs
                )
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def test_query_dimension_records_exceptions(self) -> None:
        """Test exceptions raised by query.dimension_records()."""
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        result = _query_dimension_records("detector")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", data_id={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", instrument="NotCam1")

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = _query_dimension_records("detector", data_id={"instrument": "NotCam1"})

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument='NotCam1'")

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = _query_dimension_records("detector", where="instrument=instr", bind={"instr": "NotCam1"})

    def test_query_dimension_records_order_by(self) -> None:
        """Test order_by and limit on result returned by
        query.dimension_records().
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        def do_query(
            element: str, datasets: Any = None, collections: Any = None
        ) -> DimensionRecordQueryResults:
            return _query_dimension_records(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "Test",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in _query_data_ids(["patch"]).expanded()
            if data_id.region is not None
        }
        skypix_dimension = cast(SkyPixDimension, butler.dimensions["htm11"])
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, butler.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
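        # `interior` returns only the pixels wholly contained in a region, so
        # every ID accumulated above is guaranteed to overlap at least one
        # patch.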
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in _query_data_ids(
                    ["patch"],
                    data_id={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system
        # in the dimensions doesn't generate redundant join terms in the
        # query.
        full_data_ids = set(
            _query_data_ids(["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC").expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            tract = data_id.records["tract"]
            visit = data_id.records["visit"]
            htm7 = data_id.records["htm7"]
            assert tract is not None and visit is not None and htm7 is not None
            self.assertFalse(tract.region.isDisjointFrom(htm7.region))
            self.assertFalse(visit.region.isDisjointFrom(htm7.region))

    def test_bind_in_query_datasets(self) -> None:
        """Test that the bind parameter is correctly forwarded in
        query.datasets recursion.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_datasets(dataset: Any, **kwargs: Any) -> DatasetQueryResults:
            with butler._query() as query:
                return query.datasets(dataset, **kwargs)

        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.assertEqual(
            set(_query_datasets("flat", band="r", collections=..., find_first=False)),
            set(
                _query_datasets(
                    "flat", where="band=my_band", bind={"my_band": "r"}, collections=..., find_first=False
                )
            ),
        )

    def test_dataset_constrained_dimension_record_queries(self) -> None:
        """Test that query.dimension_records works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Query for physical_filter dimension records, using a dataset that
        # has both physical_filter and detector dimensions.
        records = _query_dimension_records(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(_query_dimension_records("physical_filter", datasets=..., collections="imported_r"))

    def test_exposure_queries(self) -> None:
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler
        tests is the hsc-rc2-subset.yaml export (which is unfortunately
        distinct from the lsst/rc2_subset GitHub repo), but that does not
        have 'exposure' dimension records, as it was focused on providing
        nontrivial spatial overlaps between visit+detector and tract+patch.
        So in this test we need to translate queries that originally used
        the exposure dimension to use the (very similar) visit dimension
        instead.
        """
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(dimensions: list[str] | str, **kwargs: Any) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("visit", instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in _query_data_ids(["visit"], instrument="HSC").order_by("id").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in _query_dimension_records("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in _query_data_ids(["detector"], instrument="HSC").order_by("full_name").limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_spatial_join(self) -> None:
        """Test queries that involve spatial overlap joins."""
        butler = self.make_butler("hsc-rc2-subset.yaml")

        def _query_data_ids(
            dimensions: DimensionGroup | list[str] | str, **kwargs: Any
        ) -> DataCoordinateQueryResults:
            with butler._query() as query:
                return query.data_ids(dimensions, **kwargs)

        def _query_dimension_records(element: str, **kwargs: Any) -> DimensionRecordQueryResults:
            with butler._query() as query:
                return query.dimension_records(element, **kwargs)

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in butler.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in _query_dimension_records(element.name)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(_query_data_ids(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = butler.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = butler.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(_query_data_ids(dimensions))
            self.assertEqual(expected, queried)