# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()

import os
import unittest
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from typing import ClassVar

import astropy.time

from .._butler import Butler
from .._dataset_type import DatasetType
from .._exceptions import InvalidQueryError
from .._timespan import Timespan
from ..dimensions import DataCoordinate, DimensionRecord
from ..direct_query_driver import DirectQueryDriver
from ..queries import DimensionRecordQueryResults
from ..registry import CollectionType, NoDefaultCollectionError, RegistryDefaults
from ..registry.sql_registry import SqlRegistry
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

# Simplified tuples of the detector records we'll frequently be querying for.
DETECTOR_TUPLES = {
    1: ("Cam1", 1, "Aa", "SCIENCE"),
    2: ("Cam1", 2, "Ab", "SCIENCE"),
    3: ("Cam1", 3, "Ba", "SCIENCE"),
    4: ("Cam1", 4, "Bb", "WAVEFRONT"),
}


def make_detector_tuples(records: Iterable[DimensionRecord]) -> dict[int, tuple[str, int, str, str]]:
    """Make tuples with the same entries as DETECTOR_TUPLES from an iterable of
    detector dimension records.

    Parameters
    ----------
    records : `~collections.abc.Iterable` [ `.dimensions.DimensionRecord` ]
        Detector dimension records.

    Returns
    -------
    tuples : `dict` [ `int`, `tuple` ]
        Dictionary mapping detector ID to tuples with the same fields as the
        ``DETECTOR_TUPLES`` constant in this file.
    """
    return {record.id: (record.instrument, record.id, record.full_name, record.purpose) for record in records}
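
# For example (illustrative only): applied to the four detector records loaded
# from base.yaml, this helper reproduces DETECTOR_TUPLES exactly, which is how
# check_detector_records below compares query results to expectations:
#
#     make_detector_tuples(query.dimension_records("detector")) == DETECTOR_TUPLES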


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: ClassVar[str]
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with the data used in the tests
        below.

        Parameters
        ----------
        *args : str
            Names of the files to pass to `load_data`.

        Returns
        -------
        butler : `Butler`
            Butler to use for tests.
        """
        raise NotImplementedError()
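        # (Illustrative note, not part of the API contract: concrete
        # subclasses are expected to create a test repository here and call
        # ``self.load_data(registry, name)`` for each ``name`` in ``args``;
        # how the repository itself is created is backend-specific.)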

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`
        after the Registry has been created.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Location of test data.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def check_detector_records(
        self,
        results: DimensionRecordQueryResults,
        ids: Sequence[int] = (1, 2, 3, 4),
        ordered: bool = False,
        messages: Iterable[str] = (),
        doomed: bool = False,
        has_postprocessing: bool = False,
    ) -> None:
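        """Run a battery of assertions on a query for detector records.

        Parameters
        ----------
        results : `DimensionRecordQueryResults`
            Query results to check.
        ids : `~collections.abc.Sequence` [ `int` ], optional
            Expected detector IDs.
        ordered : `bool`, optional
            Whether the results must appear in the same order as ``ids``.
        messages : `~collections.abc.Iterable` [ `str` ], optional
            Messages expected from `explain_no_results`, in any order.
        doomed : `bool`, optional
            Whether the query is known to yield no results before execution.
        has_postprocessing : `bool`, optional
            Whether the query needs Python-side filtering after the SQL runs,
            in which case an exact row count requires ``discard=True``.
        """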
        self.assertEqual(results.element.name, "detector")
        self.assertEqual(results.dimensions, results.dimensions.universe["detector"].minimal_group)
        if has_postprocessing and not doomed:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertGreaterEqual(results.count(discard=False, exact=False), len(ids))
            with self.assertRaisesRegex(InvalidQueryError, "^Cannot count query rows"):
                results.count()
        else:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertEqual(results.count(discard=False), len(ids))
            self.assertEqual(results.count(discard=True, exact=False), len(ids))
            self.assertEqual(results.count(discard=False, exact=False), len(ids))
        self.assertEqual(results.any(), bool(ids))
        if not doomed:
            self.assertTrue(results.any(exact=False, execute=False))
            with self.assertRaisesRegex(InvalidQueryError, "^Cannot obtain exact"):
                results.any(exact=True, execute=False)
        else:
            self.assertFalse(results.any(exact=False, execute=False))
            self.assertFalse(results.any(exact=True, execute=False))
        self.assertCountEqual(results.explain_no_results(), list(messages))
        expected = [DETECTOR_TUPLES[i] for i in ids]
        queried = list(make_detector_tuples(results).values())
        if ordered:
            self.assertEqual(queried, expected)
        else:
            self.assertCountEqual(queried, expected)

    def test_simple_record_query(self) -> None:
        """Test query-system basics with simple queries for dimension
        records.

        This includes tests for order_by, limit, and where expressions, but
        only for cases where there are no datasets, dimension projections,
        or spatial/temporal overlaps.
        """
        butler = self.make_butler("base.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            results = query.dimension_records("detector")
            self.check_detector_records(results)
            self.check_detector_records(results.order_by("detector"), ordered=True)
            self.check_detector_records(
                results.order_by(_x.detector.full_name.desc), [4, 3, 2, 1], ordered=True
            )
            self.check_detector_records(results.order_by("detector").limit(2), [1, 2], ordered=True)
            self.check_detector_records(results.where(_x.detector.raft == "B", instrument="Cam1"), [3, 4])

    def test_implied_union_record_query(self) -> None:
        """Test queries for a dimension ('band') that uses "implied union"
        storage, in which its values are the union of the values for it in
        another dimension (physical_filter) that implies it.
        """
        butler = self.make_butler("base.yaml")
        band = butler.dimensions["band"]
        self.assertEqual(band.implied_union_target, butler.dimensions["physical_filter"])
        with butler._query() as query:
            self.assertCountEqual(
                list(query.dimension_records("band")),
                [band.RecordClass(name="g"), band.RecordClass(name="r")],
            )
            self.assertCountEqual(
                list(query.where(physical_filter="Cam1-R1", instrument="Cam1").dimension_records("band")),
                [band.RecordClass(name="r")],
            )

    def test_dataset_constrained_record_query(self) -> None:
        """Test a query for dimension records constrained by the existence of
        datasets of a particular type.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        butler.registry.registerCollection("empty", CollectionType.RUN)
        butler.registry.registerCollection("chain", CollectionType.CHAINED)
        butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"])
        with butler._query() as query:
            # No collections here or in defaults is an error.
            with self.assertRaises(NoDefaultCollectionError):
                query.join_dataset_search("bias").dimension_records("detector").any()
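        # Provide a default collection ('chain') so later searches can omit
        # ``collections=`` and fall back to it.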
        butler.registry.defaults = RegistryDefaults(collections=["chain"])
        with butler._query() as query:
            _x = query.expression_factory
            # Simplest case: this collection only has the first 3 detectors.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"]).dimension_records("detector"),
                [1, 2, 3],
            )
            # Together these collections have two biases for two of the
            # detectors, but this shouldn't cause duplicate results.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g", "imported_r"]).dimension_records(
                    "detector"
                ),
            )
            # Again we've got the potential for duplicates due to multiple
            # datasets with the same data ID, and this time we force the
            # deduplication to happen outside the dataset-search subquery by
            # adding a WHERE filter on a dataset column. We also use the
            # defaulted collection ('chain') to supply the collection.
            self.check_detector_records(
                query.join_dataset_search("bias")
                .where(
                    _x.any(
                        _x.all(_x["bias"].collection == "imported_g", _x.detector.raft == "B"),
                        _x.all(_x["bias"].collection == "imported_r", _x.detector.raft == "A"),
                    ),
                    instrument="Cam1",
                )
                .dimension_records("detector"),
                [2, 3],
            )
            # Flats have dimensions (physical_filter and band) we would not
            # normally include in a query for detector records. This also
            # should not cause duplicates.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"]).dimension_records("detector"),
                [2, 3, 4],
            )
            # No results, but for reasons we can't determine before we run the
            # query.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"])
                .where(_x.band == "r")
                .dimension_records("detector"),
                [],
            )
            # No results, and we can diagnose why before we run the query.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["empty"]).dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets of type 'bias' in collection 'empty'.",
                ],
                doomed=True,
            )
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .where(instrument="Cam2")
                .dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets with instrument='Cam2' in collection 'imported_g'.",
                ],
                doomed=True,
            )

    def test_spatial_overlaps(self) -> None:
        """Test queries for dimension records with spatial overlaps.

        Run tests/data/registry/spatial.py to plot the various regions used in
        this test.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Set default governor data ID values both to test that code path and
        # to keep us from having to repeat them in every 'where' call below.
        butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")
        htm7 = butler.dimensions.skypix_dimensions["htm7"]
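        # The htm7 pixelization used below turns a pixel ID into an explicit
        # region we can use in overlap constraints.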
        with butler._query() as query:
            _x = query.expression_factory
            # Query for detectors from a particular visit that overlap an
            # explicit region.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                    visit=1,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=True,
            )
            # Query for detectors from a particular visit that overlap an htm7
            # ID. This is basically the same query as the last one, but
            # expressed as a spatial join, and we can recognize that
            # postprocessing is not needed (while in the last case it did
            # nothing, but we couldn't tell that in advance because the query
            # didn't know the region came from htm7).
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Repeat the last query but with the spatial join implicit rather
            # than explicit.
            self.check_detector_records(
                query.where(
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Query for the detectors from any visit that overlap a region:
            # this gets contributions from multiple visits, and would have
            # duplicates if we didn't get rid of them via GROUP BY.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=True,
            )
            # Once again we rewrite the region-constraint query as a spatial
            # join, which drops the postprocessing. This join has to be
            # explicit because `visit` no longer gets into the query dimensions
            # some other way, and without it `detector` is not spatial.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=False,
            )
            # Query for detectors from any visit that overlap a patch. This
            # requires joining visit_detector_region to htm7 and htm7 to patch,
            # and then some postprocessing. We want to make sure there are no
            # duplicates from a detector and patch both overlapping multiple
            # htm7 tiles (which affects detectors 1 and 2) and that
            # postprocessing filters out detector 4, which has one htm7 tile in
            # common with the patch but does not actually overlap it.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.patch.region),
                    tract=0,
                    patch=4,
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Query for that patch's region and express the previous query as
            # a region-constraint instead of a spatial join.
            (patch_record,) = query.where(tract=0, patch=4).dimension_records("patch")
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Combine postprocessing with order_by and limit.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                )
                .dimension_records("detector")
                .order_by(_x.detector.desc)
                .limit(2),
                [3, 2],
                has_postprocessing=True,
            )
            # Try a case where there are some records before postprocessing but
            # none afterwards.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                    detector=4,
                ).dimension_records("detector"),
                [],
                has_postprocessing=True,
            )

    def test_common_skypix_overlaps(self) -> None:
        """Test spatial overlap queries that return htm7 records."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Insert some datasets that use a skypix dimension, since some queries
        # are only possible if a superset of the skypix IDs is already in the
        # query.
        cat1 = DatasetType("cat1", dimensions=butler.dimensions.conform(["htm7"]), storageClass="ArrowTable")
        butler.registry.registerDatasetType(cat1)
        butler.registry.registerCollection("refcats", CollectionType.RUN)
        butler.registry.insertDatasets(cat1, [{"htm7": i} for i in range(253952, 253968)], run="refcats")
        with butler._query() as query:
            _x = query.expression_factory
            # Explicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.htm7.region.overlaps(_x.patch.region), skymap="SkyMap1", tract=0, patch=4
                    ).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Implicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Constraint on the patch region (with the query not knowing it
            # corresponds to that patch).
            (patch,) = query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("patch")
            self.assertCountEqual(
                [
                    record.id
                    for record in query.join_dataset_search("cat1", collections=["refcats"])
                    .where(_x.htm7.region.overlaps(patch.region))
                    .dimension_records("htm7")
                ],
                [253954, 253955],
            )

    def test_data_coordinate_upload(self) -> None:
        """Test queries for dimension records with a data coordinate upload."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            # Query with a data ID upload that has an irrelevant row (there's
            # no data with "Cam2").
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam2", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that directly contains duplicates,
            # which should not appear in the results.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that could
            # also introduce duplicates if we're not careful.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that are
            # used in a constraint.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [3],
            )
            # Query with a data ID upload that must be spatially joined to
            # the other dimensions. This join is added automatically.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=2, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [2, 3, 4],
                has_postprocessing=True,
            )
            # Query with a data ID upload that embeds a spatial relationship.
            # This prevents automatic creation of a spatial join. To make the
            # test more interesting, the spatial relationship embedded in these
            # data IDs is nonsense: it includes combinations that do not
            # overlap, while leaving out combinations that do overlap.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=1,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=2,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=3,
                            instrument="Cam1",
                            visit=1,
                            detector=3,
                            universe=butler.dimensions,
                        ),
                    ]
                )
                .where(skymap="SkyMap1", tract=1, patch=1)
                .dimension_records("detector"),
                [1, 2],
            )
            # Query with an empty data ID upload (not a useful thing to do,
            # but a way to probe edge-case behavior).
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.make_empty(universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 2, 3, 4],
            )

    def test_data_coordinate_upload_force_temp_table(self) -> None:
        """Test queries for dimension records with a data coordinate upload
        that is so big it has to go into a temporary table rather than be
        included directly in the query via bind params (by making the
        threshold for using a temporary table tiny).

        This test assumes a DirectQueryDriver and is automatically skipped when
        some other driver is found.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
            query._driver._constant_rows_limit = 2
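            # With the limit forced down to 2, the three-row upload below has
            # to be written to a temporary table instead of being inlined into
            # the query.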
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3, 4],
            )

    def test_materialization(self) -> None:
        """Test querying for dimension records against a materialized previous
        query.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            # Simple case where the materialization has just the dimensions
            # we need for the rest of the query.
            self.check_detector_records(
                query.where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # This materialization has extra dimensions that could cause
            # duplicates if we don't SELECT DISTINCT them away.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector"])
                .where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # Materialize a spatial join, which should prevent the creation
            # of a spatial join in the downstream query.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector", "tract"]).materialize()
                # The patch constraint here should do nothing, because only the
                # spatial join from the materialization should exist. The
                # behavior is surprising no matter what here, and the
                # recommendation to users is to add an explicit overlap
                # expression any time it's not obvious what the default is.
                .where(skymap="SkyMap1", tract=0, instrument="Cam1", visit=2, patch=5).dimension_records(
                    "detector"
                ),
                [1, 2],
                has_postprocessing=True,
            )
            # Materialize with a dataset join.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .materialize(datasets=["bias"])
                .dimension_records("detector"),
                [1, 2, 3],
            )

    def test_timespan_results(self) -> None:
        """Test returning dimension records that include timespans."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            self.assertCountEqual(
                [
                    (record.id, record.timespan.begin, record.timespan.end)
                    for record in query.dimension_records("visit")
                ],
                [
                    (
                        1,
                        astropy.time.Time("2021-09-09T03:00:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:01:00", format="isot", scale="tai"),
                    ),
                    (
                        2,
                        astropy.time.Time("2021-09-09T03:02:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:03:00", format="isot", scale="tai"),
                    ),
                ],
            )

    def test_direct_driver_paging(self) -> None:
        """Test queries for dimension records that require multiple result
        pages (by making the raw page size tiny).

        This test assumes a DirectQueryDriver and is automatically skipped when
        some other driver is found.
        """
        butler = self.make_butler("base.yaml")
        # Basic test where pages should be transparent.
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
            query._driver._raw_page_size = 2
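            # With a two-row page size, the four detector records below have
            # to be fetched across multiple pages.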
            self.check_detector_records(
                query.dimension_records("detector"),
                [1, 2, 3, 4],
            )
        # Test that it's an error to continue query iteration after closing the
        # context manager.
        with butler._query() as query:
            assert isinstance(query._driver, DirectQueryDriver)
            query._driver._raw_page_size = 2
            iterator = iter(query.dimension_records("detector"))
            next(iterator)
        with self.assertRaisesRegex(RuntimeError, "Cannot continue query result iteration"):
            list(iterator)

    def test_column_expressions(self) -> None:
        """Test queries with a wide variety of column expressions."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        butler.registry.defaults = RegistryDefaults(instrument="Cam1")
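        # Setting a default instrument keeps us from having to repeat it in
        # the 'where' calls below.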
        with butler._query() as query:
            _x = query.expression_factory
            self.check_detector_records(
                query.where(_x.not_(_x.detector != 2)).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector + 1).dimension_records("detector"),
                [1],
            )
            self.check_detector_records(
                query.where(-_x.detector == -3).dimension_records("detector"),
                [3],
            )
            self.check_detector_records(
                query.where(_x.detector == 1, _x.detector == 2).dimension_records("detector"),
                [],
                messages=["'where' expression requires both detector=2 and detector=1."],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.overlaps(
                            Timespan(
                                begin=astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                                end=None,
                            )
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.not_(
                            _x.visit.timespan.end
                            < astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.begin
                        > astropy.time.Time("2021-09-09T03:01:30", format="isot", scale="tai")
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        (_x.visit.exposure_time + -(5.0 * _x.visit.zenith_angle)) > 0.0
                    ).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time - 5.0 >= 50.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.id % 2 != 0).dimension_records("visit")],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.zenith_angle / 5.0 <= 1.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.timespan.is_null).dimension_records("visit")],
                [],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time.is_null).dimension_records("visit")
                ],
                [],
            )
            self.check_detector_records(
                query.where(_x.detector.in_iterable([1, 3, 4])).dimension_records("detector"),
                [1, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=2, stop=None)).dimension_records("detector"),
                [2, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=3)).dimension_records("detector"),
                [1, 2],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=None, step=2)).dimension_records("detector"),
                [1, 3],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=2)).dimension_records("detector"),
                [1],
            )
            # This is a complex way to write a much simpler query ("where
            # detector.raft == 'A'"), but it tests code paths that would
            # otherwise require a lot more test setup.
            self.check_detector_records(
                query.where(
                    _x.detector.in_query(_x.detector, query.where(_x.detector.raft == "A"))
                ).dimension_records("detector"),
                [1, 2],
            )
            # Error to reference tract without skymap in a WHERE clause.
            with self.assertRaises(InvalidQueryError):
                list(query.where(_x.tract == 4).dimension_records("patch"))