Coverage for python/lsst/daf/butler/tests/butler_queries.py: 15% (190 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ()

import os
import unittest
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from typing import ClassVar

import astropy.time

from .._butler import Butler
from .._dataset_type import DatasetType
from .._timespan import Timespan
from ..dimensions import DataCoordinate, DimensionRecord
from ..direct_query_driver import DirectQueryDriver
from ..queries import DimensionRecordQueryResults, InvalidQueryError
from ..registry import CollectionType, NoDefaultCollectionError, RegistryDefaults
from ..registry.sql_registry import SqlRegistry
from ..transfers import YamlRepoImportBackend
from .utils import TestCaseMixin

# Simplified tuples of the detector records we'll frequently be querying for.
DETECTOR_TUPLES = {
    1: ("Cam1", 1, "Aa", "SCIENCE"),
    2: ("Cam1", 2, "Ab", "SCIENCE"),
    3: ("Cam1", 3, "Ba", "SCIENCE"),
    4: ("Cam1", 4, "Bb", "WAVEFRONT"),
}


def make_detector_tuples(records: Iterable[DimensionRecord]) -> dict[int, tuple[str, int, str, str]]:
    """Make tuples with the same entries as DETECTOR_TUPLES from an iterable
    of detector dimension records.

    Parameters
    ----------
    records : `~collections.abc.Iterable` [ `.dimensions.DimensionRecord` ]
        Detector dimension records.

    Returns
    -------
    tuples : `dict` [ `int`, `tuple` ]
        Dictionary mapping detector ID to tuples with the same fields as the
        ``DETECTOR_TUPLES`` constant in this file.
    """
    return {record.id: (record.instrument, record.id, record.full_name, record.purpose) for record in records}
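
# For example, with the detector records loaded from base.yaml one would
# expect make_detector_tuples(records)[1] == ("Cam1", 1, "Aa", "SCIENCE").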


class ButlerQueryTests(ABC, TestCaseMixin):
    """Base class for unit tests that test `lsst.daf.butler.Butler.query`
    implementations.
    """

    data_dir: ClassVar[str]
    """Root directory containing test data YAML files."""

    @abstractmethod
    def make_butler(self, *args: str) -> Butler:
        """Make a Butler instance populated with the data used in the tests
        below.

        Parameters
        ----------
        *args : `str`
            Names of the files to pass to `load_data`.

        Returns
        -------
        butler : `Butler`
            Butler to use for tests.
        """
        raise NotImplementedError()

    def load_data(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``data_dir/<filename>``,
        which should be a YAML import/export file.

        This method should be called from implementations of `make_butler`,
        at a point where the Registry already exists.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to use.
        filename : `str`
            Location of the test data file, relative to ``data_dir``.
        """
        with open(os.path.join(self.data_dir, filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
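
    # A concrete subclass supplies `make_butler`; a minimal sketch, assuming a
    # repository whose registry is a `SqlRegistry` (`self.root`,
    # `Butler.from_config`, and the `_registry` access are illustrative
    # assumptions, not requirements of this base class):
    #
    #     def make_butler(self, *args: str) -> Butler:
    #         config = Butler.makeRepo(self.root)
    #         butler = Butler.from_config(config, writeable=True)
    #         for filename in args:
    #             self.load_data(butler._registry, filename)
    #         return butler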

    def check_detector_records(
        self,
        results: DimensionRecordQueryResults,
        ids: Sequence[int] = (1, 2, 3, 4),
        ordered: bool = False,
        messages: Iterable[str] = (),
        doomed: bool = False,
        has_postprocessing: bool = False,
    ) -> None:
        self.assertEqual(results.element.name, "detector")
        self.assertEqual(results.dimensions, results.dimensions.universe["detector"].minimal_group)
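        # When postprocessing (e.g. region filtering in Python) is involved,
        # an exact count requires actually executing the query, so it is only
        # allowed when the caller opts in to discarding the fetched rows.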
        if has_postprocessing and not doomed:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertGreaterEqual(results.count(discard=False, exact=False), len(ids))
            with self.assertRaises(RuntimeError):
                results.count()
        else:
            self.assertEqual(results.count(discard=True), len(ids))
            self.assertEqual(results.count(discard=False), len(ids))
            self.assertEqual(results.count(discard=True, exact=False), len(ids))
            self.assertEqual(results.count(discard=False, exact=False), len(ids))
        self.assertEqual(results.any(), bool(ids))
        if not doomed:
            self.assertTrue(results.any(exact=False, execute=False))
            with self.assertRaises(RuntimeError):
                results.any(exact=True, execute=False)
        else:
            self.assertFalse(results.any(exact=False, execute=False))
            self.assertFalse(results.any(exact=True, execute=False))
        self.assertCountEqual(results.explain_no_results(), list(messages))
        expected = [DETECTOR_TUPLES[i] for i in ids]
        queried = list(make_detector_tuples(results).values())
        if ordered:
            self.assertEqual(queried, expected)
        else:
            self.assertCountEqual(queried, expected)

    def test_simple_record_query(self) -> None:
        """Test query-system basics with simple queries for dimension
        records.

        This includes tests for order_by, limit, and where expressions, but
        only for cases where there are no datasets, dimension projections,
        or spatial/temporal overlaps.
        """
        butler = self.make_butler("base.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            results = query.dimension_records("detector")
            self.check_detector_records(results)
            self.check_detector_records(results.order_by("detector"), ordered=True)
            self.check_detector_records(
                results.order_by(_x.detector.full_name.desc), [4, 3, 2, 1], ordered=True
            )
            self.check_detector_records(results.order_by("detector").limit(2), [1, 2], ordered=True)
            self.check_detector_records(results.where(_x.detector.raft == "B", instrument="Cam1"), [3, 4])

    def test_implied_union_record_query(self) -> None:
        """Test queries for a dimension ('band') that uses "implied union"
        storage, in which its values are the union of the values for it in
        another dimension (physical_filter) that implies it.
        """
        butler = self.make_butler("base.yaml")
        band = butler.dimensions["band"]
        self.assertEqual(band.implied_union_target, butler.dimensions["physical_filter"])
        with butler._query() as query:
            self.assertCountEqual(
                list(query.dimension_records("band")),
                [band.RecordClass(name="g"), band.RecordClass(name="r")],
            )
            self.assertCountEqual(
                list(query.where(physical_filter="Cam1-R1", instrument="Cam1").dimension_records("band")),
                [band.RecordClass(name="r")],
            )

    def test_dataset_constrained_record_query(self) -> None:
        """Test a query for dimension records constrained by the existence of
        datasets of a particular type.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml")
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        butler.registry.registerCollection("empty", CollectionType.RUN)
        butler.registry.registerCollection("chain", CollectionType.CHAINED)
        butler.registry.setCollectionChain("chain", ["imported_g", "empty", "imported_r"])
        with butler._query() as query:
            # No collections here or in defaults is an error.
            with self.assertRaises(NoDefaultCollectionError):
                query.join_dataset_search("bias").dimension_records("detector").any()
        butler.registry.defaults = RegistryDefaults(collections=["chain"])
        with butler._query() as query:
            _x = query.expression_factory
            # Simplest case: this collection only has the first 3 detectors.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"]).dimension_records("detector"),
                [1, 2, 3],
            )
            # Together these collections have two biases for two of the
            # detectors, but this shouldn't cause duplicate results.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g", "imported_r"]).dimension_records(
                    "detector"
                ),
            )
            # Again we've got the potential for duplicates due to multiple
            # datasets with the same data ID, and this time we force the
            # deduplication to happen outside the dataset-search subquery by
            # adding a WHERE filter on a dataset column. We also use the
            # defaulted collection ('chain') to supply the collection.
            self.check_detector_records(
                query.join_dataset_search("bias")
                .where(
                    _x.any(
                        _x.all(_x["bias"].collection == "imported_g", _x.detector.raft == "B"),
                        _x.all(_x["bias"].collection == "imported_r", _x.detector.raft == "A"),
                    ),
                    instrument="Cam1",
                )
                .dimension_records("detector"),
                [2, 3],
            )
            # Flats have dimensions (physical_filter and band) we would
            # normally include in a query for detector records. This also
            # should not cause duplicates.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"]).dimension_records("detector"),
                [2, 3, 4],
            )
            # No results, but for reasons we can't determine before we run the
            # query.
            self.check_detector_records(
                query.join_dataset_search("flat", collections=["imported_g"])
                .where(_x.band == "r")
                .dimension_records("detector"),
                [],
            )
            # No results, and we can diagnose why before we run the query.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["empty"]).dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets of type 'bias' in collection 'empty'.",
                ],
                doomed=True,
            )
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .where(instrument="Cam2")
                .dimension_records("detector"),
                [],
                messages=[
                    "Search for dataset type 'bias' is doomed to fail.",
                    "No datasets with instrument='Cam2' in collection 'imported_g'.",
                ],
                doomed=True,
            )

    def test_spatial_overlaps(self) -> None:
        """Test queries for dimension records with spatial overlaps.

        Run tests/data/registry/spatial.py to plot the various regions used in
        this test.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Set default governor data ID values both to test that code path and
        # to keep us from having to repeat them in every 'where' call below.
        butler.registry.defaults = RegistryDefaults(instrument="Cam1", skymap="SkyMap1")
        htm7 = butler.dimensions.skypix_dimensions["htm7"]
        with butler._query() as query:
            _x = query.expression_factory
            # Query for detectors from a particular visit that overlap an
            # explicit region.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                    visit=1,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=True,
            )
            # Query for detectors from a particular visit that overlap an htm7
            # ID. This is basically the same query as the last one, but
            # expressed as a spatial join, and we can recognize that
            # postprocessing is not needed (in the last case it did nothing,
            # but we couldn't tell that in advance because the query didn't
            # know the region came from htm7).
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Repeat the last query but with the spatial join implicit rather
            # than explicit.
            self.check_detector_records(
                query.where(
                    visit=1,
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 3, 4],
                has_postprocessing=False,
            )
            # Query for the detectors from any visit that overlap a region:
            # this gets contributions from multiple visits, and would have
            # duplicates if we didn't get rid of them via GROUP BY.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(htm7.pixelization.pixel(253954)),
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=True,
            )
            # Once again we rewrite the region-constraint query as a spatial
            # join, which drops the postprocessing. This join has to be
            # explicit because `visit` no longer gets into the query
            # dimensions any other way, and without it `detector` is not
            # spatial.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.htm7.region),
                    htm7=253954,
                ).dimension_records("detector"),
                [1, 2, 3, 4],
                has_postprocessing=False,
            )
            # Query for detectors from any visit that overlap a patch. This
            # requires joining visit_detector_region to htm7 and htm7 to
            # patch, and then some postprocessing. We want to make sure there
            # are no duplicates from a detector and patch both overlapping
            # multiple htm7 tiles (which affects detectors 1 and 2) and that
            # postprocessing filters out detector 4, which has one htm7 tile
            # in common with the patch but does not actually overlap it.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(_x.patch.region),
                    tract=0,
                    patch=4,
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Query for that patch's region and express the previous query as
            # a region constraint instead of a spatial join.
            (patch_record,) = query.where(tract=0, patch=4).dimension_records("patch")
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                ).dimension_records("detector"),
                [1, 2, 3],
                has_postprocessing=True,
            )
            # Combine postprocessing with order_by and limit.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                )
                .dimension_records("detector")
                .order_by(_x.detector.desc)
                .limit(2),
                [3, 2],
                has_postprocessing=True,
            )
            # Try a case where there are some records before postprocessing
            # but none afterwards.
            self.check_detector_records(
                query.where(
                    _x.visit_detector_region.region.overlaps(patch_record.region),
                    detector=4,
                ).dimension_records("detector"),
                [],
                has_postprocessing=True,
            )

    def test_common_skypix_overlaps(self) -> None:
        """Test spatial overlap queries that return htm7 records."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        # Insert some datasets that use a skypix dimension, since some queries
        # are only possible if a superset of the skypix IDs is in the query
        # already.
        cat1 = DatasetType("cat1", dimensions=butler.dimensions.conform(["htm7"]), storageClass="ArrowTable")
        butler.registry.registerDatasetType(cat1)
        butler.registry.registerCollection("refcats", CollectionType.RUN)
        butler.registry.insertDatasets(cat1, [{"htm7": i} for i in range(253952, 253968)], run="refcats")
        with butler._query() as query:
            _x = query.expression_factory
            # Explicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.htm7.region.overlaps(_x.patch.region), skymap="SkyMap1", tract=0, patch=4
                    ).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Implicit join to patch.
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("htm7")
                ],
                [253954, 253955],
            )
            # Constraint on the patch region (with the query not knowing it
            # corresponds to that patch).
            (patch,) = query.where(skymap="SkyMap1", tract=0, patch=4).dimension_records("patch")
            self.assertCountEqual(
                [
                    record.id
                    for record in query.join_dataset_search("cat1", collections=["refcats"])
                    .where(_x.htm7.region.overlaps(patch.region))
                    .dimension_records("htm7")
                ],
                [253954, 253955],
            )

    def test_data_coordinate_upload(self) -> None:
        """Test queries for dimension records with a data coordinate upload."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            # Query with a data ID upload that has an irrelevant row (there's
            # no data with "Cam2").
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam2", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that directly contains duplicates,
            # which should not appear in the results.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that could
            # also introduce duplicates if we're not careful.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                ).dimension_records("detector"),
                [1, 3],
            )
            # Query with a data ID upload that has extra dimensions that are
            # used in a constraint.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=2, detector=3, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            instrument="Cam1", visit=1, detector=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [3],
            )
            # Query with a data ID upload that must be spatially joined to
            # the other dimensions. This join is added automatically.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=1, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=2, universe=butler.dimensions
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1", tract=1, patch=3, universe=butler.dimensions
                        ),
                    ]
                )
                .where(instrument="Cam1", visit=2)
                .dimension_records("detector"),
                [2, 3, 4],
                has_postprocessing=True,
            )
            # Query with a data ID upload that embeds a spatial relationship.
            # This prevents automatic creation of a spatial join. To make the
            # test more interesting, the spatial relationship embedded in
            # these data IDs is nonsense: it includes combinations that do not
            # overlap, while leaving out combinations that do overlap.
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=1,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=1,
                            instrument="Cam1",
                            visit=1,
                            detector=2,
                            universe=butler.dimensions,
                        ),
                        DataCoordinate.standardize(
                            skymap="SkyMap1",
                            tract=1,
                            patch=3,
                            instrument="Cam1",
                            visit=1,
                            detector=3,
                            universe=butler.dimensions,
                        ),
                    ]
                )
                .where(skymap="SkyMap1", tract=1, patch=1)
                .dimension_records("detector"),
                [1, 2],
            )
            # Query with an empty data ID upload (not a useful thing to do,
            # but a way to probe edge-case behavior).
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.make_empty(universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 2, 3, 4],
            )

    def test_data_coordinate_upload_force_temp_table(self) -> None:
        """Test queries for dimension records with a data coordinate upload
        that is so big it has to go into a temporary table rather than be
        included directly in the query via bind params (by making the
        threshold for using a temporary table tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
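            # Lower the constant-rows threshold so the three uploaded data IDs
            # exceed it and must be spilled into a temporary table.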
            query._driver._constant_rows_limit = 2
            self.check_detector_records(
                query.join_data_coordinates(
                    [
                        DataCoordinate.standardize(instrument="Cam1", detector=1, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=3, universe=butler.dimensions),
                        DataCoordinate.standardize(instrument="Cam1", detector=4, universe=butler.dimensions),
                    ]
                ).dimension_records("detector"),
                [1, 3, 4],
            )

    def test_materialization(self) -> None:
        """Test querying for dimension records against a materialized previous
        query.
        """
        butler = self.make_butler("base.yaml", "datasets.yaml", "spatial.yaml")
        with butler._query() as query:
            _x = query.expression_factory
            # Simple case where the materialization has just the dimensions
            # we need for the rest of the query.
            self.check_detector_records(
                query.where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # This materialization has extra dimensions that could cause
            # duplicates if we don't SELECT DISTINCT them away.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector"])
                .where(_x.detector.raft == "A", instrument="Cam1")
                .materialize()
                .dimension_records("detector"),
                [1, 2],
            )
            # Materialize a spatial join, which should prevent the creation
            # of a spatial join in the downstream query.
            self.check_detector_records(
                query.join_dimensions(["visit", "detector", "tract"]).materialize()
                # The patch constraint here should do nothing, because only
                # the spatial join from the materialization should exist. The
                # behavior is surprising no matter what here, and the
                # recommendation to users is to add an explicit overlap
                # expression any time it's not obvious what the default is.
                .where(skymap="SkyMap1", tract=0, instrument="Cam1", visit=2, patch=5).dimension_records(
                    "detector"
                ),
                [1, 2],
                has_postprocessing=True,
            )
            # Materialize with a dataset join.
            self.check_detector_records(
                query.join_dataset_search("bias", collections=["imported_g"])
                .materialize(datasets=["bias"])
                .dimension_records("detector"),
                [1, 2, 3],
            )

    def test_timespan_results(self) -> None:
        """Test returning dimension records that include timespans."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        with butler._query() as query:
            self.assertCountEqual(
                [
                    (record.id, record.timespan.begin, record.timespan.end)
                    for record in query.dimension_records("visit")
                ],
                [
                    (
                        1,
                        astropy.time.Time("2021-09-09T03:00:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:01:00", format="isot", scale="tai"),
                    ),
                    (
                        2,
                        astropy.time.Time("2021-09-09T03:02:00", format="isot", scale="tai"),
                        astropy.time.Time("2021-09-09T03:03:00", format="isot", scale="tai"),
                    ),
                ],
            )

    def test_direct_driver_paging(self) -> None:
        """Test queries for dimension records that require multiple result
        pages (by making the raw page size tiny).

        This test assumes a DirectQueryDriver and is automatically skipped
        when some other driver is found.
        """
        butler = self.make_butler("base.yaml")
        # Basic test where pages should be transparent.
        with butler._query() as query:
            if not isinstance(query._driver, DirectQueryDriver):
                raise unittest.SkipTest("Test requires meddling with DirectQueryDriver internals.")
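            # Force two-row result pages so the four detector records must
            # span multiple pages.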
            query._driver._raw_page_size = 2
            self.check_detector_records(
                query.dimension_records("detector"),
                [1, 2, 3, 4],
            )
        # Test that it's an error to continue query iteration after closing
        # the context manager.
        with butler._query() as query:
            assert isinstance(query._driver, DirectQueryDriver)
            query._driver._raw_page_size = 2
            iterator = iter(query.dimension_records("detector"))
            next(iterator)
        with self.assertRaisesRegex(RuntimeError, "Cannot continue query result iteration"):
            list(iterator)

    def test_column_expressions(self) -> None:
        """Test queries with a wide variety of column expressions."""
        butler = self.make_butler("base.yaml", "spatial.yaml")
        butler.registry.defaults = RegistryDefaults(instrument="Cam1")
        with butler._query() as query:
            _x = query.expression_factory
            self.check_detector_records(
                query.where(_x.not_(_x.detector != 2)).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector).dimension_records("detector"),
                [2],
            )
            self.check_detector_records(
                query.where(_x.literal(2) == _x.detector + 1).dimension_records("detector"),
                [1],
            )
            self.check_detector_records(
                query.where(-_x.detector == -3).dimension_records("detector"),
                [3],
            )
            self.check_detector_records(
                query.where(_x.detector == 1, _x.detector == 2).dimension_records("detector"),
                [],
                messages=["'where' expression requires both detector=2 and detector=1."],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.overlaps(
                            Timespan(
                                begin=astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                                end=None,
                            )
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.not_(
                            _x.visit.timespan.end
                            < astropy.time.Time("2021-09-09T03:02:30", format="isot", scale="tai"),
                        )
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        _x.visit.timespan.begin
                        > astropy.time.Time("2021-09-09T03:01:30", format="isot", scale="tai")
                    ).dimension_records("visit")
                ],
                [2],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(
                        (_x.visit.exposure_time + -(5.0 * _x.visit.zenith_angle)) > 0.0
                    ).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time - 5.0 >= 50.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.id % 2 != 0).dimension_records("visit")],
                [1],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.zenith_angle / 5.0 <= 1.0).dimension_records("visit")
                ],
                [1],
            )
            self.assertCountEqual(
                [record.id for record in query.where(_x.visit.timespan.is_null).dimension_records("visit")],
                [],
            )
            self.assertCountEqual(
                [
                    record.id
                    for record in query.where(_x.visit.exposure_time.is_null).dimension_records("visit")
                ],
                [],
            )
            self.check_detector_records(
                query.where(_x.detector.in_iterable([1, 3, 4])).dimension_records("detector"),
                [1, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=2, stop=None)).dimension_records("detector"),
                [2, 3, 4],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=3)).dimension_records("detector"),
                [1, 2],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=None, step=2)).dimension_records("detector"),
                [1, 3],
            )
            self.check_detector_records(
                query.where(_x.detector.in_range(start=1, stop=2)).dimension_records("detector"),
                [1],
            )
            # This is a complex way to write a much simpler query ("where
            # detector.raft == 'A'"), but it tests code paths that would
            # otherwise require a lot more test setup.
            self.check_detector_records(
                query.where(
                    _x.detector.in_query(_x.detector, query.where(_x.detector.raft == "A"))
                ).dimension_records("detector"),
                [1, 2],
            )
            # It is an error to reference tract without skymap in a WHERE
            # clause.
            with self.assertRaises(InvalidQueryError):
                list(query.where(_x.tract == 4).dimension_records("patch"))