Coverage for tests/test_cliCmdQueryDatasets.py: 24%
126 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Unit tests for daf_butler CLI query-datasets command."""
30import os
31import unittest
33from astropy.table import Table as AstropyTable
34from numpy import array
36from lsst.daf.butler import CollectionType, InvalidQueryError, StorageClassFactory, script
37from lsst.daf.butler.cli.butler import cli as butlerCli
38from lsst.daf.butler.cli.utils import LogCliRunner, clickResultMsg
39from lsst.daf.butler.tests import addDatasetType
40from lsst.daf.butler.tests.utils import (
41 ButlerTestHelper,
42 MetricTestRepo,
43 makeTestTempDir,
44 readTable,
45 removeTestTempDir,
46)
47from lsst.resources import ResourcePath
# Absolute path of the directory containing this test module. Test
# configuration files and temporary test directories are located relative
# to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def expectedFilesystemDatastoreTables(root: ResourcePath):
    """Build the tables expected from a ``show_uri`` query of the test repo.

    Parameters
    ----------
    root : `lsst.resources.ResourcePath`
        Datastore root; every expected URI is produced by joining a
        relative file path onto it.

    Returns
    -------
    tables : `tuple` [`astropy.table.Table`]
        One table per component (``data``, ``output``, ``summary``, in that
        order), each holding a row for visit 423 followed by a row for
        visit 424.
    """
    columnNames = ("type", "run", "instrument", "visit", "band", "physical_filter", "URI")

    def componentTable(component):
        # Rows are ordered by visit, matching the order the query returns.
        rows = tuple(
            (
                f"test_metric_comp.{component}",
                "ingest/run",
                "DummyCamComp",
                visit,
                "R",
                "d-r",
                root.join(
                    f"ingest/run/test_metric_comp.{component}/"
                    f"test_metric_comp_v00000{visit}_fDummyCamComp_{component}.yaml"
                ),
            )
            for visit in ("423", "424")
        )
        return AstropyTable(array(rows), names=columnNames)

    return tuple(componentTable(component) for component in ("data", "output", "summary"))
class QueryDatasetsTest(unittest.TestCase, ButlerTestHelper):
    """Test the query-datasets command-line."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    storageClassFactory = StorageClassFactory()

    @staticmethod
    def _queryDatasets(
        repo, glob=(), collections=(), where="", find_first=False, show_uri=False, limit=0, order_by=()
    ):
        """Run `script.QueryDatasets` and return its tables as a list.

        Parameters
        ----------
        repo
            Butler to query; passed to `script.QueryDatasets` as ``butler``.
        glob, collections, where, find_first, show_uri, limit, order_by
            Forwarded unchanged to `script.QueryDatasets`.

        Returns
        -------
        tables : `list`
            Everything yielded by ``getTables()`` on the query object.
        """
        query = script.QueryDatasets(
            glob,
            collections,
            where=where,
            find_first=find_first,
            show_uri=show_uri,
            limit=limit,
            order_by=order_by,
            butler=repo,
        )
        return list(query.getTables())

    @staticmethod
    def _expectedComponentTables(datastore_root, runsAndVisits):
        """Build the per-component tables expected from a ``show_uri`` query.

        Parameters
        ----------
        datastore_root
            Object whose ``join`` method turns a relative file path into
            the expected URI (presumably a `lsst.resources.ResourcePath`;
            it comes from ``butler.get_datastore_roots()``).
        runsAndVisits : sequence of `tuple` [`str`, `str`]
            ``(run, visit)`` pairs, in the order the rows are expected to
            appear in every table.

        Returns
        -------
        tables : `tuple` [`astropy.table.Table`]
            One table for each of the ``data``, ``output`` and ``summary``
            components, in that order.
        """
        names = ("type", "run", "instrument", "visit", "band", "physical_filter", "URI")
        tables = []
        for component in ("data", "output", "summary"):
            rows = tuple(
                (
                    f"test_metric_comp.{component}",
                    run,
                    "DummyCamComp",
                    visit,
                    "R",
                    "d-r",
                    datastore_root.join(
                        f"{run}/test_metric_comp.{component}/"
                        f"test_metric_comp_v00000{visit}_fDummyCamComp_{component}.yaml"
                    ),
                )
                for run, visit in runsAndVisits
            )
            tables.append(AstropyTable(array(rows), names=names))
        return tuple(tables)

    def setUp(self):
        """Create a temporary directory to hold the test repository."""
        self.testdir = makeTestTempDir(TESTDIR)
        self.repoDir = os.path.join(self.testdir, "repo")

    def tearDown(self):
        """Remove the temporary directory created in `setUp`."""
        removeTestTempDir(self.testdir)

    def testChained(self):
        """Test show_uri output for a repo using the chained datastore
        configuration.
        """
        testRepo = MetricTestRepo(
            self.repoDir, configFile=os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
        )
        self.enterContext(testRepo.butler)

        tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*")

        # Want second datastore root since in-memory is ephemeral and
        # all the relevant datasets are stored in the second as well as third
        # datastore.
        roots = testRepo.butler.get_datastore_roots()
        datastore_root = roots[testRepo.butler.get_datastore_names()[1]]

        self.assertAstropyTablesEqual(
            tables,
            expectedFilesystemDatastoreTables(datastore_root),
            filterColumns=True,
        )

    def testShowURI(self):
        """Test for expected output with show_uri=True."""
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*")

        roots = testRepo.butler.get_datastore_roots()
        datastore_root = list(roots.values())[0]

        self.assertAstropyTablesEqual(
            tables, expectedFilesystemDatastoreTables(datastore_root), filterColumns=True
        )

    def testShowUriNoDisassembly(self):
        """Test for expected output with show_uri=True and no disassembly."""
        testRepo = MetricTestRepo(
            self.repoDir,
            configFile=self.configFile,
            storageClassName="StructuredCompositeReadCompNoDisassembly",
        )
        self.enterContext(testRepo.butler)

        tables = self._queryDatasets(repo=testRepo.butler, show_uri=True, collections="*", glob="*")

        roots = testRepo.butler.get_datastore_roots()
        datastore_root = list(roots.values())[0]

        expected = [
            AstropyTable(
                array(
                    (
                        (
                            "test_metric_comp",
                            "ingest/run",
                            "DummyCamComp",
                            "423",
                            "R",
                            "d-r",
                            datastore_root.join(
                                "ingest/run/test_metric_comp/test_metric_comp_v00000423_fDummyCamComp.yaml"
                            ),
                        ),
                        (
                            "test_metric_comp",
                            "ingest/run",
                            "DummyCamComp",
                            "424",
                            "R",
                            "d-r",
                            datastore_root.join(
                                "ingest/run/test_metric_comp/test_metric_comp_v00000424_fDummyCamComp.yaml"
                            ),
                        ),
                    )
                ),
                names=("type", "run", "instrument", "visit", "band", "physical_filter", "URI"),
            ),
        ]

        self.assertAstropyTablesEqual(tables, expected, filterColumns=True)

    def testNoShowURI(self):
        """Test for expected output without show_uri (default is False)."""
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        tables = self._queryDatasets(repo=testRepo.butler, collections="*", glob="*")

        expectedTables = (
            AstropyTable(
                array(
                    (
                        ("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r"),
                        ("test_metric_comp", "ingest/run", "DummyCamComp", "424", "R", "d-r"),
                    )
                ),
                names=("type", "run", "instrument", "visit", "band", "physical_filter"),
            ),
        )

        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

    def testWhere(self):
        """Test using the where clause to reduce the number of rows returned by
        queryDatasets.
        """
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        for glob in (("*",), ("test_metric_comp",)):
            with self.subTest(glob=glob):
                tables = self._queryDatasets(
                    repo=testRepo.butler,
                    where="instrument='DummyCamComp' AND visit=423",
                    collections="*",
                    glob=glob,
                )

                expectedTables = (
                    AstropyTable(
                        array(("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r")),
                        names=("type", "run", "instrument", "visit", "band", "physical_filter"),
                    ),
                )

                self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

                # A wildcard collection expression combined with find_first
                # is expected to be rejected.
                with self.assertRaises(InvalidQueryError):
                    self._queryDatasets(repo=testRepo.butler, collections="*", find_first=True, glob=glob)

    def testGlobDatasetType(self):
        """Test specifying dataset type."""
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        # Create and register an additional DatasetType, together with the
        # visit record that its dataset refers to.
        testRepo.butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 425,
                "name": "fourtwentyfive",
                "physical_filter": "d-r",
                "day_obs": 20200101,
            },
        )

        datasetType = addDatasetType(
            testRepo.butler, "alt_test_metric_comp", ("instrument", "visit"), "StructuredCompositeReadComp"
        )

        testRepo.addDataset(dataId={"instrument": "DummyCamComp", "visit": 425}, datasetType=datasetType)

        # verify the new dataset type increases the number of tables found:
        tables = self._queryDatasets(repo=testRepo.butler, collections="*", glob="*")

        expectedTables = (
            AstropyTable(
                array(("alt_test_metric_comp", "ingest/run", "DummyCamComp", "425", "R", "d-r")),
                names=("type", "run", "instrument", "visit", "band", "physical_filter"),
            ),
            AstropyTable(
                array(
                    (
                        ("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r"),
                        ("test_metric_comp", "ingest/run", "DummyCamComp", "424", "R", "d-r"),
                    )
                ),
                names=("type", "run", "instrument", "visit", "band", "physical_filter"),
            ),
        )

        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # Dataset type (glob) argument will become mandatory soon
        with self.assertWarns(FutureWarning):
            self._queryDatasets(repo=testRepo.butler, collections="*")

    def test_limit_order(self):
        """Test limit and ordering."""
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        # A negative limit truncates the results and logs a warning telling
        # the user how to get more rows.
        with self.assertLogs("lsst.daf.butler.script.queryDatasets", level="WARNING") as cm:
            tables = self._queryDatasets(
                repo=testRepo.butler,
                limit=-1,
                order_by=("visit",),
                collections="*",
                glob=("test_metric_comp",),
            )

        self.assertIn("increase this limit", cm.output[0])

        expectedTables = [
            AstropyTable(
                array((("test_metric_comp", "ingest/run", "DummyCamComp", "423", "R", "d-r"),)),
                names=("type", "run", "instrument", "visit", "band", "physical_filter"),
            ),
        ]
        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # Same as previous test, but with positive limit so no warning is
        # issued.
        with self.assertNoLogs("lsst.daf.butler.script.queryDatasets", level="WARNING"):
            tables = self._queryDatasets(
                repo=testRepo.butler,
                limit=1,
                order_by=("visit",),
                collections="*",
                glob=("test_metric_comp",),
            )
        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # Descending order returns the other visit first.
        with self.assertLogs("lsst.daf.butler.script.queryDatasets", level="WARNING") as cm:
            tables = self._queryDatasets(
                repo=testRepo.butler,
                limit=-1,
                order_by=("-visit",),
                collections="*",
                glob=("test_metric_comp",),
            )
        self.assertIn("increase this limit", cm.output[0])

        expectedTables = [
            AstropyTable(
                array((("test_metric_comp", "ingest/run", "DummyCamComp", "424", "R", "d-r"),)),
                names=("type", "run", "instrument", "visit", "band", "physical_filter"),
            ),
        ]
        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # --order-by is not supported by the query backend for multiple dataset
        # types, so we can only provide it for queries with a single dataset
        # type.
        with self.assertRaisesRegex(NotImplementedError, "--order-by"):
            self._queryDatasets(
                repo=testRepo.butler, limit=1, order_by=("visit",), collections="*", glob=["*"]
            )
        with self.assertRaisesRegex(NotImplementedError, "--order-by"):
            self._queryDatasets(
                repo=testRepo.butler,
                limit=1,
                order_by=("visit",),
                collections="*",
                glob=["test_metric_comp", "raw"],
            )

    def testFindFirstAndCollections(self):
        """Test the find-first option, and the collections option, since it
        is required for find-first.
        """
        testRepo = MetricTestRepo(self.repoDir, configFile=self.configFile)
        self.enterContext(testRepo.butler)

        # Add a new run, and add a dataset to shadow an existing dataset.
        testRepo.addDataset(run="foo", dataId={"instrument": "DummyCamComp", "visit": 424})

        # Add a CHAINED collection to include the two runs, to check that
        # flattening works as well.
        testRepo.butler.collections.register("chain", CollectionType.CHAINED)
        testRepo.butler.collections.redefine_chain("chain", ["foo", "ingest/run"])

        # Verify that without find-first, duplicate datasets are returned
        tables = self._queryDatasets(repo=testRepo.butler, collections=["chain"], show_uri=True, glob="*")

        # The test should be running with a single FileDatastore. Use a
        # unittest assertion rather than a bare ``assert`` so the check is
        # not stripped when Python runs with -O.
        roots = testRepo.butler.get_datastore_roots()
        self.assertEqual(len(roots), 1)
        datastore_root = list(roots.values())[0]

        expectedTables = self._expectedComponentTables(
            datastore_root,
            (("foo", "424"), ("ingest/run", "423"), ("ingest/run", "424")),
        )

        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # Verify that with find first the duplicate dataset is eliminated and
        # the more recent dataset is returned.
        tables = self._queryDatasets(
            repo=testRepo.butler, collections=["chain"], show_uri=True, find_first=True, glob="*"
        )

        expectedTables = self._expectedComponentTables(
            datastore_root,
            (("foo", "424"), ("ingest/run", "423")),
        )

        self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

        # Verify that globs are not supported with find_first=True.
        with self.assertRaises(InvalidQueryError):
            self._queryDatasets(
                repo=testRepo.butler, collections=["*"], show_uri=True, find_first=True, glob="*"
            )

        # Collections argument will become mandatory soon
        with self.assertWarns(FutureWarning):
            self._queryDatasets(repo=testRepo.butler, glob="*")
class QueryDatasetsCLITest(unittest.TestCase, ButlerTestHelper):
    """Exercise ``butler query-datasets`` through the command-line runner."""

    def setUp(self):
        """Create a populated test repository and a CLI runner."""
        self.root = makeTestTempDir(TESTDIR)
        configPath = os.path.join(TESTDIR, "config/basic/butler.yaml")
        self.testRepo = MetricTestRepo(self.root, configFile=configPath)
        self.enterContext(self.testRepo.butler)
        self.runner = LogCliRunner()

    def tearDown(self):
        """Delete the temporary repository directory."""
        removeTestTempDir(self.root)

    def test_simple(self):
        """Check that query-datasets exits cleanly and reports two rows."""
        cliArgs = ["query-datasets", "--collections", "ingest/run", self.root, "test_metric_comp"]
        result = self.runner.invoke(butlerCli, cliArgs)
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))

        got = readTable(result.output)
        self.assertEqual(len(got), 2)
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()