Coverage for tests / test_cliCmdPruneDatasets.py: 38%
132 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 08:17 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Unit tests for daf_butler CLI prune-datasets subcommand."""
30import os
31import unittest
32from itertools import chain
33from unittest.mock import ANY, patch
35from astropy.table import Table
37# Tests require the SqlRegistry
38import lsst.daf.butler.registry.sql_registry
39import lsst.daf.butler.script
40from lsst.daf.butler import Butler, CollectionInfo, CollectionType
41from lsst.daf.butler.cli.butler import cli as butlerCli
42from lsst.daf.butler.cli.cmd.commands import (
43 pruneDatasets_askContinueMsg,
44 pruneDatasets_didNotRemoveAforementioned,
45 pruneDatasets_didRemoveAforementioned,
46 pruneDatasets_didRemoveMsg,
47 pruneDatasets_errNoCollectionRestriction,
48 pruneDatasets_errNoOp,
49 pruneDatasets_errPruneOnNotRun,
50 pruneDatasets_errPurgeAndDisassociate,
51 pruneDatasets_errQuietWithDryRun,
52 pruneDatasets_noDatasetsFound,
53 pruneDatasets_willRemoveMsg,
54 pruneDatasets_wouldDisassociateAndRemoveMsg,
55 pruneDatasets_wouldDisassociateMsg,
56 pruneDatasets_wouldRemoveMsg,
57)
58from lsst.daf.butler.cli.utils import LogCliRunner, astropyTablesToStr, clickResultMsg
59from lsst.daf.butler.direct_butler import DirectButler
60from lsst.daf.butler.script import QueryDatasets
61from lsst.daf.butler.tests.utils import (
62 ButlerTestHelper,
63 MetricTestRepo,
64 makeTestTempDir,
65 removeTestTempDir,
66)
# Absolute path of the directory that holds this test module.
TESTDIR = os.path.dirname(os.path.abspath(__file__))
# Read by ``getTables``: while true the mocked query yields a table, when
# false it yields nothing (``test_noDatasets`` flips it temporarily).
doFindTables = True
def getTables():
    """Generate the tables used to mock ``QueryDatasets.getTables``.

    A single one-column astropy table is produced while the module-level
    ``doFindTables`` flag is truthy; otherwise nothing is produced.
    """
    if doFindTables:
        table = Table(((1, 2, 3),), names=("foo",))
        yield table
    # In a generator this only sets the ``StopIteration`` value; iteration
    # simply ends here.
    return ()
def getDatasets():
    """Generate the placeholder used to mock ``QueryDatasets.getDatasets``.

    Exactly one item is produced: a list holding the string ``"datasets"``.
    """
    placeholder = ["datasets"]
    yield placeholder
def getRefs():
    """Flatten everything produced by ``getDatasets`` into one list."""
    return list(chain.from_iterable(getDatasets()))
def makeQueryDatasets(*args, **kwargs):
    """Construct a ``QueryDatasets`` from the given arguments.

    When ``glob`` is missing or empty it is replaced with ``("*",)`` so that
    all dataset types are used.
    """
    requestedGlob = kwargs.get("glob")
    if not requestedGlob:
        # Nothing was specified: fall back to every dataset type.
        kwargs["glob"] = ("*",)
    return QueryDatasets(*args, **kwargs)
class PruneDatasetsTestCase(unittest.TestCase):
    """Exercise the ``prune_datasets`` "command" function (in
    ``cli/cmd/commands.py``) together with the ``pruneDatasets`` "script"
    function (in ``script/_pruneDatasets.py``).

    ``Butler.pruneDatasets`` and a few other functions that are called before
    it are mocked; each test checks the arguments those mocks received.
    """

    def setUp(self):
        self.repo = "here"

    @staticmethod
    def makeQueryDatasetsArgs(**kwargs):
        """Return the keyword arguments ``QueryDatasets`` is expected to
        receive, with any defaults replaced by ``kwargs``.
        """
        expectedArgs = {
            "butler": ANY,
            "collections": ("*",),
            "where": "",
            "find_first": True,
            "show_uri": False,
            "glob": (),
        }
        return {**expectedArgs, **kwargs}

    @staticmethod
    def makePruneDatasetsArgs(**kwargs):
        """Return the keyword arguments ``Butler.pruneDatasets`` is expected
        to receive, with any defaults replaced by ``kwargs``.
        """
        expectedArgs = {
            "refs": (),
            "disassociate": False,
            "tags": (),
            "purge": False,
            "unstore": False,
        }
        return {**expectedArgs, **kwargs}

    # Replace QueryDatasets.getTables with a stand-in that yields Astropy
    # tables resembling what a call to QueryDatasets.getTables would return
    # on a repo with real data.
    @patch.object(lsst.daf.butler.script._pruneDatasets.QueryDatasets, "getTables", side_effect=getTables)
    # Replace QueryDatasets.getDatasets. It would normally return a list of
    # queries.DatasetQueryResults, but we only need to verify that whatever it
    # returns is handed to the pruneDatasets MagicMock, so an arbitrary value
    # that can be compared for equality is enough.
    @patch.object(lsst.daf.butler.script._pruneDatasets.QueryDatasets, "getDatasets", side_effect=getDatasets)
    # Replace the QueryDatasets class itself so calls to its init function can
    # be inspected. The side_effect returns a QueryDatasets instance, so this
    # mock records the call and then acts as a pass-through.
    @patch.object(lsst.daf.butler.script._pruneDatasets, "QueryDatasets", side_effect=makeQueryDatasets)
    # Replace the butler pruneDatasets method so the expected calls can be
    # checked without setting up a full repo with data.
    @patch.object(DirectButler, "pruneDatasets")
    def run_test(
        self,
        mockPruneDatasets,
        mockQueryDatasets_init,
        mockQueryDatasets_getDatasets,
        mockQueryDatasets_getTables,
        cliArgs,
        exMsgs,
        exPruneDatasetsCallArgs,
        exGetTablesCalled,
        exQueryDatasetsCallArgs,
        invokeInput=None,
        exPruneDatasetsExitCode=0,
    ):
        """Execute the test.

        Creates a repo inside the runner's isolated filesystem, invokes
        ``prune-datasets``, then verifies the exit code, the mock calls and
        the output.

        Parameters
        ----------
        mockPruneDatasets : `MagicMock`
            The MagicMock for the ``Butler.pruneDatasets`` function.
        mockQueryDatasets_init : `MagicMock`
            The MagicMock for the ``QueryDatasets.__init__`` function.
        mockQueryDatasets_getDatasets : `MagicMock`
            The MagicMock for the ``QueryDatasets.getDatasets`` function.
        mockQueryDatasets_getTables : `MagicMock`
            The MagicMock for the ``QueryDatasets.getTables`` function.
        cliArgs : `list` [`str`]
            The arguments to pass to the command line. Do not include the
            subcommand name or the repo.
        exMsgs : `list` [`str`] or None
            A list of text fragments that should appear in the text output
            after calling the CLI command, or None if no output should be
            produced.
        exPruneDatasetsCallArgs : `dict` [`str`, `typing.Any`]
            The arguments that ``Butler.pruneDatasets`` should have been
            called with, or None if that function should not have been
            called.
        exGetTablesCalled : bool
            `True` if ``QueryDatasets.getTables`` should have been called,
            else `False`.
        exQueryDatasetsCallArgs : `dict` [`str`, `typing.Any`]
            The arguments that ``QueryDatasets.__init__`` should have been
            called with, or `None` if the function should not have been
            called.
        invokeInput : `str`, optional
            A string to pass to the ``CliRunner.invoke`` `input` argument. By
            default None.
        exPruneDatasetsExitCode : `int`
            The expected exit code returned from invoking ``prune-datasets``.
        """
        runner = LogCliRunner()
        with runner.isolated_filesystem():
            # A repo has to exist before a butler can be created.
            createResult = runner.invoke(butlerCli, ["create", self.repo])
            self.assertEqual(createResult.exit_code, 0, clickResultMsg(createResult))

            # Invoking the prune-datasets CLI command exercises all of the
            # mocks installed by the decorators.
            pruneResult = runner.invoke(
                butlerCli, ["prune-datasets", self.repo, *cliArgs], input=invokeInput
            )
            self.assertEqual(pruneResult.exit_code, exPruneDatasetsExitCode, clickResultMsg(pruneResult))

            # Butler.pruneDatasets must have been called exactly once with
            # the expected arguments (or not at all). Its datasets argument
            # is the value returned by QueryDatasets, which is mocked with
            # side effect ``getDatasets()``.
            if exPruneDatasetsCallArgs:
                mockPruneDatasets.assert_called_once_with(**exPruneDatasetsCallArgs)
            else:
                mockPruneDatasets.assert_not_called()

            # Less critical: quickly confirm the QueryDatasets mocks were
            # used, expecting at most one call each.
            if exQueryDatasetsCallArgs:
                mockQueryDatasets_init.assert_called_once_with(**exQueryDatasetsCallArgs)
            else:
                mockQueryDatasets_init.assert_not_called()
            # If Butler.pruneDatasets was not called, then
            # QueryDatasets.getDatasets also does not get called.
            if exPruneDatasetsCallArgs:
                mockQueryDatasets_getDatasets.assert_called_once()
            if exGetTablesCalled:
                mockQueryDatasets_getTables.assert_called_once()
            else:
                mockQueryDatasets_getTables.assert_not_called()

            output = pruneResult.output
            if exMsgs is None:
                self.assertEqual("", output)
                return
            for fragment in exMsgs:
                self.assertIn(fragment, output)

    def test_defaults_doContinue(self):
        """Test running with the default values.

        With the default flags the subcommand should say what it will do,
        prompt for input, and report that it is done.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--unstore"],
            invokeInput="yes",
            exMsgs=(
                pruneDatasets_willRemoveMsg,
                pruneDatasets_askContinueMsg,
                astropyTablesToStr(getTables()),
                pruneDatasets_didRemoveAforementioned,
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(refs=getRefs(), unstore=True),
        )

    def test_defaults_doNotContinue(self):
        """Test running with the default values but not continuing.

        With the default flags the subcommand should say what it will do,
        prompt for input, and abort when told not to continue.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--unstore"],
            invokeInput="no",
            exMsgs=(
                pruneDatasets_willRemoveMsg,
                pruneDatasets_askContinueMsg,
                pruneDatasets_didNotRemoveAforementioned,
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=None,
        )

    def test_dryRun_unstore(self):
        """Test the --dry-run flag with --unstore.

        With the dry-run flag the subcommand should say what it would
        remove, but not remove the datasets.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--dry-run", "--unstore"],
            exMsgs=(pruneDatasets_wouldRemoveMsg, astropyTablesToStr(getTables())),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=None,
        )

    def test_dryRun_disassociate(self):
        """Test the --dry-run flag with --disassociate.

        With the dry-run flag the subcommand should say what it would
        remove, but not remove the datasets.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--dry-run", "--disassociate", "tag1"],
            exMsgs=(
                pruneDatasets_wouldDisassociateMsg.format(collections=(collection,)),
                astropyTablesToStr(getTables()),
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=None,
        )

    def test_dryRun_unstoreAndDisassociate(self):
        """Test the --dry-run flag with --unstore and --disassociate.

        With the dry-run flag the subcommand should say what it would
        remove, but not remove the datasets.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--dry-run", "--unstore", "--disassociate", "tag1"],
            exMsgs=(
                pruneDatasets_wouldDisassociateAndRemoveMsg.format(collections=(collection,)),
                astropyTablesToStr(getTables()),
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=None,
        )

    def test_noConfirm(self):
        """Test the --no-confirm flag.

        With the no-confirm flag the subcommand should not ask for a
        confirmation, and should print the did-remove message and the tables
        that were passed for removal.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--no-confirm", "--unstore"],
            exMsgs=(pruneDatasets_didRemoveMsg, astropyTablesToStr(getTables())),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(refs=getRefs(), unstore=True),
        )

    def test_quiet(self):
        """Test the --quiet flag.

        With the quiet flag set the subcommand should produce no output.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--quiet", "--unstore"],
            exMsgs=None,
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(refs=getRefs(), unstore=True),
        )

    def test_quietWithDryRun(self):
        """Test for an error using the --quiet flag with --dry-run."""
        self.run_test(
            cliArgs=["--quiet", "--dry-run", "--unstore"],
            exMsgs=(pruneDatasets_errQuietWithDryRun,),
            exGetTablesCalled=False,
            exQueryDatasetsCallArgs=None,
            exPruneDatasetsCallArgs=None,
            exPruneDatasetsExitCode=1,
        )

    def test_noCollections(self):
        """Test for an error if no collections are indicated."""
        self.run_test(
            cliArgs=["--find-all", "--unstore"],
            exMsgs=(pruneDatasets_errNoCollectionRestriction,),
            exGetTablesCalled=False,
            exQueryDatasetsCallArgs=None,
            exPruneDatasetsCallArgs=None,
            exPruneDatasetsExitCode=1,
        )

    def test_noDatasets(self):
        """Test for expected outputs when no datasets are found."""
        global doFindTables
        previous = doFindTables
        # Make the mocked getTables yield nothing for the duration of the run.
        doFindTables = False
        try:
            collection = "myCollection"
            self.run_test(
                cliArgs=[collection, "--unstore"],
                exMsgs=(pruneDatasets_noDatasetsFound,),
                exGetTablesCalled=True,
                exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(collection,)),
                exPruneDatasetsCallArgs=None,
            )
        finally:
            doFindTables = previous

    def test_purgeWithDisassociate(self):
        """Verify there is an error when --purge and --disassociate are both
        passed in.
        """
        self.run_test(
            cliArgs=["--purge", "run", "--disassociate", "tag1", "tag2"],
            exMsgs=(pruneDatasets_errPurgeAndDisassociate,),
            exGetTablesCalled=False,  # should not make it far enough to call this...
            exQueryDatasetsCallArgs=None,  # ...or this.
            exPruneDatasetsCallArgs=None,
            exPruneDatasetsExitCode=1,
        )

    def test_purgeNoOp(self):
        """Verify there is an error when none of --purge, --unstore, or
        --disassociate are passed.
        """
        self.run_test(
            cliArgs=[],
            exMsgs=(pruneDatasets_errNoOp,),
            exGetTablesCalled=False,  # should not make it far enough to call this...
            exQueryDatasetsCallArgs=None,  # ...or this.
            exPruneDatasetsCallArgs=None,
            exPruneDatasetsExitCode=1,
        )

    @patch.object(
        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
        "get_info",
        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
    )
    def test_purgeImpliedArgs(self, mockGetCollectionType):
        """Verify the arguments implied by --purge.

        --purge <run> implies the following arguments to butler.pruneDatasets:
        purge=True, disassociate=True, unstore=True
        And for QueryDatasets, if COLLECTIONS is not passed then <run> gets
        used as the value of COLLECTIONS (and when there is a COLLECTIONS
        value then find_first gets set to True)
        """
        run = "run"
        self.run_test(
            cliArgs=["--purge", run],
            invokeInput="yes",
            exMsgs=(
                pruneDatasets_willRemoveMsg,
                pruneDatasets_askContinueMsg,
                astropyTablesToStr(getTables()),
                pruneDatasets_didRemoveAforementioned,
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=(run,), find_first=True),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(
                purge=True, refs=getRefs(), disassociate=True, unstore=True
            ),
        )

    @patch.object(
        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
        "get_info",
        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
    )
    def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
        """Verify the arguments implied by --purge, with a COLLECTIONS."""
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--purge", "run"],
            invokeInput="yes",
            exMsgs=(
                pruneDatasets_willRemoveMsg,
                pruneDatasets_askContinueMsg,
                astropyTablesToStr(getTables()),
                pruneDatasets_didRemoveAforementioned,
            ),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(
                collections=(collection,), find_first=True
            ),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(
                purge=True, disassociate=True, unstore=True, refs=getRefs()
            ),
        )

    @patch.object(
        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
        "get_info",
        side_effect=lambda x: CollectionInfo(name="myTaggedCollection", type=CollectionType.TAGGED),
    )
    def test_purgeOnNonRunCollection(self, mockGetCollectionType):
        """Verify that --purge on a non-run collection fails with the
        expected error message.
        """
        collectionName = "myTaggedCollection"
        self.run_test(
            cliArgs=["--purge", collectionName],
            invokeInput="yes",
            exMsgs=(pruneDatasets_errPruneOnNotRun.format(collection=collectionName),),
            exGetTablesCalled=False,
            exQueryDatasetsCallArgs=None,
            exPruneDatasetsCallArgs=None,
            exPruneDatasetsExitCode=1,
        )

    def test_disassociateImpliedArgs(self):
        """Verify the arguments implied by --disassociate.

        --disassociate <tags> implies the following arguments to
        butler.pruneDatasets:
        disassociate=True, tags=<tags>
        and if COLLECTIONS is not passed then <tags> gets used as the value
        of COLLECTIONS.

        Use the --no-confirm flag instead of invokeInput="yes", and check for
        the associated output.
        """
        tags = ("tag1", "tag2")
        self.run_test(
            cliArgs=["--disassociate", "tag1", "--disassociate", "tag2", "--no-confirm"],
            exMsgs=(pruneDatasets_didRemoveMsg, astropyTablesToStr(getTables())),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(collections=tags, find_first=True),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(
                tags=tags, disassociate=True, refs=getRefs()
            ),
        )

    def test_disassociateImpliedArgsWithCollections(self):
        """Verify the arguments implied by --disassociate, with a COLLECTIONS
        argument.
        """
        collection = "myCollection"
        self.run_test(
            cliArgs=[collection, "--disassociate", "tag1", "--disassociate", "tag2", "--no-confirm"],
            exMsgs=(pruneDatasets_didRemoveMsg, astropyTablesToStr(getTables())),
            exGetTablesCalled=True,
            exQueryDatasetsCallArgs=self.makeQueryDatasetsArgs(
                collections=(collection,), find_first=True
            ),
            exPruneDatasetsCallArgs=self.makePruneDatasetsArgs(
                tags=("tag1", "tag2"), disassociate=True, refs=getRefs()
            ),
        )
class QueryDatasetsCLITest(unittest.TestCase, ButlerTestHelper):
    """Check that ``prune-datasets`` works when run as a script against a
    ``MetricTestRepo``.
    """

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        butlerConfig = os.path.join(TESTDIR, "config/basic/butler.yaml")
        self.testRepo = MetricTestRepo(self.root, configFile=butlerConfig)
        self.enterContext(self.testRepo.butler)
        self.runner = LogCliRunner()

    def tearDown(self):
        removeTestTempDir(self.root)

    def _assertOnlyRef1Removed(self, result):
        """Assert the command succeeded, ``ref1`` is gone and ``ref2`` is
        still present.
        """
        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        with Butler(self.root) as butler:
            self.assertIsNone(butler.get_dataset(self.testRepo.ref1.id))
            self.assertIsNotNone(butler.get_dataset(self.testRepo.ref2.id))

    def test_no_confirm(self):
        """Test that prune-datasets can execute with --no-confirm."""
        cliArgs = [
            "prune-datasets",
            "--no-confirm",
            self.root,
            "--purge",
            "ingest/run",
            "--where",
            "instrument='DummyCamComp' and visit=423",
        ]
        self._assertOnlyRef1Removed(self.runner.invoke(butlerCli, cliArgs))

    def test_confirm(self):
        """Test that prune-datasets can execute with the default --confirm."""
        cliArgs = [
            "prune-datasets",
            self.root,
            "--purge",
            "ingest/run",
            "--where",
            "instrument='DummyCamComp' and visit=423",
        ]
        # Answer the confirmation prompt affirmatively.
        self._assertOnlyRef1Removed(self.runner.invoke(butlerCli, cliArgs, input="y\n"))
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()