Coverage for python/lsst/daf/butler/script/pruneCollection.py: 14%
52 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-08 10:28 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-08 10:28 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24from collections.abc import Callable
25from dataclasses import dataclass
27from astropy.table import Table
29from .._butler import (
30 Butler,
31 PurgeUnsupportedPruneCollectionsError,
32 PurgeWithoutUnstorePruneCollectionsError,
33 RunWithoutPurgePruneCollectionsError,
34)
35from ..registry import CollectionType
36from .queryDatasets import QueryDatasets
class PruneCollectionResult:
    """Container for the outcome of preparing a collection prune.

    Parameters
    ----------
    confirm : `bool`
        `True` if the user should be shown what will be removed before the
        collection is pruned.
    """

    def __init__(self, confirm: bool) -> None:
        # Whether the user should be shown what will be removed before the
        # collection is pruned.
        self.confirm: bool = confirm
        # Callback that does the actual work; `None` until one is assigned.
        self.onConfirmation: Callable[[], None] | None = None
        # Astropy table describing the data that will be removed; intended to
        # be filled in only when ``confirm`` is `True`.
        self.removeTable: Table | None = None
def pruneCollection(
    repo: str, collection: str, purge: bool, unstore: bool, unlink: list[str], confirm: bool
) -> PruneCollectionResult:
    """Prepare the removal of a collection and possibly of datasets within it.

    Nothing is removed by this function itself; the removal is performed by
    the ``onConfirmation`` callback of the returned object.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Same as the ``name`` argument to ``Butler.pruneCollection``.
    purge : `bool`
        Same as the ``purge`` argument to ``Butler.pruneCollection``.
    unstore : `bool`
        Same as the ``unstore`` argument to ``Butler.pruneCollection``.
    unlink : `list` [`str`]
        Same as the ``unlink`` argument to ``Butler.pruneCollection``.
    confirm : `bool`
        If `True` will produce a table of collections that will be removed for
        display to the user.

    Returns
    -------
    result : `PruneCollectionResult`
        Object whose ``onConfirmation`` attribute is a callable that performs
        the prune, and whose ``removeTable`` attribute is, if ``confirm`` is
        `True`, an `astropy.table.Table` containing the collections that will
        be removed, their type, and the number of datasets in the collection
        if applicable (otherwise `None`).

    Raises
    ------
    RuntimeError
        Raised while building the table if a dataset's run refers to a
        collection whose type is not RUN.

    Notes
    -----
    The ``onConfirmation`` callback raises `TypeError` when
    ``Butler.pruneCollection`` rejects the combination of ``purge`` and
    ``unstore`` for the collection.
    """

    @dataclass
    class CollectionInfo:
        """Lightweight container to hold the type of collection and the number
        of datasets in the collection if applicable.
        """

        count: int | None
        type: str

    result = PruneCollectionResult(confirm)
    if confirm:
        print("Searching collections...")
        butler = Butler(repo)
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset(
                    (
                        CollectionType.RUN,
                        CollectionType.TAGGED,
                        CollectionType.CHAINED,
                        CollectionType.CALIBRATION,
                    )
                ),
                expression=(collection,),
                includeChains=True,
            )
        )

        collections: dict[str, CollectionInfo] = {}

        def addCollection(name: str) -> None:
            """Add a collection to the collections, recursive if the collection
            being added can contain collections.
            """
            # A collection can be reached more than once (directly and through
            # one or more chains); it only needs to be recorded the first time.
            if name in collections:
                return
            collectionType = butler.registry.getCollectionType(name).name
            # Only RUN collections get a dataset counter; others show no count.
            collections[name] = CollectionInfo(0 if collectionType == "RUN" else None, collectionType)
            if collectionType == "CHAINED":
                for c in butler.registry.getCollectionChain(name):
                    addCollection(c)

        for name in collectionNames:
            addCollection(name)

        # Present the collections in alphabetical order.
        collections = {name: collections[name] for name in sorted(collections)}

        queryDatasets = QueryDatasets(
            repo=repo,
            glob=[],
            collections=[collection],
            where="",
            find_first=True,
            show_uri=False,
        )
        # Tally each dataset against the collection named by its run.
        for datasetRef in queryDatasets.getDatasets():
            assert datasetRef.run is not None, "This must be a resolved dataset ref"
            collectionInfo = collections[datasetRef.run]
            if collectionInfo.count is None:
                raise RuntimeError(f"Unexpected dataset in collection of type {collectionInfo.type}")
            collectionInfo.count += 1

        result.removeTable = Table(
            [
                list(collections.keys()),
                [v.type for v in collections.values()],
                [v.count if v.count is not None else "-" for v in collections.values()],
            ],
            names=("Collection", "Collection Type", "Number of Datasets"),
        )

    def doRemove() -> None:
        """Perform the prune collection step."""
        # A separate, writeable butler is created for the removal itself.
        butler = Butler(repo, writeable=True)
        try:
            butler.pruneCollection(collection, purge, unstore, unlink)
        except PurgeWithoutUnstorePruneCollectionsError as e:
            raise TypeError("Cannot pass --purge without --unstore.") from e
        except RunWithoutPurgePruneCollectionsError as e:
            # NOTE(review): the messages below interpolate the collection
            # *type* name where a collection name would read more naturally;
            # confirm intent before changing the user-facing text.
            raise TypeError(f"Cannot prune RUN collection {e.collectionType.name} without --purge.") from e
        except PurgeUnsupportedPruneCollectionsError as e:
            raise TypeError(
                f"Cannot prune {e.collectionType} collection {e.collectionType.name} with --purge."
            ) from e

    result.onConfirmation = doRemove
    return result