Coverage for python/lsst/daf/butler/script/pruneCollection.py: 15%
50 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-21 02:43 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-21 02:43 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from dataclasses import dataclass
24from typing import Callable, Dict, List, Optional, Union
26from astropy.table import Table
28from .._butler import (
29 Butler,
30 PurgeUnsupportedPruneCollectionsError,
31 PurgeWithoutUnstorePruneCollectionsError,
32 RunWithoutPurgePruneCollectionsError,
33)
34from ..registry import CollectionType
35from .queryDatasets import QueryDatasets
class PruneCollectionResult:
    """Outcome of preparing a prune-collection operation.

    Attributes are plain instance attributes set in ``__init__``:

    - ``confirm``: true if the user should be shown what will be removed
      before pruning the collection.
    - ``removeTable``: if ``confirm`` is true, will contain the astropy
      table describing the data that will be removed; starts as `None`.
    - ``onConfirmation``: the callback function that does the work;
      starts as `None`.
    """

    def __init__(self, confirm: bool) -> None:
        # Whether the caller wants to review the removal before it happens.
        self.confirm: bool = confirm
        # Filled in later by whoever prepares the operation.
        self.onConfirmation: Optional[Callable[[], None]] = None
        self.removeTable: Optional[Table] = None
def pruneCollection(
    repo: str, collection: str, purge: bool, unstore: bool, unlink: List[str], confirm: bool
) -> PruneCollectionResult:
    """Remove a collection and possibly prune datasets within it.

    Nothing is removed by this function itself: the removal is packaged as
    a callback on the returned object so the caller can ask the user for
    confirmation first.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Same as the ``name`` argument to ``Butler.pruneCollection``.
    purge : `bool`
        Same as the ``purge`` argument to ``Butler.pruneCollection``.
    unstore : `bool`
        Same as the ``unstore`` argument to ``Butler.pruneCollection``.
    unlink : `list` [`str`]
        Same as the ``unlink`` argument to ``Butler.pruneCollection``.
    confirm : `bool`
        If `True` will produce a table of collections that will be removed for
        display to the user.

    Returns
    -------
    result : `PruneCollectionResult`
        Object whose ``onConfirmation`` attribute is the callback that
        performs the prune, and whose ``removeTable`` attribute is, when
        ``confirm`` is `True`, an `astropy.table.Table` listing the
        collections that will be removed, their type, and the number of
        datasets in the collection if applicable (`None` otherwise).

    Raises
    ------
    RuntimeError
        Raised while building the table if a dataset is attributed to a
        collection that is not of type RUN.

    Notes
    -----
    The ``onConfirmation`` callback raises `TypeError` when the combination
    of ``purge``/``unstore`` is rejected by ``Butler.pruneCollection``.
    """

    @dataclass
    class CollectionInfo:
        """Lightweight container to hold the type of collection and the number
        of datasets in the collection if applicable."""

        # Dataset count; `None` for collection types that are not RUN.
        count: Optional[int]
        # Name of the collection type (e.g. "RUN", "CHAINED").
        type: str

    result = PruneCollectionResult(confirm)
    if confirm:
        print("Searching collections...")
        butler = Butler(repo)
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset(
                    (
                        CollectionType.RUN,
                        CollectionType.TAGGED,
                        CollectionType.CHAINED,
                        CollectionType.CALIBRATION,
                    )
                ),
                expression=(collection,),
                includeChains=True,
            )
        )

        collections: Dict[str, CollectionInfo] = {}

        def addCollection(name: str) -> None:
            """Add a collection to the collections, recursive if the collection
            being added can contain collections."""
            collectionType = butler.registry.getCollectionType(name).name
            # Only RUN collections get a dataset counter; others show "-".
            collections[name] = CollectionInfo(0 if collectionType == "RUN" else None, collectionType)
            if collectionType == "CHAINED":
                for child in butler.registry.getCollectionChain(name):
                    addCollection(child)

        for name in collectionNames:
            addCollection(name)

        # Rebuild in name order so the table rows come out sorted.
        collections = {name: collections[name] for name in sorted(collections)}

        queryDatasets = QueryDatasets(
            repo=repo,
            glob=None,
            collections=[collection],
            where=None,
            find_first=True,
            show_uri=False,
        )
        for datasetRef in queryDatasets.getDatasets():
            # NOTE(review): this lookup raises KeyError if a dataset's run is
            # not one of the collections gathered above -- confirm whether
            # that can happen (e.g. for TAGGED collections).
            collectionInfo = collections[datasetRef.run]
            if collectionInfo.count is None:
                raise RuntimeError(f"Unexpected dataset in collection of type {collectionInfo.type}")
            collectionInfo.count += 1

        result.removeTable = Table(
            [
                list(collections.keys()),
                [info.type for info in collections.values()],
                [info.count if info.count is not None else "-" for info in collections.values()],
            ],
            names=("Collection", "Collection Type", "Number of Datasets"),
        )

    def doRemove() -> None:
        """Perform the prune collection step."""
        # A separate, writeable Butler is created only when removal is
        # actually requested.
        butler = Butler(repo, writeable=True)
        try:
            butler.pruneCollection(collection, purge, unstore, unlink)
        except PurgeWithoutUnstorePruneCollectionsError as e:
            raise TypeError("Cannot pass --purge without --unstore.") from e
        except RunWithoutPurgePruneCollectionsError as e:
            # Report the collection's name; the type is already stated as RUN.
            raise TypeError(f"Cannot prune RUN collection {collection} without --purge.") from e
        except PurgeUnsupportedPruneCollectionsError as e:
            # Use the enum member's name so the text does not depend on how
            # the enum formats itself.
            raise TypeError(
                f"Cannot prune {e.collectionType.name} collection {collection} with --purge."
            ) from e

    result.onConfirmation = doRemove
    return result