Coverage for python/lsst/daf/butler/script/pruneCollection.py: 14%
50 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-01 19:55 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from astropy.table import Table
24from dataclasses import dataclass
25from typing import Callable, Dict, List, Optional, Union
26from .. import Butler
27from .. import (
28 PurgeWithoutUnstorePruneCollectionsError,
29 RunWithoutPurgePruneCollectionsError,
30 PurgeUnsupportedPruneCollectionsError,
31)
32from . import QueryDatasets
33from ..registry import CollectionType
class PruneCollectionResult:
    """Carrier for the outcome of preparing a prune-collection operation.

    Parameters
    ----------
    confirm : `bool`
        Whether the user should be shown what will be removed before the
        collection is pruned.
    """

    def __init__(self, confirm: bool) -> None:
        # True if the user should be shown what will be removed before the
        # collection is pruned.
        self.confirm: bool = confirm
        # Callback that does the work; `None` until one is assigned.
        self.onConfirmation: Optional[Callable[[], None]] = None
        # When ``confirm`` is true this will hold the astropy table that
        # describes the data to be removed; `None` until it is assigned.
        self.removeTable: Optional[Table] = None
def pruneCollection(repo: str,
                    collection: str,
                    purge: bool,
                    unstore: bool,
                    unlink: List[str],
                    confirm: bool) -> "PruneCollectionResult":
    """Prepare removal of a collection and possibly the datasets within it.

    Nothing is removed by this function itself: the pruning is performed by
    the ``onConfirmation`` callback of the returned object.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``.
    collection : `str`
        Same as the ``name`` argument to ``Butler.pruneCollection``.
    purge : `bool`
        Same as the ``purge`` argument to ``Butler.pruneCollection``.
    unstore : `bool`
        Same as the ``unstore`` argument to ``Butler.pruneCollection``.
    unlink : `list` [`str`]
        Same as the ``unlink`` argument to ``Butler.pruneCollection``.
    confirm : `bool`
        If `True`, search the repository and produce a table of collections
        that will be removed, for display to the user.

    Returns
    -------
    result : `PruneCollectionResult`
        Object whose ``confirm`` attribute echoes the ``confirm`` argument,
        whose ``removeTable`` attribute is, when ``confirm`` is `True`, an
        `astropy.table.Table` listing the collections that will be removed,
        their type, and the number of datasets in the collection if
        applicable (`None` otherwise), and whose ``onConfirmation``
        attribute is a no-argument callable that performs the pruning.

    Raises
    ------
    RuntimeError
        Raised while building the table if a dataset is found whose run is
        recorded as a collection that is not of type RUN.

    Notes
    -----
    The ``onConfirmation`` callback raises `TypeError` when
    ``Butler.pruneCollection`` rejects the combination of options.
    """

    @dataclass
    class CollectionInfo:
        """Lightweight container to hold the type of collection and the number
        of datasets in the collection if applicable."""
        count: Optional[int]
        type: str

    result = PruneCollectionResult(confirm)
    if confirm:
        print("Searching collections...")
        butler = Butler(repo)
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset((
                    CollectionType.RUN,
                    CollectionType.TAGGED,
                    CollectionType.CHAINED,
                    CollectionType.CALIBRATION,
                )),
                expression=(collection,),
                includeChains=True,
            )
        )

        collections: Dict[str, CollectionInfo] = {}

        def addCollection(name: str) -> None:
            """Add a collection to the collections, recursive if the collection
            being added can contain collections."""
            if name in collections:
                # Already recorded (reachable via more than one path); a
                # second visit would only repeat the same registry queries
                # and rewrite an identical entry.
                return
            collectionType = butler.registry.getCollectionType(name).name
            # Only RUN collections get a dataset counter; other types show
            # "-" in the table.
            collections[name] = CollectionInfo(0 if collectionType == "RUN" else None, collectionType)
            if collectionType == "CHAINED":
                for c in butler.registry.getCollectionChain(name):
                    addCollection(c)

        for name in collectionNames:
            addCollection(name)

        queryDatasets = QueryDatasets(
            repo=repo,
            glob=None,
            collections=[collection],
            where=None,
            find_first=True,
            show_uri=False,
        )
        for datasetRef in queryDatasets.getDatasets():
            # NOTE(review): this lookup raises KeyError if a dataset's run is
            # not one of the collections gathered above -- confirm whether
            # that can happen (e.g. for TAGGED collections).
            collectionInfo = collections[datasetRef.run]
            if collectionInfo.count is None:
                raise RuntimeError(f"Unexpected dataset in collection of type {collectionInfo.type}")
            collectionInfo.count += 1

        result.removeTable = Table(
            [
                list(collections.keys()),
                [v.type for v in collections.values()],
                [v.count if v.count is not None else "-" for v in collections.values()],
            ],
            names=("Collection", "Collection Type", "Number of Datasets")
        )

    def doRemove() -> None:
        """Perform the prune collection step."""
        # A separate, writeable Butler is created here; the one used for the
        # search above is constructed without ``writeable=True``.
        butler = Butler(repo, writeable=True)
        try:
            butler.pruneCollection(collection, purge, unstore, unlink)
        except PurgeWithoutUnstorePruneCollectionsError as e:
            raise TypeError("Cannot pass --purge without --unstore.") from e
        except RunWithoutPurgePruneCollectionsError as e:
            # Report the collection's name, not the name of its type.
            raise TypeError(f"Cannot prune RUN collection {collection} without --purge.") from e
        except PurgeUnsupportedPruneCollectionsError as e:
            raise TypeError(
                f"Cannot prune {e.collectionType.name} collection {collection} with --purge."
            ) from e

    result.onConfirmation = doRemove
    return result