Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 34%
77 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-28 07:52 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-28 07:52 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from enum import Enum, auto
25from .._butler import Butler
26from ..registry import CollectionType
27from .queryDatasets import QueryDatasets
class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [``astropy.table.table``], optional
        The astropy tables that will be or were deleted, by default None.
    state : ``PruneDatasetsResult.State``, optional
        The initial state of execution of the action, if `None` the result
        state is ``INIT``, by default None.
    errDict : `dict`, optional
        Variables related to an error that may be substituted into a
        user-visible string. If `None` an empty `dict` is used, by default
        None.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : callable or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action. `None` until a
        confirmation callback is assigned.
    action : `dict` or `None`
        Describes the removal action for a dry run; when set it is a dict
        with keys ``disassociate``, ``unstore``, ``purge`` and
        ``collections``.
    errDict : `dict`
        Same as in Parameters; never `None`.
    """

    class State(Enum):
        """Execution states and error conditions of a prune-datasets action."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(self, tables=None, state=None, errDict=None):
        self.state = state or self.State.INIT
        self.tables = tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string.
        self.errDict = errDict or {}

    # NOTE: every property below compares against the ``State`` enum *class*
    # (``self.State``), never against the current member (``self.state``).
    # Looking a member up through another member is fragile and is not
    # supported by every Python version.

    @property
    def dryRun(self):
        """`True` if the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self):
        """`True` if the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self):
        """`True` if the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self):
        """`True` if the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self):
        """`True` if the state is ``ERR_NO_COLLECTION_RESTRICTION`` (`bool`)."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self):
        """`True` if the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self):
        """`True` if the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP
def pruneDatasets(
    repo, collections, datasets, where, disassociate_tags, unstore, purge_run, dry_run, confirm, find_all
):
    """Prune datasets from a repository, or report what would be pruned.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        Glob-style search strings that identify the collections to search.
        If empty, ``purge_run`` (when given) or else ``disassociate_tags`` is
        used in its place.
    datasets : iterable [`str`]
        Glob-style search strings that identify the dataset type names to
        search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. If not `None`
        then ``purge_run`` must be `None`.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDataset``'s
        find_first option.

    Returns
    -------
    results : ``PruneDatasetsResult``
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring
      disassociate), because that comes with a collection argument that we
      can't respect, and that might be confusing (purge will disassociate
      from all TAGGED collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Reject illegal option combinations before the repository is opened.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)
    if purge_run and disassociate_tags:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    # With no explicit collections, fall back to the purge run and then to the
    # tags being disassociated; if that still leaves nothing the search would
    # be unrestricted, which is refused.
    collections = collections or ((purge_run,) if purge_run else disassociate_tags)
    if not collections:
        return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)

    butler = Butler(repo)

    # A purge is only permitted on a RUN-type collection.
    if purge_run and butler.registry.getCollectionType(purge_run) is not CollectionType.RUN:
        return PruneDatasetsResult(state=State.ERR_PRUNE_ON_NOT_RUN, errDict={"collection": purge_run})

    # find_first is the inverse of find_all: by default only the first match
    # per collection order is selected, unless the caller asked for all of
    # them (find_first requires collections, which are guaranteed here).
    query = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False,
    )
    result = PruneDatasetsResult(query.getTables())

    # Purging implies both disassociating and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    unstore = unstore or purge

    if dry_run:
        result.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        result.state = State.DRY_RUN_COMPLETE
        return result

    def _performPrune():
        """Carry out the removal and mark the result as finished."""
        writeableButler = Butler(repo, writeable=True)
        writeableButler.pruneDatasets(
            refs=query.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = State.FINISHED
        return result

    if not confirm:
        return _performPrune()

    # Defer the removal; the caller runs it through ``onConfirmation``.
    result.state = State.AWAITING_CONFIRMATION
    result.onConfirmation = _performPrune
    return result