Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 31%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from enum import Enum, auto
25from . import QueryDatasets
26from .. import Butler
27from .. import CollectionType
class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [``astropy.table.table``], optional
        The astropy tables that will be or were deleted, by default None.
    state : ``PruneDatasetsResult.State``, optional
        The initial state of execution of the action, if `None` the result
        state is ``INIT``, by default None.
    errDict : `dict`, optional
        Variables related to an error that may be substituted into a
        user-visible string, by default None (stored as an empty `dict`).

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : callable or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action. Initialized to
        `None`.
    action : `dict` or `None`
        Describes the removal action for a dry run; when set it is a dict
        with keys ``disassociate``, ``unstore``, ``purge``, and
        ``collections``. Initialized to `None`.
    errDict : `dict`
        Same as in Parameters; never `None`.
    """

    class State(Enum):
        """Execution states and error conditions of a prune-datasets action."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(self, tables=None, state=None, errDict=None):
        self.state = state or self.State.INIT
        self.tables = tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string.
        self.errDict = errDict or {}

    # Each property below compares against a member looked up on the enum
    # *class* (``self.State``), never on the current member (``self.state``),
    # so the lookup does not rely on member-to-member enum access.

    @property
    def dryRun(self):
        """`True` if the state is ``DRY_RUN_COMPLETE``."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self):
        """`True` if the state is ``AWAITING_CONFIRMATION``."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self):
        """`True` if the state is ``FINISHED``."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self):
        """`True` if the state is ``ERR_PURGE_AND_DISASSOCIATE``."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self):
        """`True` if the state is ``ERR_NO_COLLECTION_RESTRICTION``."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self):
        """`True` if the state is ``ERR_PRUNE_ON_NOT_RUN``."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self):
        """`True` if the state is ``ERR_NO_OP``."""
        return self.state is self.State.ERR_NO_OP
def pruneDatasets(repo, collections, datasets, where, disassociate_tags, unstore, purge_run, dry_run, confirm,
                  find_all):
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        Glob-style search strings that identify the collections to search.
        If empty, ``purge_run`` (when given) or else ``disassociate_tags`` is
        searched instead.
    datasets : iterable [`str`]
        Glob-style search strings that identify the dataset type names to
        search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. Must not be
        combined with ``purge_run``.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove the dataset from this run in the ``Registry``.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        Passed to ``QueryDatasets`` inverted, as ``find_first=not find_all``.
        If False, for each result data ID, only the dataset from the first
        collection in which a dataset of that dataset type appears (according
        to the order of ``collections``) is deleted; in that case
        ``collections`` must specify at least one expression and must not
        contain wildcards.

    Returns
    -------
    results : ``PruneDatasetsResult``
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring disassociate),
      because that comes with a collection argument that we can't respect, and
      that might be confusing (purge will disassociate from all TAGGED
      collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Reject requests that ask for nothing, or for an illegal combination.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)
    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    # Without an explicit collection restriction, fall back to the run being
    # purged, or failing that to the tags being disassociated.
    if not collections:
        fallback = (purge_run,) if purge_run else disassociate_tags
        if not fallback:
            return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)
        collections = fallback

    butler = Butler(repo)

    # Purging is only permitted on RUN type collections.
    if purge_run and butler.registry.getCollectionType(purge_run) is not CollectionType.RUN:
        return PruneDatasetsResult(state=State.ERR_PRUNE_ON_NOT_RUN, errDict={"collection": purge_run})

    # find_first requires collections (guaranteed above); it is on unless the
    # caller explicitly asked to find all matches.
    query = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False
    )

    result = PruneDatasetsResult(query.getTables())

    # A purge implies both disassociation and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    doUnstore = unstore or purge

    if dry_run:
        result.action = dict(disassociate=disassociate, purge=purge, unstore=doUnstore,
                             collections=collections)
        result.state = State.DRY_RUN_COMPLETE
        return result

    def doPruneDatasets():
        """Perform the removal with a writeable butler and mark it finished."""
        writeableButler = Butler(repo, writeable=True)
        writeableButler.pruneDatasets(
            refs=query.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            run=purge_run or None,
            unstore=doUnstore,
        )
        result.state = State.FINISHED
        return result

    if not confirm:
        return doPruneDatasets()

    # Defer the removal: hand the caller the tables plus a completion callback.
    result.onConfirmation = doPruneDatasets
    result.state = State.AWAITING_CONFIRMATION
    return result