Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 33%
84 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from collections.abc import Callable, Iterable
24from enum import Enum, auto
25from typing import TYPE_CHECKING, Any
27from .._butler import Butler
28from ..registry import CollectionType
29from .queryDatasets import QueryDatasets
31if TYPE_CHECKING:
32 from astropy.table import Table
class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables that will be or were deleted. If `None` (the
        default) an empty list is stored.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action, if `None` the result
        state is ``INIT``, by default `None`.
    errDict : `dict` [`str`, `str`], optional
        Variables related to an error that may be substituted into a
        user-visible string. If `None` or empty (the default is `None`) a
        new empty dict is stored.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : `Callable` or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action. Always `None`
        right after construction; ``pruneDatasets`` assigns a function that
        takes no arguments and returns this result object when it leaves
        the result in the ``AWAITING_CONFIRMATION`` state.
    action : `dict` [`str`, `Any`] or `None`
        Describes the removal action for a dry run: a dict with keys
        ``disassociate``, ``unstore``, ``purge``, and ``collections``.
        Always `None` right after construction.
    errDict : `dict` [`str`, `str`]
        Same as in Parameters.
    """

    action: dict[str, Any] | None
    onConfirmation: Callable | None

    class State(Enum):
        """Execution states and error conditions of a prune action."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        # Enum members are always truthy, so testing for None explicitly is
        # equivalent to ``state or INIT`` and states the intent directly.
        self.state = state if state is not None else self.State.INIT
        if tables is None:
            tables = []
        self.tables = tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string.
        self.errDict = errDict or {}

    @property
    def dryRun(self) -> bool:
        """`True` if the state is ``DRY_RUN_COMPLETE``."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        """`True` if the state is ``AWAITING_CONFIRMATION``."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        """`True` if the state is ``FINISHED``."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        """`True` if the state is ``ERR_PURGE_AND_DISASSOCIATE``."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        """`True` if the state is ``ERR_NO_COLLECTION_RESTRICTION``."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        """`True` if the state is ``ERR_PRUNE_ON_NOT_RUN``."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        """`True` if the state is ``ERR_NO_OP``."""
        return self.state is self.State.ERR_NO_OP
def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        A list of glob-style search strings that identify the collections to
        search for. If empty, ``purge_run`` (or else ``disassociate_tags``)
        is used as the collections to search.
    datasets : iterable [`str`]
        A list of glob-style search strings that identify the dataset type
        names to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : iterable [`str`]
        TAGGED collections to disassociate the datasets from. If not empty
        then ``purge_run`` must be empty.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections.
    dry_run : `bool`
        Get results for what would be removed but do not remove. Checked
        before ``confirm``, so it wins if both are set.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDataset``'s find_first
        option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring disassociate),
      because that comes with a collection argument that we can't respect, and
      that might be confusing (purge will disassociate from all TAGGED
      collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    if not disassociate_tags and not unstore and not purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_OP)

    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_PURGE_AND_DISASSOCIATE)

    # If collections is not specified and a purge_run is, use the purge_run for
    # collections, or if disassociate_tags is then use that.
    if not collections:
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags

    if not collections:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_COLLECTION_RESTRICTION)

    butler = Butler(repo)

    # If purging, verify that the collection to purge is RUN type collection.
    if purge_run:
        collectionType = butler.registry.getCollectionType(purge_run)
        if collectionType is not CollectionType.RUN:
            return PruneDatasetsResult(
                state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN,
                errDict={"collection": purge_run},
            )

    datasets_found = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        # By default we want find_first to be True if collections are provided
        # (else False) (find_first requires collections to be provided).
        # But the user may specify that they want to find all (thus forcing
        # find_first to be False)
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(datasets_found.getTables())

    # Purging implies both disassociating and unstoring, so either flag is
    # switched on whenever a purge run was given.
    disassociate = bool(disassociate_tags) or bool(purge_run)
    purge = bool(purge_run)
    unstore = unstore or bool(purge_run)

    if dry_run:
        result.state = PruneDatasetsResult.State.DRY_RUN_COMPLETE
        result.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        return result

    def doPruneDatasets() -> PruneDatasetsResult:
        """Perform the removal and mark the shared result as finished."""
        # A separate writeable Butler is opened for the removal; it gets its
        # own name so it does not shadow the read-only one created above.
        writeable_butler = Butler(repo, writeable=True)
        writeable_butler.pruneDatasets(
            refs=datasets_found.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = PruneDatasetsResult.State.FINISHED
        return result

    if confirm:
        # Defer the removal: hand the caller the function to run once the
        # tables have been confirmed.
        result.state = PruneDatasetsResult.State.AWAITING_CONFIRMATION
        result.onConfirmation = doPruneDatasets
        return result

    return doPruneDatasets()