Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 41%
84 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from collections.abc import Callable, Iterable
from enum import Enum, auto
from typing import TYPE_CHECKING, Any

from .._butler import Butler
from ..registry import CollectionType
from .queryDatasets import QueryDatasets

if TYPE_CHECKING:
    from astropy.table import Table


class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables that will be or were deleted. Defaults to `None`.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action; if `None` (the
        default), the result state is ``INIT``.
    errDict : `dict` [`str`, `str`] or `None`
        Place to store error messages. Will be created if not given.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : `PruneDatasetsResult.State`
        The current state of the action.
    onConfirmation : `Callable` or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action.
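
    Examples
    --------
    A minimal sketch of the intended caller flow, assuming ``result`` was
    returned by `pruneDatasets` (how the tables are shown and how the user
    confirms are up to the caller)::

        result = pruneDatasets(...)
        if result.confirm:
            # Display result.tables, ask the user, then complete the action.
            result = result.onConfirmation()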
    """

    action: dict[str, Any] | None
    onConfirmation: Callable | None

    class State(Enum):
        """State associated with dataset pruning request."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        self.state = state or self.State.INIT
        if tables is None:
            tables = []
        self.tables = tables
        self.onConfirmation = None
        # For a dry run, ``action`` describes the removal action: a dict with
        # keys ``disassociate``, ``unstore``, ``purge``, and ``collections``.
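        # For example (illustrative values only):
        #   {"disassociate": True, "purge": True, "unstore": True,
        #    "collections": ("my/run",)}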
        self.action = None
        # errDict is a container for variables related to the error that may
        # be substituted into a user-visible string.
        self.errDict = errDict or {}

    @property
    def dryRun(self) -> bool:
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        return self.state is self.State.ERR_NO_OP


def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing
        the repo and its location.
    collections : iterable [`str`]
        A list of glob-style search strings that identify the collections to
        search for.
    datasets : iterable [`str`]
        A list of glob-style search strings that identify the dataset type
        names to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) a dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. If not `None`
        then ``purge_run`` must be `None`.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that the
        current implementation accepts any RUN-type collection, but will
        remove datasets from all collections in ``collections`` if it is
        non-empty.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If `False`, for each result data ID, only delete the dataset from the
        first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDatasets``'s
        ``find_first`` option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for
        removal, removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring disassociate),
      because disassociate comes with a collection argument that we can't
      respect, and that might be confusing (purge will disassociate from all
      TAGGED collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
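
    Examples
    --------
    A minimal, illustrative sketch of a dry run (the repo path and collection
    name here are hypothetical)::

        result = pruneDatasets(
            repo="/path/to/repo",
            collections=("my/run",),
            datasets=(),
            where="",
            disassociate_tags=(),
            unstore=True,
            purge_run="",
            dry_run=True,
            confirm=False,
            find_all=False,
        )
        if result.dryRun:
            print(result.action)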
    """
    if not disassociate_tags and not unstore and not purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_OP)

    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_PURGE_AND_DISASSOCIATE)

    # If collections is not specified, fall back to purge_run if it is given,
    # otherwise to disassociate_tags.
    if not collections:
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags

    if not collections:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_COLLECTION_RESTRICTION)

    # If purging, verify that the collection to purge is a RUN-type
    # collection.
    if purge_run:
        butler = Butler.from_config(repo, without_datastore=True)
        collectionType = butler.registry.getCollectionType(purge_run)
        if collectionType is not CollectionType.RUN:
            return PruneDatasetsResult(
                state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run)
            )

    datasets_found = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        # By default we want find_first to be True if collections are
        # provided (else False); find_first requires collections to be
        # provided. But the user may ask to find all, which forces
        # find_first to be False.
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(datasets_found.getTables())

    # Purge implies both disassociating (from all TAGGED collections) and
    # unstoring; see the Notes section in the docstring above.
    disassociate = bool(disassociate_tags) or bool(purge_run)
    purge = bool(purge_run)
    unstore = unstore or bool(purge_run)

    if dry_run:
        result.state = PruneDatasetsResult.State.DRY_RUN_COMPLETE
        result.action = dict(disassociate=disassociate, purge=purge, unstore=unstore, collections=collections)
        return result

    # Deferred action that performs the actual pruning; it runs immediately
    # unless the caller asked to confirm first.
    def doPruneDatasets() -> PruneDatasetsResult:
        butler = Butler.from_config(repo, writeable=True)
        butler.pruneDatasets(
            refs=datasets_found.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = PruneDatasetsResult.State.FINISHED
        return result

    if confirm:
        result.state = PruneDatasetsResult.State.AWAITING_CONFIRMATION
        result.onConfirmation = doPruneDatasets
        return result

    return doPruneDatasets()