Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 41%
84 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:21 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from collections.abc import Callable, Iterable
24from enum import Enum, auto
25from typing import TYPE_CHECKING, Any
27from .._butler import Butler
28from ..registry import CollectionType
29from .queryDatasets import QueryDatasets
31if TYPE_CHECKING:
32 from astropy.table import Table
class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables that will be or were deleted. If `None` an empty
        list is used.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action. If `None` the result
        state is ``INIT``.
    errDict : `dict` [`str`, `str`], optional
        Variables related to an error that may be substituted into a
        user-visible string. If `None` an empty dict is used.

    Attributes
    ----------
    tables : `list` [`astropy.table.Table`]
        Same as in Parameters.
    state : `PruneDatasetsResult.State`
        The current state of the action.
    onConfirmation : `~collections.abc.Callable` or `None`
        The function to call (with no arguments) to perform the action if the
        caller wants to confirm the tables before performing the action.
        Initialized to `None`.
    action : `dict` [`str`, `~typing.Any`] or `None`
        Describes the removal action for a dry run; when set it is a dict with
        keys ``disassociate``, ``unstore``, ``purge``, and ``collections``.
        Initialized to `None`.
    errDict : `dict` [`str`, `str`]
        Same as in Parameters.
    """

    action: dict[str, Any] | None
    onConfirmation: Callable | None

    class State(Enum):
        """State associated with dataset pruning request."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        # All optional arguments are compared against None explicitly so that
        # a caller-supplied (possibly empty) container is stored as given
        # rather than being silently swapped for a fresh one.
        self.state = self.State.INIT if state is None else state
        self.tables = [] if tables is None else tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string.
        self.errDict = {} if errDict is None else errDict

    @property
    def dryRun(self) -> bool:
        """Whether the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        """Whether the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        """Whether the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        """Whether the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        """Whether the state is ``ERR_NO_COLLECTION_RESTRICTION`` (`bool`)."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        """Whether the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        """Whether the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP
def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        A list of glob-style search strings that identify the collections to
        search for. If empty, ``purge_run`` (or, failing that,
        ``disassociate_tags``) is used as the collection restriction.
    datasets : iterable [`str`]
        A list of glob-style search strings that identify the dataset type
        names to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : iterable [`str`]
        TAGGED collections to disassociate the datasets from. If non-empty
        then ``purge_run`` must be empty.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections in ``collections`` if it is non-empty.
        An empty value means no purge is requested.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). The find-first
        search requires ``collections`` to specify at least one expression
        and to contain no wildcards. This is the inverse of
        ``QueryDataset``'s find_first option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring
      disassociate), because that comes with a collection argument that we
      can't respect, and that might be confusing (purge will disassociate
      from all TAGGED collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    if not disassociate_tags and not unstore and not purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_OP)

    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_PURGE_AND_DISASSOCIATE)

    # If collections is not specified and a purge_run is, use the purge_run for
    # collections, or if disassociate_tags is then use that.
    if not collections:
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags

    if not collections:
        return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_COLLECTION_RESTRICTION)

    # If purging, verify that the collection to purge is RUN type collection.
    # The read-only Butler is only needed for this check, so it is created
    # here rather than unconditionally.
    if purge_run:
        collectionType = Butler(repo).registry.getCollectionType(purge_run)
        if collectionType is not CollectionType.RUN:
            return PruneDatasetsResult(
                state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run)
            )

    datasets_found = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        # By default we want find_first to be True if collections are provided
        # (else False) (find_first requires collections to be provided).
        # But the user may specify that they want to find all (thus forcing
        # find_first to be False)
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(datasets_found.getTables())

    # Purging implies both disassociating and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    unstore = unstore or purge

    if dry_run:
        result.state = PruneDatasetsResult.State.DRY_RUN_COMPLETE
        result.action = dict(disassociate=disassociate, purge=purge, unstore=unstore, collections=collections)
        return result

    def doPruneDatasets() -> PruneDatasetsResult:
        # A separate, writeable Butler is created only when the removal is
        # actually carried out (immediately or after confirmation).
        writeable_butler = Butler(repo, writeable=True)
        writeable_butler.pruneDatasets(
            refs=datasets_found.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = PruneDatasetsResult.State.FINISHED
        return result

    if confirm:
        # Defer the removal; the caller runs ``result.onConfirmation`` once
        # the tables have been confirmed.
        result.state = PruneDatasetsResult.State.AWAITING_CONFIRMATION
        result.onConfirmation = doPruneDatasets
        return result

    return doPruneDatasets()