Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 35%
84 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-15 01:59 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-15 01:59 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23from collections.abc import Callable, Iterable
24from enum import Enum, auto
25from typing import TYPE_CHECKING, Any
27from .._butler import Butler
28from ..registry import CollectionType
29from .queryDatasets import QueryDatasets
31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true
32 from astropy.table import Table
class PruneDatasetsResult:
    """Outcome of a prune-datasets action.

    The action is not necessarily complete when this object is returned: if
    the caller asked for confirmation, the removal only happens once
    ``onConfirmation`` is called.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables that will be or were deleted, by default `None`.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action. A false value
        (including `None`, the default) selects ``State.INIT``.
    errDict : `dict` [`str`, `str`], optional
        Variables related to an error that may be substituted into a
        user-visible string. A false value (including `None`, the default)
        is replaced by a new empty `dict`.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : `PruneDatasetsResult.State`
        The current state of the action.
    onConfirmation : callable or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action. Initialized to
        `None`.
    action : `dict` [`str`, `Any`] or `None`
        Description of the removal action for a dry run; when set it is a
        dict with keys ``disassociate``, ``unstore``, ``purge`` and
        ``collections``. Initialized to `None`.
    errDict : `dict` [`str`, `str`]
        Same as in Parameters (never `None`).
    """

    action: dict[str, Any] | None
    onConfirmation: Callable | None

    class State(Enum):
        """Execution states, including error states, of a prune action."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        self.tables = tables
        # Any false ``state`` falls back to INIT.
        self.state = state if state else self.State.INIT
        # Nothing to confirm and no dry-run description until a caller
        # assigns them.
        self.onConfirmation = None
        self.action = None
        # Any false ``errDict`` is replaced by a fresh empty dict, so the
        # attribute can always be used for string substitution.
        self.errDict = errDict if errDict else {}

    @property
    def dryRun(self) -> bool:
        """Whether the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        """Whether the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        """Whether the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        """Whether the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        """Whether the state is ``ERR_NO_COLLECTION_RESTRICTION`` (`bool`)."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        """Whether the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        """Whether the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP
def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str | None,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        Glob-style search strings that identify the collections to search.
        If empty, ``purge_run`` (or, failing that, ``disassociate_tags``) is
        used instead.
    datasets : iterable [`str`]
        Glob-style search strings that identify the dataset type names to
        search for.
    where : `str` or `None`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : iterable [`str`]
        TAGGED collections to disassociate the datasets from. Must be empty
        if ``purge_run`` is given.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDataset``'s
        find_first option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    Legal and illegal combinations of purge, unstore, and disassociate:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring
      disassociate), because that comes with a collection argument that we
      can't respect, and that might be confusing (purge will disassociate
      from all TAGGED collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Nothing requested at all.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)

    # Purge and disassociate are mutually exclusive (see Notes).
    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    # With no explicit collections, fall back to the purge run, then to the
    # tags to disassociate; with neither there is no restriction to search.
    if not collections:
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags
        else:
            return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)

    butler = Butler(repo)

    # Purging is only allowed on a RUN-type collection.
    if purge_run:
        purge_run_type = butler.registry.getCollectionType(purge_run)
        if purge_run_type is not CollectionType.RUN:
            return PruneDatasetsResult(
                state=State.ERR_PRUNE_ON_NOT_RUN,
                errDict={"collection": purge_run},
            )

    # find_first is simply the inverse of the caller's find_all request
    # (find_first requires collections, which are guaranteed at this point).
    datasets_found = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(datasets_found.getTables())

    # A purge implies both disassociation and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags or purge_run)
    unstore = unstore or purge

    if dry_run:
        result.state = State.DRY_RUN_COMPLETE
        result.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        return result

    def doPruneDatasets() -> PruneDatasetsResult:
        # Removal needs a writeable butler; the one above was opened without
        # ``writeable=True``.
        writeable_butler = Butler(repo, writeable=True)
        writeable_butler.pruneDatasets(
            refs=datasets_found.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = State.FINISHED
        return result

    if not confirm:
        return doPruneDatasets()

    # Defer the removal: hand the caller the closure to run once confirmed.
    result.state = State.AWAITING_CONFIRMATION
    result.onConfirmation = doPruneDatasets
    return result