Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 28%
100 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 04:15 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-20 04:15 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
29import itertools
30from typing import Any
32from lsst.daf.butler import Butler, CollectionType
33from lsst.daf.butler.registry import MissingCollectionError
35from .confirmable import ConfirmableResult
# Remediation hint appended to the failure messages that report unexpected
# parent collections.
advice = "".join(
    [
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or\n',
        'use "butler remove-collections" to remove that parent entirely.',
    ]
)
class ChildHasMultipleParentsFailure:
    """Failure recorded when a child collection's parent chains are not the
    single expected parent.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain the child.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining so the
        # individual names stay distinguishable in the message.
        parents = ", ".join(f'"{p}"' for p in self.parents)
        return f'Collection "{self.child}" is in multiple chained collections: {parents}.\n {advice}'
class TopCollectionHasParentsFailure:
    """Failure recorded when the collection passed in for purging is itself
    a member of other collections.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining.
        parents = ", ".join(f'"{p}"' for p in self.parents)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {parents}.\n {advice}"
        )
        return message
class TopCollectionIsNotChainedFailure:
    """Failure recorded when the collection passed in for purging is not a
    CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The type the collection actually has; only its ``name`` attribute is
        used when formatting the message.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        requirement = "The passed-in collection must be a CHAINED collection; "
        actual = f'"{self.collection}" is a {self.collection_type.name} collection.'
        return requirement + actual
class TopCollectionNotFoundFailure:
    """Failure recorded when the collection passed in for purging does not
    exist.

    Parameters
    ----------
    collection : `str`
        Name of the collection that could not be found.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        message = f'The passed-in collection "{self.collection}" was not found.'
        return message
class PurgeResult(ConfirmableResult):
    """Outcome of preparing a purge: the collections to remove, or the
    failure that prevents the purge.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration URI.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Holds one of the *Failure objects once `fail` has been called.
        self.failure: Any = None
        # Collection names, grouped by how they are removed on confirmation.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []

    @property
    def describe_failure(self) -> str:
        """String form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections grouped by kind.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is phrased as pending work ("Will
            remove"), otherwise as completed work ("Removed").

        Returns
        -------
        msg : `str`
            Multi-line summary listing runs, chains and other collections.
        """
        header = "Will remove:\n" if will else "Removed:\n"
        return "".join(
            [
                header,
                f" runs: {', '.join(self.runs_to_remove)}\n",
                f" chains: {', '.join(self.chains_to_remove)}\n",
                f" others: {', '.join(self.others_to_remove)}",
            ]
        )

    def on_confirmation(self) -> None:
        """Remove the collected collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded while preparing the purge.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler.from_config(self.butler_config, writeable=True)
        with butler.transaction():
            # Non-run collections go first ("others", then chains); the runs
            # are removed afterwards.
            non_runs = itertools.chain.from_iterable((self.others_to_remove, self.chains_to_remove))
            for name in non_runs:
                butler.registry.removeCollection(name)
            # NOTE(review): the extracted source lost its indentation; the
            # run removal is assumed to belong inside the transaction --
            # confirm against the repository copy.
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Whether there is anything to do; always `True` (`bool`)."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes accepted by the annotation; it is
            stored as-is and later rendered with `str`.
        """
        self.failure = failure
def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Check that the parents of a child collection match the provided
    expected parents.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents; order and duplicates are irrelevant
        because the comparison is made against ``set(expected_parents)``.

    Returns
    -------
    parents : `list` [`str`] or `None`
        `None` if the child's parents matched the expected parents.
        Otherwise the actual parents of the child, sorted by name so that
        anything built from the list (such as a failure message) is
        reproducible.

    Notes
    -----
    If the child has no parents at all while some were expected, the
    returned list is empty and therefore falsy. A caller that needs to
    detect that case must compare the result against `None` instead of
    testing its truthiness.
    """
    parents = butler.registry.getCollectionParentChains(child)
    if parents != set(expected_parents):
        # The parents are compared as a set, so their iteration order carries
        # no meaning; sort them rather than expose an arbitrary order.
        return sorted(parents)
    return None
def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Add a CHAINED collection to the list of chains to remove and then
    find its children and add them to the appropriate lists for removal.

    Each considered child is checked to have exactly one parent (the
    CHAINED collection being processed); a mismatch is recorded as a failure
    on ``purge_result`` but the child is still classified. If ``recursive``
    is `True` the function calls itself for every child that is a CHAINED
    collection.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If `True` then children that are also CHAINED collections are
        processed recursively; otherwise such children are only added to
        the list of chains to remove.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for child in butler.registry.getCollectionChain(parent_collection):
        # Only children whose name starts with the top collection's name are
        # part of the purge.
        if not child.startswith(top_collection):
            continue

        unexpected_parents = check_parents(butler, child, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(child, unexpected_parents))

        # The child is classified even when a failure was recorded above.
        child_type = butler.registry.getCollectionType(child)
        if child_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(child)
        elif child_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(child)
        elif recursive:
            # The recursive call adds the child itself to chains_to_remove.
            prepare_to_remove(top_collection, child, butler, recursive, purge_result)
        else:
            purge_result.chains_to_remove.append(child)
def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Purge a CHAINED collection and its children from a repository.

    This function only prepares the purge: it validates the top-level
    collection and gathers the collections to remove. Nothing is removed
    here; removal happens in `PurgeResult.on_confirmation`.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The description of what to remove and/or the failure encountered
        while preparing the removal, and a completion function to perform
        the removal after confirmation, if needed.
    """
    result = PurgeResult(butler_config)
    butler = Butler.from_config(butler_config)

    try:
        collection_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        result.fail(TopCollectionNotFoundFailure(collection))
        return result

    # The top-level collection must be a chain...
    if collection_type != CollectionType.CHAINED:
        result.fail(TopCollectionIsNotChainedFailure(collection, collection_type))
        return result

    # ...and must not itself be a member of any other chain.
    parents = check_parents(butler, collection, [])
    if parents:
        result.fail(TopCollectionHasParentsFailure(collection, parents))
        return result

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        butler=butler,
        recursive=recursive,
        purge_result=result,
    )
    return result