Coverage for python / lsst / ctrl / mpexec / cli / script / purge.py: 26%
99 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:48 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:48 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
29import itertools
30from typing import Any
32from lsst.daf.butler import Butler, CollectionType
33from lsst.daf.butler.registry import MissingCollectionError
35from .confirmable import ConfirmableResult
# Remediation hint appended to the failure messages that report unexpected
# parent chains; one line per suggested butler subcommand.
advice = "\n".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)
class ChildHasMultipleParentsFailure:
    """Record that a child collection is contained in unexpected chains.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain the child.
    """

    def __init__(self, child: str, parents: list[str]):
        self.child = child
        self.parents = parents

    def __str__(self) -> str:
        # Render every parent name in double quotes, comma-separated.
        quoted_names = ", ".join(f'"{name}"' for name in self.parents)
        return f'Collection "{self.child}" is in multiple chained collections: {quoted_names}.\n {advice}'
class TopCollectionHasParentsFailure:
    """Record that the collection passed to purge is itself inside a chain.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain it.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.collection = collection
        self.parents = parents

    def __str__(self) -> str:
        # Render every parent name in double quotes, comma-separated.
        quoted_names = ", ".join(f'"{name}"' for name in self.parents)
        problem = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {quoted_names}."
        )
        return f"{problem}\n {advice}"
class TopCollectionIsNotChainedFailure:
    """Record that the collection passed to purge is not a CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The type the collection actually has.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection = collection
        self.collection_type = collection_type

    def __str__(self) -> str:
        # Only the ``name`` attribute of the collection type is reported.
        type_name = self.collection_type.name
        requirement = "The passed-in collection must be a CHAINED collection; "
        return requirement + f'"{self.collection}" is a {type_name} collection.'
class TopCollectionNotFoundFailure:
    """Record that the collection passed to purge does not exist.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was looked up.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        missing = self.collection
        return f'The passed-in collection "{missing}" was not found.'
class PurgeResult(ConfirmableResult):
    """Collections selected by the purge command, or the reason it failed.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration URI.
    """

    def __init__(self, butler_config: str):
        # Names of the collections selected for removal, kept in three
        # separate lists (runs, chains, everything else).
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []
        self.butler_config = butler_config
        # Populated through `fail`; remains `None` when nothing went wrong.
        self.failure: Any = None

    @property
    def describe_failure(self) -> str:
        """String form of the recorded failure."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections selected for removal.

        Parameters
        ----------
        will : `bool`
            If true the summary is headed "Will remove:", otherwise
            "Removed:".

        Returns
        -------
        msg : `str`
            Heading followed by one line each for runs, chains and others.
        """
        heading = "Will remove:\n" if will else "Removed:\n"
        runs = ", ".join(self.runs_to_remove)
        chains = ", ".join(self.chains_to_remove)
        others = ", ".join(self.others_to_remove)
        return f"{heading} runs: {runs}\n chains: {chains}\n others: {others}"

    def on_confirmation(self) -> None:
        """Remove the selected collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded while preparing the purge.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        with Butler.from_config(self.butler_config, writeable=True) as butler:
            with butler.transaction():
                # Non-run collections are removed first ("others" before
                # chains), then the runs.
                for name in itertools.chain(self.others_to_remove, self.chains_to_remove):
                    butler.registry.removeCollection(name)
                butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Always `True`."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any failure recorded earlier.

        Parameters
        ----------
        failure : failure object
            One of the failure classes defined in this module.
        """
        self.failure = failure
def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Compare the parent chains of a collection with the expected ones.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The collection whose parents are looked up.
    expected_parents : `list` [`str`]
        The parents the collection is expected to have; order and
        duplicates are ignored because the comparison is made on sets.

    Returns
    -------
    parents : `list` or `None`
        `None` when the actual parents equal the expected ones, otherwise
        the actual parents as a list. That list is empty when the collection
        has no parents although some were expected.
    """
    actual_parents = butler.registry.getCollectionParentChains(child)
    mismatch = actual_parents != set(expected_parents)
    return list(actual_parents) if mismatch else None
def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Schedule a CHAINED collection and its matching children for removal.

    The parent is appended to the chains to remove, then each of its
    children whose name starts with ``top_collection`` is checked and sorted
    into the run, chain or "other" removal list. A child whose only parent is
    not ``parent_collection`` is recorded as a failure on ``purge_result``.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged. Only children
        whose names start with this string are considered; all other
        children are ignored.
    parent_collection : `str`
        The CHAINED collection currently being processed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If `True`, CHAINED children are processed by calling this function
        on them. If `False`, a CHAINED child is scheduled for removal but
        its own children are not inspected.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for child in butler.registry.getCollectionChain(parent_collection):
        if not child.startswith(top_collection):
            continue

        # A failure is recorded but the walk continues, so a failure found
        # later replaces one found earlier.
        unexpected_parents = check_parents(butler, child, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(child, unexpected_parents))

        child_type = butler.registry.getCollectionType(child)
        if child_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(child)
        elif child_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(child)
        elif recursive:
            # The recursive call appends ``child`` to the chains itself.
            prepare_to_remove(
                top_collection=top_collection,
                parent_collection=child,
                butler=butler,
                recursive=recursive,
                purge_result=purge_result,
            )
        else:
            purge_result.chains_to_remove.append(child)
def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Prepare to purge a CHAINED collection and its children from a repo.

    Nothing is removed here; the returned result removes the collections
    when its ``on_confirmation`` method is called.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        Either the collections selected for removal, or the failure that was
        recorded because the collection is missing, is not CHAINED, has
        parents, or has a child with unexpected parents.
    """
    result = PurgeResult(butler_config)
    with Butler.from_config(butler_config) as butler:
        try:
            top_type = butler.registry.getCollectionType(collection)
        except MissingCollectionError:
            result.fail(TopCollectionNotFoundFailure(collection))
            return result

        if top_type != CollectionType.CHAINED:
            result.fail(TopCollectionIsNotChainedFailure(collection, top_type))
            return result

        # The top collection must not be a member of any other chain.
        existing_parents = check_parents(butler, collection, [])
        if existing_parents:
            result.fail(TopCollectionHasParentsFailure(collection, existing_parents))
            return result

        prepare_to_remove(
            top_collection=collection,
            parent_collection=collection,
            purge_result=result,
            butler=butler,
            recursive=recursive,
        )
        return result