Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 25%
100 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-28 10:40 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-28 10:40 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23import itertools
24from typing import Any
26from lsst.daf.butler import Butler, CollectionType
27from lsst.daf.butler.registry import MissingCollectionError
29from .confirmable import ConfirmableResult
# Remediation hint shared by the failure messages that report unexpected
# parent collections.
advice = "".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or\n',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)
class ChildHasMultipleParentsFailure:
    """Failure describing a child collection that has multiple parents.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections reported as parents of the child.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before being listed.
        quoted = (f'"{name}"' for name in self.parents)
        parents = ", ".join(quoted)
        return f'Collection "{self.child}" is in multiple chained collections: {parents}.\n {advice}'
class TopCollectionHasParentsFailure:
    """Failure describing a top collection that is contained in others.

    Parameters
    ----------
    collection : `str`
        Name of the top collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain the top collection.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before being listed.
        parents = ", ".join(f'"{name}"' for name in self.parents)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {parents}.\n {advice}"
        )
        return message
class TopCollectionIsNotChainedFailure:
    """Failure describing a top collection that is not a CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the top collection that was passed in.
    collection_type : `~lsst.daf.butler.CollectionType`
        The type the collection actually has.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        requirement = "The passed-in collection must be a CHAINED collection; "
        actual = f'"{self.collection}" is a {self.collection_type.name} collection.'
        return requirement + actual
class TopCollectionNotFoundFailure:
    """Failure describing a top collection that could not be found.

    Parameters
    ----------
    collection : `str`
        Name of the top collection that was passed in.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        name = self.collection
        return f'The passed-in collection "{name}" was not found.'
class PurgeResult(ConfirmableResult):
    """Collections selected by the purge command, or the failure that
    prevented the purge.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration, passed to `~lsst.daf.butler.Butler` when the
        removal is carried out.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Names of collections queued for removal, grouped by kind.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []
        # The most recently recorded failure object, or None if there is none.
        self.failure: Any = None

    @property
    def describe_failure(self) -> str:
        """The string form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections that will be, or have been, removed.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is headed "Will remove:", otherwise it is
            headed "Removed:".

        Returns
        -------
        msg : `str`
            The header followed by one line each for runs, chains and others.
        """
        header = "Will remove:\n" if will else "Removed:\n"
        return (
            header
            + f" runs: {', '.join(self.runs_to_remove)}\n"
            + f" chains: {', '.join(self.chains_to_remove)}\n"
            + f" others: {', '.join(self.others_to_remove)}"
        )

    def on_confirmation(self) -> None:
        """Remove the queued collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure has been recorded on this result.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler(self.butler_config, writeable=True)
        # "Other" collections go first, then chains in the order they were
        # queued, then the runs.
        non_run_collections = self.others_to_remove + self.chains_to_remove
        with butler.transaction():
            for name in non_run_collections:
                butler.registry.removeCollection(name)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Whether there is anything to do; always `True` (`bool`)."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes named in the signature annotation.
        """
        self.failure = failure
def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Compare the parent chains of a collection against an expected list.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents.

    Returns
    -------
    parents : `list` or `None`
        `None` if the child's parents matched the expected parents,
        otherwise the actual parents of the child.
    """
    actual = butler.registry.getCollectionParentChains(child)
    # Compared as a set, so order and duplicates in expected_parents are
    # irrelevant.
    return list(actual) if actual != set(expected_parents) else None
def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Queue a CHAINED collection for removal, then sort its children into
    the appropriate removal lists.

    Each child considered is checked to have exactly one parent (the CHAINED
    collection being processed); a mismatch is recorded as a failure on
    ``purge_result``. If ``recursive`` is `True`, child CHAINED collections
    are processed the same way.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If True then children of the top collection that are also CHAINED
        collections will be purged.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    registry = butler.registry
    assert registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)
    for member in registry.getCollectionChain(parent_collection):
        # Children outside the top collection's name prefix are left alone.
        if not member.startswith(top_collection):
            continue
        if actual_parents := check_parents(butler, member, [parent_collection]):
            purge_result.fail(ChildHasMultipleParentsFailure(member, actual_parents))
        member_type = registry.getCollectionType(member)
        if member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
        elif member_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(member)
        elif recursive:
            # The recursive call queues the child chain itself.
            prepare_to_remove(
                top_collection=top_collection,
                parent_collection=member,
                butler=butler,
                recursive=recursive,
                purge_result=purge_result,
            )
        else:
            purge_result.chains_to_remove.append(member)
def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Prepare the purge of a CHAINED collection and its children from a
    repository.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If True then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The description of what to remove and/or the failure encountered
        while preparing, and a completion function to do the removal after
        confirmation, if needed.
    """
    outcome = PurgeResult(butler_config)
    butler = Butler(butler_config)

    try:
        top_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        outcome.fail(TopCollectionNotFoundFailure(collection))
        return outcome

    if top_type != CollectionType.CHAINED:
        outcome.fail(TopCollectionIsNotChainedFailure(collection, top_type))
        return outcome

    # The top collection is expected to have no parent chains at all.
    unexpected_parents = check_parents(butler, collection, [])
    if unexpected_parents:
        outcome.fail(TopCollectionHasParentsFailure(collection, unexpected_parents))
        return outcome

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        purge_result=outcome,
        butler=butler,
        recursive=recursive,
    )
    return outcome