Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 30%
100 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-28 09:39 +0000
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-28 09:39 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23import itertools
24from typing import Any, Optional, Union
26from lsst.daf.butler import Butler, CollectionType
27from lsst.daf.butler.registry import MissingCollectionError
29from .confirmable import ConfirmableResult
# Remediation hint appended to the failure messages that report a collection
# still being a member of some other chained collection.
advice = "\n".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)
class ChildHasMultipleParentsFailure:
    """Failure describing a child collection that is in multiple chains.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections reported as containing the child.
    """

    def __init__(self, child: str, parents: list[str]):
        self.child, self.parents = child, parents

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes for the message.
        quoted_parents = ", ".join(f'"{name}"' for name in self.parents)
        return (
            f'Collection "{self.child}" is in multiple chained collections: {quoted_parents}.'
            f"\n {advice}"
        )
class TopCollectionHasParentsFailure:
    """Failure describing a top-level collection that has parent chains.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.collection, self.parents = collection, parents

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes for the message.
        quoted_parents = ", ".join(f'"{name}"' for name in self.parents)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {quoted_parents}.\n {advice}"
        )
        return message
class TopCollectionIsNotChianedFailure:
    """Failure describing a top-level collection that is not CHAINED.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The type the collection actually has; its ``name`` is used in the
        message.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection, self.collection_type = collection, collection_type

    def __str__(self) -> str:
        type_name = self.collection_type.name
        requirement = "The passed-in collection must be a CHAINED collection; "
        return requirement + f'"{self.collection}" is a {type_name} collection.'
class TopCollectionNotFoundFailure:
    """Failure describing a top-level collection that could not be found.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        # The message previously misspelled "collection" as "colleciton".
        return f'The passed-in collection "{self.collection}" was not found.'
class PurgeResult(ConfirmableResult):
    """Collections selected for purging, or the failure that prevents it.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration, passed to `Butler` when the purge is carried
        out in `on_confirmation`.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Names of the collections to remove, grouped by collection kind.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []
        # Failure object recorded through `fail`; `None` until then.
        self.failure: Any = None

    @property
    def describe_failure(self) -> str:
        """String form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections listed for removal.

        Parameters
        ----------
        will : `bool`
            If true the summary is headed "Will remove:", otherwise
            "Removed:".

        Returns
        -------
        msg : `str`
            The heading followed by one line each for runs, chains and
            other collections.
        """
        heading = "Will remove:\n" if will else "Removed:\n"
        runs = ", ".join(self.runs_to_remove)
        chains = ", ".join(self.chains_to_remove)
        others = ", ".join(self.others_to_remove)
        return f"{heading} runs: {runs}\n chains: {chains}\n others: {others}"

    def on_confirmation(self) -> None:
        """Remove the listed collections using a writeable butler.

        Raises
        ------
        RuntimeError
            Raised if a failure has been recorded on this result.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler(self.butler_config, writeable=True)
        with butler.transaction():
            # "Other" collections go first, then the chains, then the runs.
            for name in [*self.others_to_remove, *self.chains_to_remove]:
                butler.registry.removeCollection(name)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return True if self.failure else False

    @property
    def can_continue(self) -> bool:
        """Always `True` (`bool`)."""
        # There is always at least the top level CHAINED collection to
        # remove, and a missing top level collection is reported as a
        # TopCollectionNotFoundFailure instead.
        return True

    def fail(
        self,
        failure: Union[
            ChildHasMultipleParentsFailure,
            TopCollectionHasParentsFailure,
            TopCollectionIsNotChianedFailure,
            TopCollectionNotFoundFailure,
        ],
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes listed in the annotation.
        """
        self.failure = failure
def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> Optional[list[str]]:
    """Compare the parent chains of a collection with an expected list.

    Parameters
    ----------
    butler : `Butler`
        The butler whose registry is queried.
    child : `str`
        The collection whose parent chains are looked up.
    expected_parents : `list` [`str`]
        The parents the child is expected to have; compared as a set, so
        order and duplicates are ignored.

    Returns
    -------
    parents : `list` [`str`] or `None`
        `None` if the parents reported by the registry equal the expected
        set, otherwise the reported parents as a list.
    """
    actual_parents = butler.registry.getCollectionParentChains(child)
    if actual_parents == set(expected_parents):
        return None
    return list(actual_parents)
def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Queue a CHAINED collection and its matching children for removal.

    The parent is added to the chains to remove. Each child whose name
    starts with ``top_collection`` is then checked to have
    ``parent_collection`` as its only parent and is added to the removal
    list that matches its collection type. Other children are ignored.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged. Only children
        whose names start with this string are considered.
    parent_collection : `str`
        The CHAINED collection whose children are being examined.
    butler : `Butler`
        The butler to the repo.
    recursive : `bool`
        If `True`, child CHAINED collections are processed by a recursive
        call; if `False` they are only added to the chains to remove.
    purge_result : `PurgeResult`
        Populated in place with the collections to remove and with any
        failure that is found.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        if not member.startswith(top_collection):
            continue

        unexpected_parents = check_parents(butler, member, [parent_collection])
        if unexpected_parents:
            # Record the failure but keep going so the lists stay complete.
            purge_result.fail(ChildHasMultipleParentsFailure(member, unexpected_parents))

        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
        elif member_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(member)
        elif recursive:
            # The recursive call appends ``member`` to the chains itself.
            prepare_to_remove(
                top_collection=top_collection,
                parent_collection=member,
                butler=butler,
                recursive=recursive,
                purge_result=purge_result,
            )
        else:
            purge_result.chains_to_remove.append(member)
def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Prepare the purge of a CHAINED collection and its children.

    Nothing is removed here; the returned result lists what would be
    removed, or carries the failure that was found.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The collections to remove and/or the failure encountered while
        preparing, together with the function that performs the removal
        after confirmation.
    """
    result = PurgeResult(butler_config)
    butler = Butler(butler_config)

    try:
        collection_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        result.fail(TopCollectionNotFoundFailure(collection))
        return result

    if collection_type != CollectionType.CHAINED:
        result.fail(TopCollectionIsNotChianedFailure(collection, collection_type))
        return result

    # The top collection is expected to have no parent chains.
    parents = check_parents(butler, collection, [])
    if parents:
        result.fail(TopCollectionHasParentsFailure(collection, parents))
        return result

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        purge_result=result,
        butler=butler,
        recursive=recursive,
    )
    return result