Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 28%

100 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-03 10:43 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28 

29import itertools 

30from typing import Any 

31 

32from lsst.daf.butler import Butler, CollectionType 

33from lsst.daf.butler.registry import MissingCollectionError 

34 

35from .confirmable import ConfirmableResult 

36 

# Remediation hint appended to the failure messages that report a collection
# being contained in one or more parent chained collections.
advice = (
    'Use "butler collection-chain --mode remove" to remove this collection from its parent or\n'
    'use "butler remove-collections" to remove that parent entirely.'
)

41 

42 

class ChildHasMultipleParentsFailure:
    """Failure recorded when a child collection is in several chains.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain ``child``.
    """

    def __init__(self, child: str, parents: list[str]):
        self.child, self.parents = child, parents

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes for display.
        quoted_parents = ", ".join(f'"{name}"' for name in self.parents)
        return f'Collection "{self.child}" is in multiple chained collections: {quoted_parents}.\n {advice}'

53 

54 

class TopCollectionHasParentsFailure:
    """Failure recorded when the top collection is itself inside a chain.

    Parameters
    ----------
    collection : `str`
        Name of the top-level collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.collection, self.parents = collection, parents

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes for display.
        quoted_parents = ", ".join(f'"{name}"' for name in self.parents)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {quoted_parents}.\n {advice}"
        )
        return message

68 

69 

class TopCollectionIsNotChainedFailure:
    """Failure recorded when the top collection is not a CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the top-level collection that was passed in.
    collection_type : `~lsst.daf.butler.CollectionType`
        The type the collection actually has.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection, self.collection_type = collection, collection_type

    def __str__(self) -> str:
        type_name = self.collection_type.name
        return (
            "The passed-in collection must be a CHAINED collection; "
            f'"{self.collection}" is a {type_name} collection.'
        )

82 

83 

class TopCollectionNotFoundFailure:
    """Failure recorded when the top collection does not exist.

    Parameters
    ----------
    collection : `str`
        Name of the top-level collection that was passed in.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        name = self.collection
        return f'The passed-in collection "{name}" was not found.'

92 

93 

class PurgeResult(ConfirmableResult):
    """The results of the purge command.

    Collects the names of the collections to remove, grouped by kind, or
    a failure object describing why the purge can not proceed.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration; used to construct a writeable butler when
        the purge is confirmed.
    """

    def __init__(self, butler_config: str):
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []
        self.butler_config = butler_config
        self.failure: Any = None

    @property
    def describe_failure(self) -> str:
        """The string form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections that will be, or were, removed.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is phrased in the future tense ("Will
            remove"), otherwise in the past tense ("Removed").

        Returns
        -------
        msg : `str`
            Multi-line summary listing runs, chains, and other collections.
        """
        msg = ""
        if will:
            msg += "Will remove:\n"
        else:
            msg += "Removed:\n"
        msg += f" runs: {', '.join(self.runs_to_remove)}\n"
        msg += f" chains: {', '.join(self.chains_to_remove)}\n"
        msg += f" others: {', '.join(self.others_to_remove)}"
        return msg

    def on_confirmation(self) -> None:
        """Remove the recorded collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded while preparing the purge.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler.from_config(self.butler_config, writeable=True)
        with butler.transaction():
            # Remove the CHAINED collections first. They are recorded
            # parent-before-child, and a collection can not be removed from
            # the registry while it is still a member of a chain, so the
            # non-RUN children ("others") must only be removed after the
            # chains that contain them are gone. RUN collections are
            # removed last for the same reason.
            for c in itertools.chain(self.chains_to_remove, self.others_to_remove):
                butler.registry.removeCollection(c)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Whether there is anything to do after confirmation (`bool`)."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes in this module; its string form is
            what `describe_failure` reports.
        """
        self.failure = failure

150 

151 

def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Compare the parent chains of a collection against an expected set.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection whose parents are looked up.
    expected_parents : `list` [`str`]
        The parents the child is expected to have; order is irrelevant.

    Returns
    -------
    parents : `list` or `None`
        `None` if the child's parents matched the expected parents,
        otherwise the actual parents of the child.
    """
    actual_parents = butler.registry.getCollectionParentChains(child)
    return list(actual_parents) if actual_parents != set(expected_parents) else None

175 

176 

def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Register a CHAINED collection and its matching children for removal.

    The parent is appended to the chains to remove. Each child whose name
    starts with ``top_collection`` is then checked to have exactly one
    parent (``parent_collection``) and is sorted into the runs, chains, or
    others list of ``purge_result``. If ``recursive`` is `True`, child
    CHAINED collections are processed the same way as the parent.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If True then children of the top collection that are also CHAINED
        collections will be purged.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        # Children outside the top collection's name prefix are left alone.
        if not member.startswith(top_collection):
            continue

        unexpected_parents = check_parents(butler, member, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(member, unexpected_parents))

        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.CHAINED:
            if not recursive:
                purge_result.chains_to_remove.append(member)
            else:
                # The recursive call appends ``member`` to the chains itself.
                prepare_to_remove(
                    top_collection=top_collection,
                    parent_collection=member,
                    butler=butler,
                    recursive=recursive,
                    purge_result=purge_result,
                )
        elif member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
        else:
            purge_result.others_to_remove.append(member)

230 

231 

def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Purge a CHAINED collection and its children from a repository.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : bool
        If True then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The description of what collections to remove and/or the failure
        encountered while preparing to remove them, and a completion
        function to perform the removal after confirmation, if needed.
    """
    outcome = PurgeResult(butler_config)
    butler = Butler.from_config(butler_config)

    try:
        top_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        outcome.fail(TopCollectionNotFoundFailure(collection))
        return outcome

    if top_type != CollectionType.CHAINED:
        outcome.fail(TopCollectionIsNotChainedFailure(collection, top_type))
        return outcome

    # The top collection must not be a member of any other chain.
    if unexpected_parents := check_parents(butler, collection, []):
        outcome.fail(TopCollectionHasParentsFailure(collection, unexpected_parents))
        return outcome

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        purge_result=outcome,
        butler=butler,
        recursive=recursive,
    )
    return outcome

277 return result