Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 28%

100 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-01-30 10:53 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28 

29import itertools 

30from typing import Any 

31 

32from lsst.daf.butler import Butler, CollectionType 

33from lsst.daf.butler.registry import MissingCollectionError 

34 

35from .confirmable import ConfirmableResult 

36 

# Remediation hint appended to the failure messages that report unexpected
# parent collections.
advice = "".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or\n',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)

41 

42 

class ChildHasMultipleParentsFailure:
    """Failure reported when a child collection is in multiple chained
    collections.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain the child.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Each parent name is double-quoted and the names are comma-separated.
        listing = ", ".join(f'"{name}"' for name in self.parents)
        return f'Collection "{self.child}" is in multiple chained collections: {listing}.\n {advice}'

61 

62 

class TopCollectionHasParentsFailure:
    """Failure reported when the collection passed to purge is itself
    contained in other collections.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain it.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Each parent name is double-quoted and the names are comma-separated.
        listing = ", ".join(f'"{name}"' for name in self.parents)
        requirement = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
        )
        return requirement + f"is contained in collection(s) {listing}.\n {advice}"

84 

85 

class TopCollectionIsNotChainedFailure:
    """Failure reported when the collection passed to purge is not a
    CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The actual type of that collection.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        # Only the ``name`` attribute of the collection type is used here.
        type_name = self.collection_type.name
        requirement = "The passed-in collection must be a CHAINED collection; "
        return requirement + f'"{self.collection}" is a {type_name} collection.'

106 

107 

class TopCollectionNotFoundFailure:
    """Failure reported when the collection passed to purge does not exist.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was looked up.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        name = self.collection
        return f'The passed-in collection "{name}" was not found.'

122 

123 

class PurgeResult(ConfirmableResult):
    """Collect what the purge command will remove, or why it cannot.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration URI.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Set through `fail`; stays `None` while preparation has succeeded.
        self.failure: Any = None
        # Names of collections queued for removal, grouped by kind.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []

    @property
    def describe_failure(self) -> str:
        """The string form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections queued for removal.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is phrased in the future tense
            ("Will remove"), otherwise in the past tense ("Removed").

        Returns
        -------
        msg : `str`
            Multi-line summary listing runs, chains and other collections.
        """
        header = "Will remove:\n" if will else "Removed:\n"
        return (
            header
            + f" runs: {', '.join(self.runs_to_remove)}\n"
            + f" chains: {', '.join(self.chains_to_remove)}\n"
            + f" others: {', '.join(self.others_to_remove)}"
        )

    def on_confirmation(self) -> None:
        """Remove the queued collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded while preparing the purge.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler.from_config(self.butler_config, writeable=True)
        with butler.transaction():
            # "Other" collections are removed first, then the chains; the runs
            # are removed last in a single call.
            non_run_collections = itertools.chain(self.others_to_remove, self.chains_to_remove)
            for name in non_run_collections:
                butler.registry.removeCollection(name)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Whether there is anything to act on; always `True` (`bool`)."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : `ChildHasMultipleParentsFailure` or \
                `TopCollectionHasParentsFailure` or \
                `TopCollectionIsNotChainedFailure` or \
                `TopCollectionNotFoundFailure`
            Description of what went wrong.
        """
        self.failure = failure

186 

187 

def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Check that the parents of a child collection match the provided
    expected parents.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents. Order and duplicates are ignored.

    Returns
    -------
    parents : `list` [`str`] or `None`
        `None` if the child's parents matched the expected parents.
        Otherwise the actual parents of the child, sorted by name so that
        messages built from them are reproducible between runs. Note that
        this is an empty (falsy) list when the child has no parents at all
        but some were expected, so callers that need to detect that case
        must compare against `None` rather than test truthiness.
    """
    # Normalize to a set so the comparison is order-insensitive regardless of
    # the concrete container type returned by the registry.
    actual_parents = set(butler.registry.getCollectionParentChains(child))
    if actual_parents == set(expected_parents):
        return None
    # Set iteration order is arbitrary; sort for deterministic output.
    return sorted(actual_parents)

211 

212 

def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Queue a CHAINED collection for removal, then sort its children into
    the appropriate removal lists.

    Each child that is considered is checked to have exactly one parent
    (the CHAINED collection being processed); if it does not, a failure is
    recorded on ``purge_result``. If ``recursive`` is `True`, children that
    are themselves CHAINED collections are processed the same way.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        if not member.startswith(top_collection):
            # Members whose name does not begin with the top collection's
            # name are ignored.
            continue

        actual_parents = check_parents(butler, member, [parent_collection])
        if actual_parents:
            # Record the failure but keep classifying; removal is refused
            # later when a failure is present.
            purge_result.fail(ChildHasMultipleParentsFailure(member, actual_parents))

        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
        elif member_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(member)
        elif recursive:
            # The recursive call queues ``member`` itself as a chain.
            prepare_to_remove(
                top_collection=top_collection,
                parent_collection=member,
                butler=butler,
                recursive=recursive,
                purge_result=purge_result,
            )
        else:
            purge_result.chains_to_remove.append(member)

266 

267 

def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Purge a CHAINED collection and its children from a repository.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The description of what to remove and/or the failure encountered
        while preparing, along with a completion function that performs the
        removal after confirmation, if needed.
    """
    outcome = PurgeResult(butler_config)
    butler = Butler.from_config(butler_config)

    try:
        top_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        outcome.fail(TopCollectionNotFoundFailure(collection))
        return outcome

    # Only a CHAINED collection can be purged.
    if top_type != CollectionType.CHAINED:
        outcome.fail(TopCollectionIsNotChainedFailure(collection, top_type))
        return outcome

    # The top collection must not be a member of any other chain.
    unexpected_parents = check_parents(butler, collection, [])
    if unexpected_parents:
        outcome.fail(TopCollectionHasParentsFailure(collection, unexpected_parents))
        return outcome

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        purge_result=outcome,
        butler=butler,
        recursive=recursive,
    )
    return outcome

313 return result