Coverage for python / lsst / ctrl / mpexec / cli / script / purge.py: 26%

99 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-17 09:00 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28 

29import itertools 

30from typing import Any 

31 

32from lsst.daf.butler import Butler, CollectionType 

33from lsst.daf.butler.registry import MissingCollectionError 

34 

35from .confirmable import ConfirmableResult 

36 

# Remediation hint appended to the failure messages that report unexpected
# parent collections.
advice = "\n".join(
    [
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or',
        'use "butler remove-collections" to remove that parent entirely.',
    ]
)

41 

42 

class ChildHasMultipleParentsFailure:
    """Report that a child collection belongs to more than one chain.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain ``child``.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining.
        quoted = ", ".join(f'"{name}"' for name in self.parents)
        return f'Collection "{self.child}" is in multiple chained collections: {quoted}.\n {advice}'

61 

62 

class TopCollectionHasParentsFailure:
    """Report that the collection passed to purge is itself inside a chain.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining.
        quoted = ", ".join(f'"{name}"' for name in self.parents)
        return (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {quoted}.\n {advice}"
        )

84 

85 

class TopCollectionIsNotChainedFailure:
    """Report that the collection passed to purge is not a CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The actual type of ``collection``; its ``name`` is used in the
        message.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        kind = self.collection_type.name
        return (
            "The passed-in collection must be a CHAINED collection; "
            f'"{self.collection}" is a {kind} collection.'
        )

106 

107 

class TopCollectionNotFoundFailure:
    """Report that the collection passed to purge does not exist.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was looked up.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        missing = self.collection
        return f'The passed-in collection "{missing}" was not found.'

122 

123 

class PurgeResult(ConfirmableResult):
    """Collections selected for removal by the purge command, or the failure
    that prevented the selection.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration URI.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Stays `None` until `fail` is called.
        self.failure: Any = None
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []

    @property
    def describe_failure(self) -> str:
        """The recorded failure rendered as text (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections that will be, or have been, removed.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is headed "Will remove:", otherwise
            "Removed:".

        Returns
        -------
        msg : `str`
            Heading followed by one line each for runs, chains and others.
        """
        heading = "Will remove:" if will else "Removed:"
        runs = ", ".join(self.runs_to_remove)
        chains = ", ".join(self.chains_to_remove)
        others = ", ".join(self.others_to_remove)
        return f"{heading}\n runs: {runs}\n chains: {chains}\n others: {others}"

    def on_confirmation(self) -> None:
        """Remove the selected collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded on this result.
        """
        if self.failed:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        with Butler.from_config(self.butler_config, writeable=True) as butler, butler.transaction():
            # Non-RUN collections go first ("others", then chains); the RUN
            # collections are removed last in a single call.
            non_run_collections = itertools.chain(self.others_to_remove, self.chains_to_remove)
            for name in non_run_collections:
                butler.registry.removeCollection(name)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Always `True` (`bool`)."""
        # There is always something to do: at minimum the top level CHAINED
        # collection is removed, and a missing top level collection is
        # reported as a TopCollectionNotFoundFailure instead.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any failure recorded earlier.

        Parameters
        ----------
        failure : failure object
            One of the failure classes defined in this module.
        """
        self.failure = failure

185 

186 

def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Check that the parents of a child collection match the provided
    expected parents.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents. Order and duplicates are ignored; the
        comparison is made against ``set(expected_parents)``.

    Returns
    -------
    parents : `list` [`str`] or `None`
        `None` if the child's parents matched the expected parents.
        Otherwise the actual parents of the child, sorted by name so that
        messages built from them are reproducible.

    Notes
    -----
    If the child has no parents at all but some were expected, the result is
    an empty list rather than `None`. Callers that only test the truthiness
    of the result will treat that case as a match.
    """
    actual = butler.registry.getCollectionParentChains(child)
    if actual == set(expected_parents):
        return None
    # Sort rather than list(): iteration order of a set of strings can differ
    # from one interpreter run to the next.
    return sorted(actual)

210 

211 

def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Add a CHAINED collection to the list of chains to remove and then
    sort its children into the appropriate removal lists.

    Every child considered for removal is checked to have exactly one
    parent (``parent_collection``). A child that fails this check is
    reported through ``purge_result.fail`` but is still sorted into a
    removal list, and the scan continues; a later failure therefore
    replaces an earlier one.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If `True` then child CHAINED collections are processed by this
        function as well. If `False` they are only added to the chains to
        remove and their own children are not visited.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        if not member.startswith(top_collection):
            # Not named under the collection being purged; leave it alone.
            continue

        unexpected_parents = check_parents(butler, member, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(member, unexpected_parents))

        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
            continue
        if member_type != CollectionType.CHAINED:
            purge_result.others_to_remove.append(member)
            continue

        # The member is itself a CHAINED collection.
        if not recursive:
            purge_result.chains_to_remove.append(member)
        else:
            # The recursive call appends ``member`` to the chains itself.
            prepare_to_remove(
                top_collection=top_collection,
                parent_collection=member,
                butler=butler,
                recursive=recursive,
                purge_result=purge_result,
            )

265 

266 

def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Purge a CHAINED collection and its children from a repository.

    This function only works out what to remove; nothing is removed here.
    The returned result carries either the collections to remove or the
    failure that stopped the preparation.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If True then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The description of what to remove and/or the failure encountered
        while preparing, together with a completion function that performs
        the removal after confirmation, if needed.
    """
    outcome = PurgeResult(butler_config)
    with Butler.from_config(butler_config) as butler:
        try:
            top_type = butler.registry.getCollectionType(collection)
        except MissingCollectionError:
            outcome.fail(TopCollectionNotFoundFailure(collection))
            return outcome

        if top_type != CollectionType.CHAINED:
            outcome.fail(TopCollectionIsNotChainedFailure(collection, top_type))
            return outcome

        # The top collection must not be a member of any other chain.
        containing_chains = check_parents(butler, collection, [])
        if containing_chains:
            outcome.fail(TopCollectionHasParentsFailure(collection, containing_chains))
            return outcome

        prepare_to_remove(
            top_collection=collection,
            parent_collection=collection,
            purge_result=outcome,
            butler=butler,
            recursive=recursive,
        )
    return outcome

311 return result