Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 28%

100 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-14 19:56 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22 

23import itertools 

24from typing import Any 

25 

26from lsst.daf.butler import Butler, CollectionType 

27from lsst.daf.butler.registry import MissingCollectionError 

28 

29from .confirmable import ConfirmableResult 

30 

# Remediation hint appended to the parent-related failure messages in this
# module; it names the butler commands that detach or remove a parent chain.
advice = "\n".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)

35 

36 

class ChildHasMultipleParentsFailure:
    """Failure recorded when a child collection's parents are not the
    single expected parent.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain ``child``.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Quote each parent name individually so the list reads unambiguously.
        quoted_names = (f'"{name}"' for name in self.parents)
        parent_list = ", ".join(quoted_names)
        return f'Collection "{self.child}" is in multiple chained collections: {parent_list}.\n {advice}'

47 

48 

class TopCollectionHasParentsFailure:
    """Failure recorded when the collection passed to the purge command is
    itself a member of other collections.

    Parameters
    ----------
    collection : `str`
        Name of the top-level collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Quote each parent name individually so the list reads unambiguously.
        quoted_names = (f'"{name}"' for name in self.parents)
        parent_list = ", ".join(quoted_names)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {parent_list}.\n {advice}"
        )
        return message

62 

63 

class TopCollectionIsNotChainedFailure:
    """Failure recorded when the collection passed to the purge command is
    not a CHAINED collection.

    Parameters
    ----------
    collection : `str`
        Name of the top-level collection that was passed in.
    collection_type : `CollectionType`
        The type the collection actually has; its ``name`` attribute is
        shown in the message.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        requirement = "The passed-in collection must be a CHAINED collection; "
        actual = f'"{self.collection}" is a {self.collection_type.name} collection.'
        return requirement + actual

76 

77 

class TopCollectionNotFoundFailure:
    """Failure recorded when the collection passed to the purge command
    could not be found.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was looked up.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        name = self.collection
        return f'The passed-in collection "{name}" was not found.'

86 

87 

class PurgeResult(ConfirmableResult):
    """Collections queued for removal by the purge command, plus any
    failure found while preparing them.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration; used to construct a writeable butler when the
        removal is carried out.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Set through `fail`; `None` means no failure has been recorded.
        self.failure: Any = None
        # Collections queued for removal, grouped by kind.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []

    @property
    def describe_failure(self) -> str:
        """The recorded failure rendered as text (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the queued collections.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is headed "Will remove:", otherwise
            "Removed:".

        Returns
        -------
        msg : `str`
            Heading followed by one line each for runs, chains and others.
        """
        heading = "Will remove:" if will else "Removed:"
        lines = [
            heading,
            f" runs: {', '.join(self.runs_to_remove)}",
            f" chains: {', '.join(self.chains_to_remove)}",
            f" others: {', '.join(self.others_to_remove)}",
        ]
        return "\n".join(lines)

    def on_confirmation(self) -> None:
        """Remove the queued collections using a writeable butler.

        Raises
        ------
        RuntimeError
            Raised if a failure was recorded during preparation.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        butler = Butler(self.butler_config, writeable=True)
        with butler.transaction():
            # Non-RUN collections go first ("others", then chains); the runs
            # are removed last in a single call.
            non_run_collections = itertools.chain(self.others_to_remove, self.chains_to_remove)
            for name in non_run_collections:
                butler.registry.removeCollection(name)
            butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return bool(self.failure)

    @property
    def can_continue(self) -> bool:
        """Always `True` (`bool`)."""
        # At the very least there is a top level CHAINED collection to
        # remove, and a missing top level collection is reported as a
        # TopCollectionNotFoundFailure instead.
        return True

    def fail(
        self,
        failure: (
            ChildHasMultipleParentsFailure
            | TopCollectionHasParentsFailure
            | TopCollectionIsNotChainedFailure
            | TopCollectionNotFoundFailure
        ),
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes defined in this module.
        """
        self.failure = failure

144 

145 

def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> list[str] | None:
    """Check that the parents of a child collection match the provided
    expected parents.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents. Order and duplicates are ignored.

    Returns
    -------
    parents : `list` [`str`] or `None`
        `None` if the child's parents matched the expected parents.
        Otherwise the actual parents of the child, sorted by name so that
        messages built from them are reproducible.

    Notes
    -----
    If parents were expected but the child has none, the returned list is
    empty and therefore falsy. Callers that test the result for truthiness
    will not see that case as a mismatch; compare against `None` if it
    matters.
    """
    actual_parents = butler.registry.getCollectionParentChains(child)
    if actual_parents == set(expected_parents):
        return None
    # The registry hands back an unordered collection; sort it so the parent
    # names appear in a stable order in user-facing failure messages.
    return sorted(actual_parents)

169 

170 

def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Queue a CHAINED collection for removal, then sort its children into
    the appropriate removal lists.

    Each considered child is checked to have exactly one parent (the
    CHAINED collection being processed); otherwise a
    `ChildHasMultipleParentsFailure` is recorded on ``purge_result``. The
    child is still added to a removal list after a failure is recorded.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged. Only children
        whose names start with this name are considered; other children
        are ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `~lsst.daf.butler.Butler`
        The butler to the repo.
    recursive : `bool`
        If `True`, child CHAINED collections are processed by a recursive
        call. If `False`, a child CHAINED collection is queued for removal
        but its own children are not examined.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    # Only CHAINED collections are expected here.
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        # Children outside the top collection's name prefix are left alone.
        if not member.startswith(top_collection):
            continue

        unexpected_parents = check_parents(butler, member, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(member, unexpected_parents))

        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.CHAINED and recursive:
            # The recursive call queues the child chain itself.
            prepare_to_remove(top_collection, member, butler, recursive, purge_result)
            continue

        if member_type == CollectionType.RUN:
            destination = purge_result.runs_to_remove
        elif member_type == CollectionType.CHAINED:
            destination = purge_result.chains_to_remove
        else:
            destination = purge_result.others_to_remove
        destination.append(member)

224 

225 

def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Prepare the purge of a CHAINED collection and its children from a
    repository.

    Nothing is removed here; the returned result carries out the removal
    in its ``on_confirmation`` method.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : `bool`
        If `True` then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : `PurgeResult`
        The collections queued for removal and/or the failure encountered
        while preparing them.
    """
    result = PurgeResult(butler_config)
    butler = Butler(butler_config)

    try:
        collection_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        result.fail(TopCollectionNotFoundFailure(collection))
        return result

    # The top collection must be a chain...
    if collection_type != CollectionType.CHAINED:
        result.fail(TopCollectionIsNotChainedFailure(collection, collection_type))
        return result

    # ...and must not itself be a member of any other chain.
    containing_chains = check_parents(butler, collection, [])
    if containing_chains:
        result.fail(TopCollectionHasParentsFailure(collection, containing_chains))
        return result

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        butler=butler,
        recursive=recursive,
        purge_result=result,
    )
    return result