Coverage for python/lsst/ctrl/mpexec/cli/script/purge.py: 25%

100 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-23 03:03 -0800

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22 

23import itertools 

24from typing import Any, Optional, Union 

25 

26from lsst.daf.butler import Butler, CollectionType 

27from lsst.daf.butler.registry import MissingCollectionError 

28 

29from .confirmable import ConfirmableResult 

30 

# Remediation hint appended to the failure messages that report unexpected
# parent collections.
advice = "\n".join(
    (
        'Use "butler collection-chain --mode remove" to remove this collection from its parent or',
        'use "butler remove-collections" to remove that parent entirely.',
    )
)

35 

36 

class ChildHasMultipleParentsFailure:
    """Failure describing a child collection whose parent chains are not
    the ones that were expected.

    Parameters
    ----------
    child : `str`
        Name of the child collection.
    parents : `list` [`str`]
        Names of the chained collections that contain ``child``.
    """

    def __init__(self, child: str, parents: list[str]):
        self.parents = parents
        self.child = child

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining.
        quoted = (f'"{name}"' for name in self.parents)
        parent_names = ", ".join(quoted)
        return f'Collection "{self.child}" is in multiple chained collections: {parent_names}.\n {advice}'

45 

46 

class TopCollectionHasParentsFailure:
    """Failure describing a top-level collection that is itself contained
    in other collections.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    parents : `list` [`str`]
        Names of the collections that contain ``collection``.
    """

    def __init__(self, collection: str, parents: list[str]):
        self.parents = parents
        self.collection = collection

    def __str__(self) -> str:
        # Each parent name is wrapped in double quotes before joining.
        parent_names = ", ".join(f'"{name}"' for name in self.parents)
        message = (
            f'The passed-in collection "{self.collection}" must not be contained in other collections but '
            f"is contained in collection(s) {parent_names}.\n {advice}"
        )
        return message

58 

59 

class TopCollectionIsNotChianedFailure:
    """Failure describing a passed-in collection that is not a CHAINED
    collection.

    The misspelling in the class name ("Chianed") is kept because the name
    is referenced by other code in this module.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    collection_type : `CollectionType`
        The actual type of ``collection``.
    """

    def __init__(self, collection: str, collection_type: CollectionType):
        self.collection_type = collection_type
        self.collection = collection

    def __str__(self) -> str:
        requirement = "The passed-in collection must be a CHAINED collection; "
        actual = f'"{self.collection}" is a {self.collection_type.name} collection.'
        return requirement + actual

70 

71 

class TopCollectionNotFoundFailure:
    """Failure describing a passed-in collection that could not be found.

    Parameters
    ----------
    collection : `str`
        Name of the collection that was passed in.
    """

    def __init__(self, collection: str):
        self.collection = collection

    def __str__(self) -> str:
        # The message previously misspelled "collection" as "colleciton".
        return f'The passed-in collection "{self.collection}" was not found.'

78 

79 

class PurgeResult(ConfirmableResult):
    """Accumulates the collections selected for purging, or the failure that
    prevents the purge, and performs the removal once confirmed.

    Parameters
    ----------
    butler_config : `str`
        Butler configuration used to construct the writeable butler that
        performs the removal.
    """

    def __init__(self, butler_config: str):
        self.butler_config = butler_config
        # Stays `None` unless `fail` is called.
        self.failure: Any = None
        # Names of collections to remove, grouped by how they are removed.
        self.runs_to_remove: list[str] = []
        self.chains_to_remove: list[str] = []
        self.others_to_remove: list[str] = []

    @property
    def describe_failure(self) -> str:
        """String form of the recorded failure (`str`)."""
        return str(self.failure)

    def describe(self, will: bool) -> str:
        """Summarize the collections that will be, or have been, removed.

        Parameters
        ----------
        will : `bool`
            If `True` the summary is phrased in the future tense, otherwise
            in the past tense.

        Returns
        -------
        msg : `str`
            A multi-line summary listing runs, chains, and other collections.
        """
        heading = "Will remove:" if will else "Removed:"
        lines = [
            heading,
            f" runs: {', '.join(self.runs_to_remove)}",
            f" chains: {', '.join(self.chains_to_remove)}",
            f" others: {', '.join(self.others_to_remove)}",
        ]
        return "\n".join(lines)

    def on_confirmation(self) -> None:
        """Remove the recorded collections from the repository.

        Raises
        ------
        RuntimeError
            Raised if a failure has been recorded on this result.
        """
        if self.failure:
            # This should not happen, it is a logic error.
            raise RuntimeError("Can not purge, there were errors preparing collections.")
        writeable_butler = Butler(self.butler_config, writeable=True)
        with writeable_butler.transaction():
            # Non-RUN, non-CHAINED collections go first, then the chains, in
            # the order they were recorded; runs are removed last.
            for name in (*self.others_to_remove, *self.chains_to_remove):
                writeable_butler.registry.removeCollection(name)
            writeable_butler.removeRuns(self.runs_to_remove)

    @property
    def failed(self) -> bool:
        """Whether a failure has been recorded (`bool`)."""
        return True if self.failure else False

    @property
    def can_continue(self) -> bool:
        """Whether there is anything to do (`bool`); always `True`."""
        # Will always be true: at the very least there is a top level CHAINED
        # collection to remove. And if the top level collection is not found it
        # results in a TopCollectionNotFoundFailure.
        return True

    def fail(
        self,
        failure: Union[
            ChildHasMultipleParentsFailure,
            TopCollectionHasParentsFailure,
            TopCollectionIsNotChianedFailure,
            TopCollectionNotFoundFailure,
        ],
    ) -> None:
        """Record a failure, replacing any previously recorded one.

        Parameters
        ----------
        failure : failure object
            One of the failure classes defined in this module.
        """
        self.failure = failure

134 

135 

def check_parents(butler: Butler, child: str, expected_parents: list[str]) -> Optional[list[str]]:
    """Compare the parent chains of a collection against an expected set.

    Parameters
    ----------
    butler : `Butler`
        The butler to the current repo.
    child : `str`
        The child collection to check.
    expected_parents : `list` [`str`]
        The list of expected parents.

    Returns
    -------
    parents : `list` or `None`
        `None` when the child's parents match the expected parents,
        otherwise the actual parents of the child.
    """
    actual = butler.registry.getCollectionParentChains(child)
    # The expected names are compared as a set, so their order is irrelevant.
    return list(actual) if actual != set(expected_parents) else None

159 

160 

def prepare_to_remove(
    top_collection: str,
    parent_collection: str,
    butler: Butler,
    recursive: bool,
    purge_result: PurgeResult,
) -> None:
    """Record a CHAINED collection for removal, then sort its children into
    the appropriate removal lists.

    Each child whose name starts with ``top_collection`` is checked to have
    exactly one parent (``parent_collection``); a mismatch is recorded as a
    failure on ``purge_result``. If ``recursive`` is `True`, child CHAINED
    collections are processed in the same way.

    Parameters
    ----------
    top_collection : `str`
        The name of the top CHAINED collection being purged.
        Child collections to remove must start with this name,
        other child collections will be ignored.
    parent_collection : `str`
        The parent CHAINED collection currently being removed.
    butler : `Butler`
        The butler to the repo.
    recursive : `bool`
        If True then children of the top collection that are also CHAINED
        collections will be purged.
    purge_result : `PurgeResult`
        The data structure being populated with failure information or
        collections to remove.
    """
    assert butler.registry.getCollectionType(parent_collection) == CollectionType.CHAINED
    purge_result.chains_to_remove.append(parent_collection)

    for member in butler.registry.getCollectionChain(parent_collection):
        # Children outside the top collection's namespace are ignored.
        if not member.startswith(top_collection):
            continue

        unexpected_parents = check_parents(butler, member, [parent_collection])
        if unexpected_parents:
            purge_result.fail(ChildHasMultipleParentsFailure(member, unexpected_parents))

        # The child is still classified after a failure has been recorded.
        member_type = butler.registry.getCollectionType(member)
        if member_type == CollectionType.RUN:
            purge_result.runs_to_remove.append(member)
            continue

        if member_type == CollectionType.CHAINED:
            if recursive:
                # The recursive call records the child chain itself.
                prepare_to_remove(
                    top_collection=top_collection,
                    parent_collection=member,
                    butler=butler,
                    recursive=recursive,
                    purge_result=purge_result,
                )
            else:
                purge_result.chains_to_remove.append(member)
            continue

        purge_result.others_to_remove.append(member)

214 

215 

def purge(
    butler_config: str,
    collection: str,
    recursive: bool,
) -> PurgeResult:
    """Prepare to purge a CHAINED collection and its children from a
    repository.

    Parameters
    ----------
    butler_config : `str`
        The path location of the gen3 butler/registry config file.
    collection : `str`
        The name of the CHAINED collection to purge.
    recursive : bool
        If True then children of the top collection that are also CHAINED
        collections will be purged.

    Returns
    -------
    purge_result : PurgeResult
        The description of what to remove and/or the failure encountered
        while preparing, and a completion function that performs the
        removal after confirmation, if needed.
    """
    result = PurgeResult(butler_config)
    butler = Butler(butler_config)

    try:
        collection_type = butler.registry.getCollectionType(collection)
    except MissingCollectionError:
        result.fail(TopCollectionNotFoundFailure(collection))
        return result

    # Only a CHAINED collection may be purged.
    if collection_type != CollectionType.CHAINED:
        result.fail(TopCollectionIsNotChianedFailure(collection, collection_type))
        return result

    # The top collection must not be a member of any other chain.
    parents = check_parents(butler, collection, [])
    if parents:
        result.fail(TopCollectionHasParentsFailure(collection, parents))
        return result

    prepare_to_remove(
        top_collection=collection,
        parent_collection=collection,
        purge_result=result,
        butler=butler,
        recursive=recursive,
    )
    return result