Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 34%

77 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-17 02:08 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22 

23from enum import Enum, auto 

24 

25from .._butler import Butler 

26from ..registry import CollectionType 

27from .queryDatasets import QueryDatasets 

28 

29 

class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [``astropy.table.table``], optional
        The astropy tables that will be or were deleted, by default None.
    state : ``PruneDatasetsResult.State``, optional
        The initial state of execution of the action, if `None` the result
        state is ``INIT``, by default None.
    errDict : `dict`, optional
        Variables related to an error that may be substituted into a
        user-visible string. If `None` (or empty) an empty `dict` is stored,
        by default None.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : callable or `None`
        The function to call (with no arguments) to perform the action if the
        caller wants to confirm the tables before performing the action.
        Initialized to `None`; it is assigned by the code that builds the
        result.
    action : `dict` or `None`
        Describes the removal action for a dry run; when set it is a dict
        with keys ``disassociate``, ``unstore``, ``purge``, and
        ``collections``. Initialized to `None`.
    errDict : `dict`
        Same as in Parameters, never `None`.
    """

    class State(Enum):
        """Execution states, including the error outcomes, of the action."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(self, tables=None, state=None, errDict=None):
        self.state = state or self.State.INIT
        self.tables = tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string.
        self.errDict = errDict or {}

    # Each property below reports whether ``state`` is one specific member of
    # ``State``. The member is always looked up on the enum *class*
    # (``self.State``), never on the current member (``self.state``):
    # member-from-member lookup is discouraged by the enum documentation and
    # its behavior has varied between Python versions.

    @property
    def dryRun(self):
        """`True` if the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self):
        """`True` if the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self):
        """`True` if the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self):
        """`True` if the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self):
        """`True` if the state is ``ERR_NO_COLLECTION_RESTRICTION``
        (`bool`).
        """
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self):
        """`True` if the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self):
        """`True` if the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP

103 

104 

def pruneDatasets(
    repo, collections, datasets, where, disassociate_tags, unstore, purge_run, dry_run, confirm, find_all
):
    """Remove datasets from a repository, optionally previewing them first.

    Parameters
    ----------
    repo : `str`
        URI of the repository, or of a configuration file that describes the
        repository and its location.
    collections : iterable [`str`]
        Glob-style search strings selecting the collections to search. When
        empty, ``purge_run`` (if given) or otherwise ``disassociate_tags`` is
        searched instead.
    datasets : iterable [`str`]
        Glob-style search strings selecting the dataset type names to search
        for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) a dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. Must not be
        combined with ``purge_run``.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``. Treated as
        `True` whenever ``purge_run`` is given.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that the
        current implementation accepts any RUN-type collection, but will
        remove datasets from all collections.
    dry_run : `bool`
        Report what would be removed without removing anything.
    confirm : `bool`
        Report what would be removed and hand back a completion function to
        run once the caller has confirmed.
    find_all : `bool`
        If False, for each result data ID, only the dataset from the first
        collection in which a dataset of that dataset type appears (according
        to the order of ``collections`` passed in) is deleted. If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDataset``'s
        find_first option.

    Returns
    -------
    results : ``PruneDatasetsResult``
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    Legal and illegal combinations of purge, unstore, and disassociate:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, unstore is simply ignored (purge effectively implies
      unstore)
    - purge+disassociate: an error (rather than ignoring disassociate),
      because disassociate comes with a collection argument that cannot be
      respected, which might be confusing (purge will disassociate from all
      TAGGED collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Reject requests that ask for nothing, or for a contradictory pair.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)
    if purge_run and disassociate_tags:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    # Without explicit collections fall back to the purge run, or failing
    # that to the tags being disassociated; with neither there is nothing to
    # restrict the search to.
    if not collections:
        fallback = (purge_run,) if purge_run else disassociate_tags
        if not fallback:
            return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)
        collections = fallback

    readButler = Butler(repo)

    # A purge is only accepted on a RUN type collection.
    if purge_run and readButler.registry.getCollectionType(purge_run) is not CollectionType.RUN:
        return PruneDatasetsResult(state=State.ERR_PRUNE_ON_NOT_RUN, errDict={"collection": purge_run})

    # find_first is the inverse of find_all: the user may force a search of
    # every collection, otherwise only the first match per data ID is used
    # (find_first requires collections to be provided).
    query = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False,
    )
    outcome = PruneDatasetsResult(query.getTables())

    # A purge implies both disassociation and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    unstore = unstore or purge

    if dry_run:
        outcome.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        outcome.state = State.DRY_RUN_COMPLETE
        return outcome

    def doPruneDatasets():
        # The removal itself needs a writeable butler; it is created only
        # when the removal actually runs.
        Butler(repo, writeable=True).pruneDatasets(
            refs=query.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        outcome.state = State.FINISHED
        return outcome

    if not confirm:
        return doPruneDatasets()

    # Defer the removal: the caller inspects the tables and then invokes
    # ``onConfirmation`` to carry it out.
    outcome.onConfirmation = doPruneDatasets
    outcome.state = State.AWAITING_CONFIRMATION
    return outcome

237 

238 return doPruneDatasets()