Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 41%

84 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-14 19:21 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23from collections.abc import Callable, Iterable 

24from enum import Enum, auto 

25from typing import TYPE_CHECKING, Any 

26 

27from .._butler import Butler 

28from ..registry import CollectionType 

29from .queryDatasets import QueryDatasets 

30 

31if TYPE_CHECKING: 

32 from astropy.table import Table 

33 

34 

class PruneDatasetsResult:
    """Outcome of a prune-datasets request.

    The request may still be pending when this object is returned: if the
    caller asked to confirm first, the removal only happens once
    ``onConfirmation`` is called.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables describing what will be or was deleted. An empty
        list is used when `None` is given.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action. ``INIT`` is used when
        no state is given.
    errDict : `dict` [`str`, `str`], optional
        Variables related to an error that may be substituted into a
        user-visible string. An empty `dict` is used when not given.

    Attributes
    ----------
    tables
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : `~collections.abc.Callable` or `None`
        The function to call to perform the action if the caller wants to
        confirm the tables before performing the action. Initially `None`.
    action : `dict` [`str`, `Any`] or `None`
        Description of the removal action for a dry run; a `dict` with keys
        ``disassociate``, ``unstore``, ``purge`` and ``collections``.
        Initially `None`.
    errDict
        Same as in Parameters.
    """

    class State(Enum):
        """State associated with dataset pruning request."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    action: dict[str, Any] | None
    onConfirmation: Callable | None

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        # Nothing is pending until the caller of the constructor says so.
        self.action = None
        self.onConfirmation = None
        # Fall back to neutral defaults for anything that was not supplied.
        self.tables = [] if tables is None else tables
        self.state = state if state else self.State.INIT
        self.errDict = errDict if errDict else {}

    @property
    def dryRun(self) -> bool:
        """Whether the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        """Whether the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        """Whether the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        """Whether the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        """Whether the state is ``ERR_NO_COLLECTION_RESTRICTION`` (`bool`)."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        """Whether the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        """Whether the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP

120 

121 

def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        A list of glob-style search strings that identify the collections to
        search for. When empty, ``purge_run`` or else ``disassociate_tags``
        is searched instead.
    datasets : iterable [`str`]
        A list of glob-style search strings that identify the dataset type
        names to search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. Must not be
        combined with ``purge_run``.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections in ``collections`` if it is non-empty.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDatasets``'s
        find_first option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring disassociate),
      because that comes with a collection argument that we can't respect, and
      that might be confusing (purge will disassociate from all TAGGED
      collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Nothing was requested at all.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)

    # Purging and disassociating are mutually exclusive (see Notes).
    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    # Without explicit collections, search the purge run, or failing that the
    # tags being disassociated; with neither there is nothing to restrict the
    # search to.
    if not collections:
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags
        else:
            return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)

    read_butler = Butler(repo)

    # A purge is only accepted for a RUN-type collection.
    if purge_run and read_butler.registry.getCollectionType(purge_run) is not CollectionType.RUN:
        return PruneDatasetsResult(
            state=State.ERR_PRUNE_ON_NOT_RUN,
            errDict={"collection": purge_run},
        )

    # find_first is simply the inverse of the caller's find_all request;
    # collections are guaranteed to be non-empty by this point, which
    # find_first requires.
    datasets_found = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(datasets_found.getTables())

    # A purge implies both disassociation and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    unstore = unstore or purge

    if dry_run:
        result.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        result.state = State.DRY_RUN_COMPLETE
        return result

    def doPruneDatasets() -> PruneDatasetsResult:
        # The removal itself needs a writeable butler.
        butler = Butler(repo, writeable=True)
        refs = datasets_found.getDatasets()
        butler.pruneDatasets(
            refs=refs,
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = State.FINISHED
        return result

    if not confirm:
        return doPruneDatasets()

    # Hand the removal back to the caller to run after confirmation.
    result.onConfirmation = doPruneDatasets
    result.state = State.AWAITING_CONFIRMATION
    return result