Coverage for python/lsst/daf/butler/script/_pruneDatasets.py: 41%

84 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-26 02:48 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from collections.abc import Callable, Iterable 

30from enum import Enum, auto 

31from typing import TYPE_CHECKING, Any 

32 

33from .._butler import Butler 

34from ..registry import CollectionType 

35from .queryDatasets import QueryDatasets 

36 

37if TYPE_CHECKING: 

38 from astropy.table import Table 

39 

40 

class PruneDatasetsResult:
    """Contains the results of a prune-datasets action.

    The action may not be complete if the caller requested a confirmation, in
    which case calling ``onConfirmation`` will perform the action.

    Parameters
    ----------
    tables : `list` [`astropy.table.Table`], optional
        The astropy tables that will be or were deleted, by default `None`,
        in which case an empty list is stored.
    state : `PruneDatasetsResult.State`, optional
        The initial state of execution of the action, if `None` the result
        state is ``INIT``, by default `None`.
    errDict : `dict` [`str`, `str`] or `None`
        Place to store error messages. Will be created if not given. A
        dictionary supplied by the caller is stored as-is (even when it is
        empty), so entries added later are visible to the caller.

    Attributes
    ----------
    tables : `list` [`astropy.table.Table`]
        Same as in Parameters.
    state : ``PruneDatasetsResult.State``
        The current state of the action.
    onConfirmation : `~collections.abc.Callable` or `None`
        The function to call, without arguments, to perform the action if
        the caller wants to confirm the tables before performing the action.
        `None` until a confirmation is pending.
    action : `dict` [`str`, `~typing.Any`] or `None`
        Description of the removal action for a dry run, with keys
        ``disassociate``, ``unstore``, ``purge``, and ``collections``.
        `None` unless populated by the code driving the prune.
    errDict : `dict` [`str`, `str`]
        Variables related to an error that may be substituted into a
        user-visible string.
    """

    tables: list[Table]
    state: State
    action: dict[str, Any] | None
    onConfirmation: Callable | None
    errDict: dict[str, str]

    class State(Enum):
        """State associated with dataset pruning request."""

        INIT = auto()
        DRY_RUN_COMPLETE = auto()
        AWAITING_CONFIRMATION = auto()
        FINISHED = auto()
        ERR_PURGE_AND_DISASSOCIATE = auto()
        ERR_NO_COLLECTION_RESTRICTION = auto()
        ERR_PRUNE_ON_NOT_RUN = auto()
        ERR_NO_OP = auto()

    def __init__(
        self,
        tables: list[Table] | None = None,
        state: State | None = None,
        errDict: dict[str, str] | None = None,
    ):
        self.state = self.State.INIT if state is None else state
        self.tables = [] if tables is None else tables
        self.onConfirmation = None
        # Action describes the removal action for dry-run, will be a dict with
        # keys disassociate, unstore, purge, and collections.
        self.action = None
        # errDict is a container for variables related to the error that may be
        # substituted into a user-visible string. Test against None rather
        # than truthiness so that an empty dict handed in by the caller is
        # kept (and shared) instead of being silently replaced.
        self.errDict = {} if errDict is None else errDict

    @property
    def dryRun(self) -> bool:
        """Whether the state is ``DRY_RUN_COMPLETE`` (`bool`)."""
        return self.state is self.State.DRY_RUN_COMPLETE

    @property
    def confirm(self) -> bool:
        """Whether the state is ``AWAITING_CONFIRMATION`` (`bool`)."""
        return self.state is self.State.AWAITING_CONFIRMATION

    @property
    def finished(self) -> bool:
        """Whether the state is ``FINISHED`` (`bool`)."""
        return self.state is self.State.FINISHED

    @property
    def errPurgeAndDisassociate(self) -> bool:
        """Whether the state is ``ERR_PURGE_AND_DISASSOCIATE`` (`bool`)."""
        return self.state is self.State.ERR_PURGE_AND_DISASSOCIATE

    @property
    def errNoCollectionRestriction(self) -> bool:
        """Whether the state is ``ERR_NO_COLLECTION_RESTRICTION`` (`bool`)."""
        return self.state is self.State.ERR_NO_COLLECTION_RESTRICTION

    @property
    def errPruneOnNotRun(self) -> bool:
        """Whether the state is ``ERR_PRUNE_ON_NOT_RUN`` (`bool`)."""
        return self.state is self.State.ERR_PRUNE_ON_NOT_RUN

    @property
    def errNoOp(self) -> bool:
        """Whether the state is ``ERR_NO_OP`` (`bool`)."""
        return self.state is self.State.ERR_NO_OP

128 

129 

def pruneDatasets(
    repo: str,
    collections: Iterable[str],
    datasets: Iterable[str],
    where: str,
    disassociate_tags: Iterable[str],
    unstore: bool,
    purge_run: str,
    dry_run: bool,
    confirm: bool,
    find_all: bool,
) -> PruneDatasetsResult:
    """Prune datasets from a repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file describing the
        repo and its location.
    collections : iterable [`str`]
        Glob-style search strings identifying the collections to search.
        When empty, ``purge_run`` (or else ``disassociate_tags``) is used as
        the collection restriction instead.
    datasets : iterable [`str`]
        Glob-style search strings identifying the dataset type names to
        search for.
    where : `str`
        A string expression similar to a SQL WHERE clause. May involve any
        column of a dimension table or (as a shortcut for the primary key
        column of a dimension table) dimension name.
    disassociate_tags : `list` [`str`]
        TAGGED collections to disassociate the datasets from. Must be empty
        when ``purge_run`` is given.
    unstore : `bool`
        Same as the unstore argument to ``Butler.pruneDatasets``.
    purge_run : `str`
        Completely remove datasets from the ``Registry``. Note that current
        implementation accepts any RUN-type collection, but will remove
        datasets from all collections in ``collections`` if it is non-empty.
    dry_run : `bool`
        Get results for what would be removed but do not remove.
    confirm : `bool`
        Get results for what would be removed and return the results for
        display & confirmation, with a completion function to run after
        confirmation.
    find_all : `bool`
        If False, for each result data ID, will only delete the dataset from
        the first collection in which a dataset of that dataset type appears
        (according to the order of ``collections`` passed in). If used,
        ``collections`` must specify at least one expression and must not
        contain wildcards. This is the inverse of ``QueryDataset``'s find_first
        option.

    Returns
    -------
    results : `PruneDatasetsResult`
        A data structure that contains information about datasets for removal,
        removal status, and options to continue in some cases.

    Notes
    -----
    The matrix of legal & illegal combinations of purge, unstore, and
    disassociate is this:

    - none of (purge, unstore, disassociate): error, nothing to do
    - purge only: ok
    - unstore only: ok
    - disassociate only: ok
    - purge+unstore: ok, just ignore unstore (purge effectively implies
      unstore)
    - purge+disassociate: this is an error (instead of ignoring disassociate),
      because that comes with a collection argument that we can't respect, and
      that might be confusing (purge will disassociate from all TAGGED
      collections, not just the one given)
    - purge+unstore+disassociate: an error, for the same reason as just
      purge+disassociate
    - unstore+disassociate: ok; these operations are unrelated to each other
    """
    State = PruneDatasetsResult.State

    # Nothing was requested at all.
    if not (disassociate_tags or unstore or purge_run):
        return PruneDatasetsResult(state=State.ERR_NO_OP)

    # Purging and disassociating are mutually exclusive requests.
    if disassociate_tags and purge_run:
        return PruneDatasetsResult(state=State.ERR_PURGE_AND_DISASSOCIATE)

    if not collections:
        # No explicit collections: fall back on the purge run if there is
        # one, otherwise on the tags being disassociated.
        if purge_run:
            collections = (purge_run,)
        elif disassociate_tags:
            collections = disassociate_tags
        # Still nothing to restrict the search to.
        if not collections:
            return PruneDatasetsResult(state=State.ERR_NO_COLLECTION_RESTRICTION)

    # A purge is only permitted on a RUN-type collection.
    if purge_run:
        readOnlyButler = Butler.from_config(repo, without_datastore=True)
        runType = readOnlyButler.registry.getCollectionType(purge_run)
        if runType is not CollectionType.RUN:
            return PruneDatasetsResult(
                state=State.ERR_PRUNE_ON_NOT_RUN,
                errDict={"collection": purge_run},
            )

    # find_first is the default behaviour (it requires collections, which are
    # guaranteed by this point); the user turns it off by asking for find_all.
    query = QueryDatasets(
        repo=repo,
        glob=datasets,
        collections=collections,
        where=where,
        find_first=not find_all,
        show_uri=False,
    )

    result = PruneDatasetsResult(query.getTables())

    # A purge implies both disassociation and unstoring.
    purge = bool(purge_run)
    disassociate = bool(disassociate_tags) or purge
    unstore = unstore or purge

    if dry_run:
        result.action = {
            "disassociate": disassociate,
            "purge": purge,
            "unstore": unstore,
            "collections": collections,
        }
        result.state = State.DRY_RUN_COMPLETE
        return result

    def doPruneDatasets() -> PruneDatasetsResult:
        # Perform the removal with a writeable butler and mark the shared
        # result object as finished.
        writeableButler = Butler.from_config(repo, writeable=True)
        writeableButler.pruneDatasets(
            refs=query.getDatasets(),
            disassociate=disassociate,
            tags=disassociate_tags or (),
            purge=purge,
            unstore=unstore,
        )
        result.state = State.FINISHED
        return result

    if not confirm:
        return doPruneDatasets()

    # Defer the removal until the caller confirms.
    result.state = State.AWAITING_CONFIRMATION
    result.onConfirmation = doPruneDatasets
    return result