Coverage for python/lsst/daf/butler/_limited_butler.py: 81%

43 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-06 02:34 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("LimitedButler",) 

25 

26import logging 

27from abc import ABC, abstractmethod 

28from typing import Any, ClassVar, Dict, Iterable, Optional, Union 

29 

30from deprecated.sphinx import deprecated 

31 

32from ._deferredDatasetHandle import DeferredDatasetHandle 

33from .core import DatasetRef, Datastore, DimensionUniverse, StorageClass, StorageClassFactory 

34 

35log = logging.getLogger(__name__) 

36 

37 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: put() accepts a resolved DatasetRef directly.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Deprecated alias for get(); goes straight to the datastore.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        # Deprecated alias for getDeferred(); no datastore access happens here.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.datastore.exists(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """