Coverage for python/lsst/daf/butler/_limited_butler.py: 80%

49 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-15 09:13 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

from __future__ import annotations

# Public API of this module.
__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

# Third-party decorator used to emit FutureWarning on deprecated shims.
from deprecated.sphinx import deprecated

from ._deferredDatasetHandle import DeferredDatasetHandle
from .core import DatasetRef, Datastore, DimensionUniverse, StorageClass, StorageClassFactory

# Module-level logger (lazy %-style args are used at call sites).
log = logging.getLogger(__name__)

37 

38 

class LimitedButler(ABC):
    """Minimal abstract butler interface.

    This defines just enough functionality to back
    `~lsst.pipe.base.PipelineTask` execution; richer butlers build on it.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    A transitional attribute: it may be removed once the Generation 2
    Butler interface has been fully retired, so only transitional code
    should consult it.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset whose reference already carries a UUID and
        ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The in-memory dataset to store.
        ref : `DatasetRef`
            Resolved reference for a dataset that has not yet been stored.

        Returns
        -------
        ref : `DatasetRef`
            The reference that was passed in, for convenience and for
            symmetry with `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Deprecated alias for `put`. Whether the dataset is also inserted
        into a ``Registry`` is implementation defined (some
        `LimitedButler` subclasses do not have a `Registry`), but it is
        always added to a `Datastore`, and the given ``ref.id`` and
        ``ref.run`` are always preserved.
        """
        # ``put`` accepts a resolved ref directly since v26, so this shim
        # is a pure delegation kept only for backward compatibility.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset whose reference already carries a UUID and
        ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The in-memory dataset to store.
        ref : `DatasetRef`
            Resolved reference for a dataset that has not yet been stored.

        Returns
        -------
        ref : `DatasetRef`
            The reference that was passed in, for convenience and for
            symmetry with `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether the dataset is also inserted into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not
        have a `Registry`), but it is always added to a `Datastore`, and
        the given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the
            dataset.
        storageClass : `StorageClass` or `str`, optional
            Storage class overriding the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; a read `StorageClass` can force a
            different (compatible) type to be returned.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset
        is a resolved `DatasetRef`.  Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        # The datastore does all the real work; this base class only
        # forwards the request.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset from a resolved reference.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the
            dataset.
        storageClass : `StorageClass` or `str`, optional
            Storage class overriding the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; a read `StorageClass` can force a
            different (compatible) type to be returned.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Deprecated alias: identical to ``get`` except for the debug log.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` for later retrieval of a
        dataset, given a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the
            dataset.
        storageClass : `StorageClass` or `str`, optional
            Storage class overriding the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; a read `StorageClass` can force a
            different (compatible) type to be returned.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve the dataset later.
        """
        # Deprecated alias for ``getDeferred``; builds the same handle.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` for later retrieval of a
        dataset, after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the
            dataset.
        storageClass : `StorageClass` or `str`, optional
            Storage class overriding the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; a read `StorageClass` can force a
            different (compatible) type to be returned.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve the dataset later.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset
        is a resolved `DatasetRef`.  Subclasses can support more options.
        """
        # No actual I/O happens here; the handle reads lazily on demand.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def stored(self, ref: DatasetRef) -> bool:
        """Report whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can
            be retrieved.
        """
        return self.datastore.exists(ref)

    def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        """Check artifact existence for multiple datasets in one call.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from each given ref to a boolean indicating artifact
            existence.
        """
        # Bulk datastore query; more efficient than calling ``stored``
        # once per ref.
        return self.datastore.mexists(refs)

    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        # Deprecated alias for ``stored``.
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is
        accessed via `getDirect` or a handle to it is obtained via
        `getDirectDeferred` (even if the handle is not used).  This method
        must be called after one of those in order to remove the dataset
        from the actual input list.

        This base-class implementation is a no-op, suitable for butlers
        that do not store provenance information.
        """

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune.  These must be "resolved" references (not
            just a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all
            collections if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler.  Note that this makes it
            impossible to retrieve them even via other collections.
            Datasets that are already not stored are ignored by this
            option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the
            datasets from.  Ignored if ``disassociate`` is `False` or
            ``purge`` is `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset
            from the `Registry`.  To prevent accidental deletions,
            ``purge`` may only be `True` if all of the following
            conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets
            other than those provided, and should be used with extreme
            care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was
            provided, or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the
    primary exception is when a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """