Coverage for python/lsst/daf/butler/_limited_butler.py: 79%

48 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-08 05:05 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Explicit public API of this module: only the LimitedButler ABC is exported.
__all__ = ("LimitedButler",)

25 

26import logging 

27from abc import ABC, abstractmethod 

28from typing import Any, ClassVar, Dict, Iterable, Optional, Union 

29 

30from deprecated.sphinx import deprecated 

31 

32from ._deferredDatasetHandle import DeferredDatasetHandle 

33from .core import DatasetRef, Datastore, DimensionUniverse, StorageClass, StorageClassFactory 

34 

# Module-level logger; used by LimitedButler.get() for debug tracing.
log = logging.getLogger(__name__)

36 

37 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: delegate unchanged to the modern entry point.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        # Lazy %-style args so the formatting cost is only paid at DEBUG level.
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Deprecated path: bypasses get()'s debug logging but is otherwise
        # equivalent to calling the datastore directly.
        return self.datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self.datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
        """
        # Single bulk call so datastore implementations can batch the checks.
        return self.datastore.mexists(refs)

    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        # Deprecated alias for stored().
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        # Intentional no-op: subclasses that track provenance override this.
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the primary
    exception is the case where a `Datastore` implementation provides extra
    functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """