Coverage for python/lsst/daf/butler/_limited_butler.py: 64%

49 statements  

coverage.py v7.2.3, created at 2023-04-19 03:42 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, Iterable, Optional, Union

from deprecated.sphinx import deprecated

from ._deferredDatasetHandle import DeferredDatasetHandle
from .core import (
    AmbiguousDatasetError,
    DatasetRef,
    Datastore,
    DimensionUniverse,
    StorageClass,
    StorageClassFactory,
)

log = logging.getLogger(__name__)


class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
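
        Examples
        --------
        A minimal, illustrative sketch (not part of the original
        documentation). It assumes ``butler`` is a concrete, writeable
        `LimitedButler` implementation, ``catalog`` is an in-memory dataset
        of the appropriate type, and ``ref`` is a resolved `DatasetRef`
        whose dataset has not yet been stored.

        >>> stored_ref = butler.put(catalog, ref)  # doctest: +SKIP
        >>> assert stored_ref == ref  # the same reference is returned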

131 """ 

132 raise NotImplementedError() 

133 

134 def get( 

135 self, 

136 ref: DatasetRef, 

137 /, 

138 *, 

139 parameters: dict[str, Any] | None = None, 

140 storageClass: StorageClass | str | None = None, 

141 ) -> Any: 

142 """Retrieve a stored dataset. 

143 

144 Parameters 

145 ---------- 

146 ref: `DatasetRef` 

147 A resolved `DatasetRef` directly associated with a dataset. 

148 parameters : `dict` 

149 Additional StorageClass-defined options to control reading, 

150 typically used to efficiently read only a subset of the dataset. 

151 storageClass : `StorageClass` or `str`, optional 

152 The storage class to be used to override the Python type 

153 returned by this method. By default the returned type matches 

154 the dataset type definition for this dataset. Specifying a 

155 read `StorageClass` can force a different type to be returned. 

156 This type must be compatible with the original type. 

157 

158 Returns 

159 ------- 

160 obj : `object` 

161 The dataset. 

162 

163 Raises 

164 ------ 

165 AmbiguousDatasetError 

166 Raised if the supplied `DatasetRef` is unresolved. 

167 

168 Notes 

169 ----- 

170 In a `LimitedButler` the only allowable way to specify a dataset is 

171 to use a resolved `DatasetRef`. Subclasses can support more options. 
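
        Examples
        --------
        An illustrative sketch (not part of the original documentation),
        assuming ``butler`` is a concrete `LimitedButler` and ``ref`` is a
        resolved `DatasetRef` for a dataset already present in the
        datastore. The ``parameters`` key shown is hypothetical; the
        accepted keys depend on the dataset's storage class.

        >>> exposure = butler.get(ref)  # doctest: +SKIP
        >>> cutout = butler.get(ref, parameters={"bbox": bbox})  # doctest: +SKIP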

172 """ 

173 log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass) 

174 if ref.id is None: 

175 raise AmbiguousDatasetError(f"Dataset {ref} is not resolved.") 

176 return self.datastore.get(ref, parameters=parameters, storageClass=storageClass) 

177 

178 @deprecated( 

179 reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef." 

180 " Please use Butler.get(). Will be removed after v27.0.", 

181 version="v26.0", 

182 category=FutureWarning, 

183 ) 

184 def getDirect( 

185 self, 

186 ref: DatasetRef, 

187 *, 

188 parameters: Optional[Dict[str, Any]] = None, 

189 storageClass: str | StorageClass | None = None, 

190 ) -> Any: 

191 """Retrieve a stored dataset. 

192 

193 Parameters 

194 ---------- 

195 ref : `DatasetRef` 

196 Resolved reference to an already stored dataset. 

197 parameters : `dict` 

198 Additional StorageClass-defined options to control reading, 

199 typically used to efficiently read only a subset of the dataset. 

200 storageClass : `StorageClass` or `str`, optional 

201 The storage class to be used to override the Python type 

202 returned by this method. By default the returned type matches 

203 the dataset type definition for this dataset. Specifying a 

204 read `StorageClass` can force a different type to be returned. 

205 This type must be compatible with the original type. 

206 

207 Returns 

208 ------- 

209 obj : `object` 

210 The dataset. 

211 

212 Raises 

213 ------ 

214 AmbiguousDatasetError 

215 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

216 """ 

217 return self.datastore.get(ref, parameters=parameters, storageClass=storageClass) 

218 

219 @deprecated( 

220 reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. " 

221 "Please use Butler.getDeferred(). Will be removed after v27.0.", 

222 version="v26.0", 

223 category=FutureWarning, 

224 ) 

225 def getDirectDeferred( 

226 self, 

227 ref: DatasetRef, 

228 *, 

229 parameters: Union[dict, None] = None, 

230 storageClass: str | StorageClass | None = None, 

231 ) -> DeferredDatasetHandle: 

232 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

233 from a resolved `DatasetRef`. 

234 

235 Parameters 

236 ---------- 

237 ref : `DatasetRef` 

238 Resolved reference to an already stored dataset. 

239 parameters : `dict` 

240 Additional StorageClass-defined options to control reading, 

241 typically used to efficiently read only a subset of the dataset. 

242 storageClass : `StorageClass` or `str`, optional 

243 The storage class to be used to override the Python type 

244 returned by this method. By default the returned type matches 

245 the dataset type definition for this dataset. Specifying a 

246 read `StorageClass` can force a different type to be returned. 

247 This type must be compatible with the original type. 

248 

249 Returns 

250 ------- 

251 obj : `DeferredDatasetHandle` 

252 A handle which can be used to retrieve a dataset at a later time. 

253 

254 Raises 

255 ------ 

256 AmbiguousDatasetError 

257 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

258 """ 

259 if ref.id is None: 

260 raise AmbiguousDatasetError( 

261 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

262 ) 

263 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass) 

264 

265 def getDeferred( 

266 self, 

267 ref: DatasetRef, 

268 /, 

269 *, 

270 parameters: dict[str, Any] | None = None, 

271 storageClass: str | StorageClass | None = None, 

272 ) -> DeferredDatasetHandle: 

273 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

274 after an immediate registry lookup. 

275 

276 Parameters 

277 ---------- 

278 ref : `DatasetRef` 

279 For the default implementation of a `LimitedButler`, the only 

280 acceptable parameter is a resolved `DatasetRef`. 

281 parameters : `dict` 

282 Additional StorageClass-defined options to control reading, 

283 typically used to efficiently read only a subset of the dataset. 

284 storageClass : `StorageClass` or `str`, optional 

285 The storage class to be used to override the Python type 

286 returned by this method. By default the returned type matches 

287 the dataset type definition for this dataset. Specifying a 

288 read `StorageClass` can force a different type to be returned. 

289 This type must be compatible with the original type. 

290 

291 Returns 

292 ------- 

293 obj : `DeferredDatasetHandle` 

294 A handle which can be used to retrieve a dataset at a later time. 

295 

296 Raises 

297 ------ 

298 AmbiguousDatasetError 

299 Raised if an unresolved `DatasetRef` is passed as an input. 

300 

301 Notes 

302 ----- 

303 In a `LimitedButler` the only allowable way to specify a dataset is 

304 to use a resolved `DatasetRef`. Subclasses can support more options. 
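
        Examples
        --------
        An illustrative sketch (not part of the original documentation),
        assuming ``butler`` is a concrete `LimitedButler` and ``ref`` is a
        resolved `DatasetRef`; nothing is read from the datastore until the
        handle's ``get()`` is called.

        >>> handle = butler.getDeferred(ref)  # doctest: +SKIP
        >>> dataset = handle.get()  # doctest: +SKIP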

305 """ 

306 if ref.id is None: 

307 raise AmbiguousDatasetError(f"Dataset {ref} is not resolved.") 

308 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass) 

309 

310 def datasetExistsDirect(self, ref: DatasetRef) -> bool: 

311 """Return `True` if a dataset is actually present in the Datastore. 

312 

313 Parameters 

314 ---------- 

315 ref : `DatasetRef` 

316 Resolved reference to a dataset. 

317 

318 Returns 

319 ------- 

320 exists : `bool` 

321 Whether the dataset exists in the Datastore. 
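
        Examples
        --------
        An illustrative sketch (not part of the original documentation),
        assuming ``butler`` is a concrete `LimitedButler` and ``ref`` is a
        resolved `DatasetRef`.

        >>> if not butler.datasetExistsDirect(ref):  # doctest: +SKIP
        ...     print(f"{ref} is missing from the datastore")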

322 """ 

323 return self.datastore.exists(ref) 

324 

325 def markInputUnused(self, ref: DatasetRef) -> None: 

326 """Indicate that a predicted input was not actually used when 

327 processing a `Quantum`. 

328 

329 Parameters 

330 ---------- 

331 ref : `DatasetRef` 

332 Reference to the unused dataset. 

333 

334 Notes 

335 ----- 

336 By default, a dataset is considered "actually used" if it is accessed 

337 via `getDirect` or a handle to it is obtained via `getDirectDeferred` 

338 (even if the handle is not used). This method must be called after one 

339 of those in order to remove the dataset from the actual input list. 

340 

341 This method does nothing for butlers that do not store provenance 

342 information (which is the default implementation provided by the base 

343 class). 

344 """ 

345 pass 

346 

347 @abstractmethod 

348 def pruneDatasets( 

349 self, 

350 refs: Iterable[DatasetRef], 

351 *, 

352 disassociate: bool = True, 

353 unstore: bool = False, 

354 tags: Iterable[str] = (), 

355 purge: bool = False, 

356 ) -> None: 

357 """Remove one or more datasets from a collection and/or storage. 

358 

359 Parameters 

360 ---------- 

361 refs : `~collections.abc.Iterable` of `DatasetRef` 

362 Datasets to prune. These must be "resolved" references (not just 

363 a `DatasetType` and data ID). 

364 disassociate : `bool`, optional 

365 Disassociate pruned datasets from ``tags``, or from all collections 

366 if ``purge=True``. 

367 unstore : `bool`, optional 

368 If `True` (`False` is default) remove these datasets from all 

369 datastores known to this butler. Note that this will make it 

370 impossible to retrieve these datasets even via other collections. 

371 Datasets that are already not stored are ignored by this option. 

372 tags : `Iterable` [ `str` ], optional 

373 `~CollectionType.TAGGED` collections to disassociate the datasets 

374 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

375 `True`. 

376 purge : `bool`, optional 

377 If `True` (`False` is default), completely remove the dataset from 

378 the `Registry`. To prevent accidental deletions, ``purge`` may 

379 only be `True` if all of the following conditions are met: 

380 

381 - ``disassociate`` is `True`; 

382 - ``unstore`` is `True`. 

383 

384 This mode may remove provenance information from datasets other 

385 than those provided, and should be used with extreme care. 

386 

387 Raises 

388 ------ 

389 TypeError 

390 Raised if the butler is read-only, if no collection was provided, 

391 or the conditions for ``purge=True`` were not met. 
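
        Examples
        --------
        An illustrative sketch (not part of the original documentation);
        the exact behaviour is defined by the concrete subclass that
        implements this abstract method. It assumes ``butler`` is writeable
        and ``refs`` is an iterable of resolved `DatasetRef` objects.

        >>> # Fully delete the datasets: allowed only when ``disassociate``
        >>> # and ``unstore`` are also `True`.
        >>> butler.pruneDatasets(refs, disassociate=True, unstore=True, purge=True)  # doctest: +SKIP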

392 """ 

393 raise NotImplementedError() 

394 

395 @property 

396 @abstractmethod 

397 def dimensions(self) -> DimensionUniverse: 

398 """Structure managing all dimensions recognized by this data 

399 repository (`DimensionUniverse`). 

400 """ 

401 raise NotImplementedError() 

402 

403 datastore: Datastore 

404 """The object that manages actual dataset storage (`Datastore`). 

405 

406 Direct user access to the datastore should rarely be necessary; the primary 

407 exception is the case where a `Datastore` implementation provides extra 

408 functionality beyond what the base class defines. 

409 """ 

410 

411 storageClasses: StorageClassFactory 

412 """An object that maps known storage class names to objects that fully 

413 describe them (`StorageClassFactory`). 

414 """