Coverage for python/lsst/daf/butler/_limited_butler.py: 76% (53 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

from lsst.resources import ResourcePath

from ._dataset_ref import DatasetRef
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._storage_class import StorageClass, StorageClassFactory
from .datastore import DatasetRefURIs, Datastore
from .dimensions import DimensionUniverse

log = logging.getLogger(__name__)


class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The given ``ref``, unchanged, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
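
        Examples
        --------
        A minimal sketch of the intended call pattern (``butler`` is assumed
        to be a writeable `LimitedButler`, ``ref`` a resolved `DatasetRef`
        for a dataset that has not been stored yet, and ``exposure`` any
        object compatible with the ref's storage class):

        >>> stored_ref = butler.put(exposure, ref)
        >>> assert stored_ref == ref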

        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
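
        Examples
        --------
        An illustrative sketch (``butler`` is assumed to be an existing
        `LimitedButler` and ``ref`` a resolved `DatasetRef` to a stored
        dataset; the ``bbox`` read parameter is hypothetical and only valid
        for storage classes that define it):

        >>> obj = butler.get(ref)
        >>> cutout = butler.get(ref, parameters={"bbox": bbox})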

        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a
        dataset, after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
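
        Examples
        --------
        A sketch of deferred retrieval (``butler`` and ``ref`` are assumed
        to exist as above); the artifact is only read when ``get`` is called
        on the handle:

        >>> handle = butler.getDeferred(ref)
        >>> obj = handle.get()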

        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a
            root URI.
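
        Examples
        --------
        A sketch that lists each datastore with its root, if any (``butler``
        is assumed to be an existing `LimitedButler`):

        >>> for name, root in butler.get_datastore_roots().items():
        ...     print(name, root)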

        """
        return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the
            dataset artifact (can be empty if there are no components).
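
        Examples
        --------
        A sketch showing how the result can be unpacked (``butler`` and
        ``ref`` are assumed to exist; a `DatasetRefURIs` unpacks like a
        two-item tuple):

        >>> primary, components = butler.getURIs(ref)
        >>> if primary is None:
        ...     print("disassembled into", list(components))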

        """
        return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted". If the datastore does not have entities
            that relate well to the concept of a URI the returned URI string
            will be descriptive. The returned URI is not guaranteed to be
            obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
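
        Examples
        --------
        A sketch (``butler`` is assumed to exist, ``ref`` to be a resolved
        reference to a single-artifact dataset, and ``unstored_ref`` to
        refer to a dataset that has not been written yet):

        >>> uri = butler.getURI(ref)
        >>> predicted = butler.getURI(unstored_ref, predict=True)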

        """
        primary, components = self.getURIs(ref, predict=predict)

        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            Raised if a URI is requested for a dataset that does not exist
            and both ``predict`` and ``allow_missing`` are `False`.

        Notes
        -----
        In file-based datastores, `get_many_uris` does not check that the
        file is present. It assumes that if the datastore is aware of the
        file then it actually exists.
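
        Examples
        --------
        A sketch that iterates over the returned mapping (``butler`` and
        ``refs`` are assumed to exist; each value unpacks into primary and
        component URIs):

        >>> uris = butler.get_many_uris(refs, predict=True)
        >>> for ref, (primary, components) in uris.items():
        ...     print(ref.id, primary)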

        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
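
        Examples
        --------
        A sketch that finds which of many datasets are missing from the
        datastore (``butler`` and ``refs`` are assumed to exist):

        >>> existence = butler.stored_many(refs)
        >>> missing = [ref for ref, stored in existence.items() if not stored]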

        """
        return self._datastore.mexists(refs)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is
        accessed via `get` or a handle to it is obtained via `getDeferred`
        (even if the handle is not used). This method must be called after
        one of those in order to remove the dataset from the actual input
        list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the
        base class).
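
        Examples
        --------
        A sketch of the expected call order (``butler`` and ``ref`` are
        assumed to exist); the dataset is read first and then retroactively
        dropped from the actual input list:

        >>> obj = butler.get(ref)
        >>> butler.markInputUnused(ref)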

        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all
            collections if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other
            collections. Datasets that are already not stored are ignored by
            this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the
            datasets from. Ignored if ``disassociate`` is `False` or
            ``purge`` is `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset
            from the `Registry`. To prevent accidental deletions, ``purge``
            may only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was
            provided, or the conditions for ``purge=True`` were not met.
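
        Examples
        --------
        Two illustrative sketches (``butler`` is assumed to be writeable and
        ``refs`` to be resolved references): first remove the stored
        artifacts while keeping registry entries, then a full purge, which
        satisfies the conditions above because ``disassociate`` defaults to
        `True`:

        >>> butler.pruneDatasets(refs, disassociate=False, unstore=True)
        >>> butler.pruneDatasets(refs, unstore=True, purge=True)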

        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """