Coverage for python/lsst/daf/butler/_limited_butler.py: 78%

67 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("LimitedButler",) 

31 

32import logging 

33from abc import ABC, abstractmethod 

34from collections.abc import Iterable 

35from typing import Any, ClassVar 

36 

37from deprecated.sphinx import deprecated 

38from lsst.resources import ResourcePath 

39 

40from ._deferredDatasetHandle import DeferredDatasetHandle 

41from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory 

42 

43log = logging.getLogger(__name__) 

44 

45 

class LimitedButler(ABC):
    """Minimal butler interface capable of backing
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    Transitional code may consult this attribute to distinguish it from the
    Generation 2 Butler; once that interface has been fully retired the
    attribute may be removed.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset to store.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same reference that was passed in, for convenience and
            symmetry with `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether the dataset is also inserted into a ``Registry`` is left to
        the implementation (some `LimitedButler` subclasses have no
        `Registry`); the dataset is always added to a `Datastore`, and the
        supplied ``ref.id`` and ``ref.run`` are always preserved.
        """
        # put() now accepts resolved refs directly, so simply delegate.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset to store.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same reference that was passed in, for convenience and
            symmetry with `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether the dataset is also inserted into a ``Registry`` is left to
        the implementation (some `LimitedButler` subclasses have no
        `Registry`); the dataset is always added to a `Datastore`, and the
        supplied ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            StorageClass-specific options controlling how the dataset is
            read, typically used to efficiently read only a subset of it.
        storageClass : `StorageClass` or `str`, optional
            Storage class used to override the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; supplying a read `StorageClass`,
            which must be compatible with the original type, can force a
            different type to be returned.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        A `LimitedButler` only accepts a resolved `DatasetRef` as a way to
        specify a dataset; subclasses may support additional options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            StorageClass-specific options controlling how the dataset is
            read, typically used to efficiently read only a subset of it.
        storageClass : `StorageClass` or `str`, optional
            Storage class used to override the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; supplying a read `StorageClass`,
            which must be compatible with the original type, can force a
            different type to be returned.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Bypasses get() (and its debug logging) and goes straight to the
        # datastore, exactly as the original implementation did.
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a
        dataset, from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            StorageClass-specific options controlling how the dataset is
            read, typically used to efficiently read only a subset of it.
        storageClass : `StorageClass` or `str`, optional
            Storage class used to override the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; supplying a read `StorageClass`,
            which must be compatible with the original type, can force a
            different type to be returned.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, storageClass=storageClass, parameters=parameters)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a
        dataset, after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            StorageClass-specific options controlling how the dataset is
            read, typically used to efficiently read only a subset of it.
        storageClass : `StorageClass` or `str`, optional
            Storage class used to override the Python type returned by this
            method.  By default the returned type matches the dataset type
            definition for this dataset; supplying a read `StorageClass`,
            which must be compatible with the original type, can force a
            different type to be returned.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        A `LimitedButler` only accepts a resolved `DatasetRef` as a way to
        specify a dataset; subclasses may support additional options.
        """
        # No lookup is needed here: the ref is already resolved, so the
        # handle just captures the arguments for a later get().
        return DeferredDatasetHandle(butler=self, ref=ref, storageClass=storageClass, parameters=parameters)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The datastore names.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            Mapping from datastore name to its root URI; the value is `None`
            for datastores that have no concept of a root URI.
        """
        return self._datastore.roots

    def getURIs(self, ref: DatasetRef, /, *, predict: bool = False) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written yet.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset
            (which may be `None` if the dataset was disassembled within the
            datastore), and the URIs of any component artifacts (which may be
            empty if there are no components).
        """
        return self._datastore.getURIs(ref, predict)

    def getURI(self, ref: DatasetRef, /, *, predict: bool = False) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written yet.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore.  If the Dataset
            does not exist in the datastore and ``predict`` is `True`, the
            URI will be a prediction and will include a URI fragment
            "#predicted".  If the datastore has no entities that map well to
            the concept of a URI the returned URI string will merely be
            descriptive; the returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
        """
        primary_uri, component_uris = self.getURIs(ref, predict=predict)

        # A single URI only makes sense for a single-artifact dataset; a
        # disassembled dataset must be queried through getURIs() instead.
        if primary_uri is None or component_uris:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary_uri

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned for datasets that have not
            been written yet.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            Dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, get_many_uris does not check that the file
        is present; it assumes that a file the datastore is aware of actually
        exists.
        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from each given dataset ref to a boolean indicating
            artifact existence.
        """
        return self._datastore.mexists(refs)

    # TODO: remove on DM-40079.
    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        # Thin forwarding shim kept only for the deprecation period.
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default a dataset is considered "actually used" once it is
        accessed via `get`, or once a handle to it is obtained via
        `getDeferred` (even if the handle is never used); this method must be
        called after one of those to remove the dataset from the actual input
        list.

        Butlers that do not store provenance information — which is the
        default behavior provided by this base class — do nothing here.
        """

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune.  These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all
            collections if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler.  Note that this makes it
            impossible to retrieve these datasets even via other collections;
            datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from.  Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset
            from the `Registry`.  To prevent accidental deletions, ``purge``
            may only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    # TODO: remove on DM-40080.
    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage. (`Datastore`)"""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object mapping known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """