Coverage for python/lsst/daf/butler/_limited_butler.py: 79%

70 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("LimitedButler",) 

31 

32import logging 

33from abc import ABC, abstractmethod 

34from collections.abc import Iterable 

35from typing import Any, ClassVar 

36 

37from deprecated.sphinx import deprecated 

38from lsst.resources import ResourcePath 

39 

40from ._dataset_ref import DatasetRef 

41from ._deferredDatasetHandle import DeferredDatasetHandle 

42from ._storage_class import StorageClass, StorageClassFactory 

43from .datastore import DatasetRefURIs, Datastore 

44from .dimensions import DimensionUniverse 

45 

46log = logging.getLogger(__name__) 

47 

48 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: put() already handles resolved refs directly.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a root
            URI.
        """
        return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact (can be empty if there are no components).
        """
        return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
        """
        primary, components = self.getURIs(ref, predict=predict)

        # A single URI only makes sense when there is exactly one primary
        # artifact and no disassembled components.
        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, get_many_uris does not check that the file is
        present. It assumes that if datastore is aware of the file then it
        actually exists.
        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
        """
        return self._datastore.mexists(refs)

    # TODO: remove on DM-40079.
    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `get` or a handle to it is obtained via `getDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    # TODO: remove on DM-40080.
    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage. (`Datastore`)"""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """