Coverage for python/lsst/daf/butler/_limited_butler.py: 78%

67 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-05 01:26 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

from deprecated.sphinx import deprecated
from lsst.resources import ResourcePath

from ._deferredDatasetHandle import DeferredDatasetHandle
from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory

# Module-level logger, named after this module per standard logging convention.
log = logging.getLogger(__name__)

38 

39 

class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        # Deprecated alias: put() now accepts a resolved ref directly.
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        # Lazy %-style args keep formatting cost off the non-debug path.
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # Deprecated alias: identical delegation to the datastore as get(),
        # minus the debug logging.
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    # TODO: remove on DM-40067.
    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        # No datastore access happens here; the handle defers the actual read.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
        """
        # NOTE: this default implementation performs no registry lookup and no
        # datastore access; it only wraps the ref in a handle. Subclasses with
        # a registry may override to validate the ref up front.
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a root
            URI.
        """
        return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact (can be empty if there are no components).
        """
        return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
        """
        primary, components = self.getURIs(ref, predict=predict)

        # A single URI is only meaningful when the datastore stored exactly
        # one artifact: a primary with no per-component URIs.
        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, get_many_uris does not check that the file is
        present. It assumes that if datastore is aware of the file then it
        actually exists.
        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
        """
        return self._datastore.mexists(refs)

    # TODO: remove on DM-40079.
    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        # Deprecated alias for stored().
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `getDirect` or a handle to it is obtained via `getDirectDeferred`
        (even if the handle is not used). This method must be called after one
        of those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        # Intentionally a no-op here; provenance-tracking subclasses override.
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    # TODO: remove on DM-40080.
    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v26.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage. (`Datastore`)"""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """