Coverage for python/lsst/daf/butler/_limited_butler.py: 78%

67 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("LimitedButler",)

import logging
from abc import ABC, abstractmethod
from collections.abc import Iterable
from typing import Any, ClassVar

from deprecated.sphinx import deprecated
from lsst.resources import ResourcePath

from ._deferredDatasetHandle import DeferredDatasetHandle
from .core import DatasetRef, DatasetRefURIs, Datastore, DimensionUniverse, StorageClass, StorageClassFactory

log = logging.getLogger(__name__)


class LimitedButler(ABC):
    """A minimal butler interface that is sufficient to back
    `~lsst.pipe.base.PipelineTask` execution.
    """

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @abstractmethod
    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations."""
        raise NotImplementedError()

    @deprecated(
        reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
        " Please use Butler.put(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
        """
        return self.put(obj, ref)

    @abstractmethod
    def put(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
        """Store a dataset that already has a UUID and ``RUN`` collection.

        Parameters
        ----------
        obj : `object`
            The dataset.
        ref : `DatasetRef`
            Resolved reference for a not-yet-stored dataset.

        Returns
        -------
        ref : `DatasetRef`
            The same as the given, for convenience and symmetry with
            `Butler.put`.

        Raises
        ------
        TypeError
            Raised if the butler is read-only.

        Notes
        -----
        Whether this method inserts the given dataset into a ``Registry`` is
        implementation defined (some `LimitedButler` subclasses do not have a
        `Registry`), but it always adds the dataset to a `Datastore`, and the
        given ``ref.id`` and ``ref.run`` are always preserved.
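
        Examples
        --------
        A minimal sketch, assuming ``butler`` is a writeable `LimitedButler`
        subclass instance and ``ref`` is a resolved `DatasetRef` created
        elsewhere (both names are placeholders, not part of this API):

        >>> stored_ref = butler.put(in_memory_object, ref)  # doctest: +SKIP
        >>> assert stored_ref == ref  # doctest: +SKIP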

        """
        raise NotImplementedError()

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A resolved `DatasetRef` directly associated with a dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if the supplied `DatasetRef` is unresolved.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
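
        Examples
        --------
        A minimal sketch, assuming ``butler`` and ``ref`` as in `put`; the
        ``parameters`` payload shown is hypothetical and depends on the
        dataset's storage class:

        >>> obj = butler.get(ref)  # doctest: +SKIP
        >>> cutout = butler.get(ref, parameters={"bbox": bbox})  # doctest: +SKIP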

        """
        log.debug("Butler get: %s, parameters=%s, storageClass: %s", ref, parameters, storageClass)
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
        " Please use Butler.get(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirect(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

    @deprecated(
        reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
        "Please use Butler.getDeferred(). Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def getDirectDeferred(
        self,
        ref: DatasetRef,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.
        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        ref : `DatasetRef`
            For the default implementation of a `LimitedButler`, the only
            acceptable parameter is a resolved `DatasetRef`.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Notes
        -----
        In a `LimitedButler` the only allowable way to specify a dataset is
        to use a resolved `DatasetRef`. Subclasses can support more options.
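
        Examples
        --------
        A minimal sketch, assuming ``butler`` and ``ref`` as above; no I/O
        happens until ``get()`` is called on the handle:

        >>> handle = butler.getDeferred(ref)  # doctest: +SKIP
        >>> obj = handle.get()  # doctest: +SKIP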

        """
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

    def get_datastore_names(self) -> tuple[str, ...]:
        """Return the names of the datastores associated with this butler.

        Returns
        -------
        names : `tuple` [`str`, ...]
            The names of the datastores.
        """
        return self._datastore.names

    def get_datastore_roots(self) -> dict[str, ResourcePath | None]:
        """Return the defined root URIs for all registered datastores.

        Returns
        -------
        roots : `dict` [`str`, `~lsst.resources.ResourcePath` | `None`]
            A mapping from datastore name to datastore root URI. The root
            can be `None` if the datastore does not have any concept of a root
            URI.
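
        Examples
        --------
        A minimal sketch; the datastore name and root shown are hypothetical:

        >>> butler.get_datastore_roots()  # doctest: +SKIP
        {'FileDatastore@<butlerRoot>': ResourcePath('file:///repo/')}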

        """
        return self._datastore.roots

    def getURIs(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> DatasetRefURIs:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which URIs are requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact (can be empty if there are no components).
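
        Examples
        --------
        A minimal sketch for an assembled (single-artifact) dataset; the
        attribute names follow `DatasetRefURIs` and the URI shown is
        hypothetical:

        >>> uris = butler.getURIs(ref)  # doctest: +SKIP
        >>> uris.primaryURI, uris.componentURIs  # doctest: +SKIP
        (ResourcePath('file:///repo/run/dataset.fits'), {})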

        """
        return self._datastore.getURIs(ref, predict)

    def getURI(
        self,
        ref: DatasetRef,
        /,
        *,
        predict: bool = False,
    ) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A `DatasetRef` for which a single URI is requested.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI, the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
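
        Examples
        --------
        A minimal sketch; predicting the URI of a dataset that has not yet
        been written (the path shown is hypothetical):

        >>> butler.getURI(ref, predict=True)  # doctest: +SKIP
        ResourcePath('file:///repo/run/dataset.fits#predicted')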

        """
        primary, components = self.getURIs(ref, predict=predict)

        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. "
                "Use LimitedButler.getURIs() instead."
            )
        return primary

    def get_many_uris(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned for datasets that have not
            yet been written.
        allow_missing : `bool`, optional
            If `False`, and ``predict`` is `False`, raise for any
            `DatasetRef` that does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            Raised if a URI is requested for a dataset that does not exist
            and guessing is not allowed.

        Notes
        -----
        In file-based datastores, ``get_many_uris`` does not check that the
        file is present. It assumes that if the datastore is aware of the
        file then it actually exists.
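
        Examples
        --------
        A minimal sketch, assuming ``refs`` is an iterable of resolved
        `DatasetRef` objects (a placeholder name):

        >>> for ref, uris in butler.get_many_uris(refs).items():  # doctest: +SKIP
        ...     print(ref.datasetType.name, uris.primaryURI)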

        """
        return self._datastore.getManyURIs(refs, predict=predict, allow_missing=allow_missing)

    def stored(self, ref: DatasetRef) -> bool:
        """Indicate whether the dataset's artifacts are present in the
        Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        stored : `bool`
            Whether the dataset artifact exists in the datastore and can be
            retrieved.
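
        Examples
        --------
        A minimal sketch:

        >>> if not butler.stored(ref):  # doctest: +SKIP
        ...     print(f"Artifact missing for {ref}")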

        """
        return self._datastore.exists(ref)

    def stored_many(
        self,
        refs: Iterable[DatasetRef],
    ) -> dict[DatasetRef, bool]:
        """Check the datastore for artifact existence of multiple datasets
        at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from given dataset refs to boolean indicating artifact
            existence.
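
        Examples
        --------
        A minimal sketch; one batched call is typically much cheaper than
        calling `stored` once per ref:

        >>> existence = butler.stored_many(refs)  # doctest: +SKIP
        >>> missing = [ref for ref, ok in existence.items() if not ok]  # doctest: +SKIP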

        """
        return self._datastore.mexists(refs)

    @deprecated(
        reason="Butler.datasetExistsDirect() has been replaced by Butler.stored(). "
        "Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datasetExistsDirect(self, ref: DatasetRef) -> bool:
        """Return `True` if a dataset is actually present in the Datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to a dataset.

        Returns
        -------
        exists : `bool`
            Whether the dataset exists in the Datastore.
        """
        return self.stored(ref)

    def markInputUnused(self, ref: DatasetRef) -> None:
        """Indicate that a predicted input was not actually used when
        processing a `Quantum`.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the unused dataset.

        Notes
        -----
        By default, a dataset is considered "actually used" if it is accessed
        via `get` or a handle to it is obtained via `getDeferred` (even if
        the handle is not used). This method must be called after one of
        those in order to remove the dataset from the actual input list.

        This method does nothing for butlers that do not store provenance
        information (which is the default implementation provided by the base
        class).
        """
        pass

    @abstractmethod
    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `~collections.abc.Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
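
        Examples
        --------
        A minimal sketch; fully purging datasets requires ``disassociate``
        and ``unstore`` to both be `True`:

        >>> butler.pruneDatasets(refs, disassociate=True, unstore=True, purge=True)  # doctest: +SKIP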

        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def dimensions(self) -> DimensionUniverse:
        """Structure managing all dimensions recognized by this data
        repository (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @property
    @deprecated(
        reason="The Butler.datastore property is now deprecated. Butler APIs should now exist with the "
        "relevant functionality. Will be removed after v27.0.",
        version="v26.0",
        category=FutureWarning,
    )
    def datastore(self) -> Datastore:
        """The object that manages actual dataset storage (`Datastore`)."""
        return self._datastore

    _datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`)."""

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """