Coverage for python/lsst/daf/butler/_butler.py: 62%

133 statements  

coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Sequence 

34from contextlib import AbstractContextManager 

35from typing import Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.logging import getLogger 

40 

41from ._butler_config import ButlerConfig 

42from ._butler_repo_index import ButlerRepoIndex 

43from ._config import Config, ConfigSubset 

44from ._dataset_existence import DatasetExistence 

45from ._dataset_ref import DatasetIdGenEnum, DatasetRef 

46from ._dataset_type import DatasetType 

47from ._deferredDatasetHandle import DeferredDatasetHandle 

48from ._file_dataset import FileDataset 

49from ._limited_butler import LimitedButler 

50from ._storage_class import StorageClass 

51from .datastore import DatasetRefURIs, Datastore 

52from .dimensions import DataId, DimensionConfig 

53from .registry import Registry, RegistryConfig, _RegistryFactory 

54from .repo_relocation import BUTLER_ROOT_TAG 

55from .transfers import RepoExportContext 

56 

57_LOG = getLogger(__name__) 

58 

59 

60class Butler(LimitedButler): 

61 """Interface for data butler and factory for Butler instances. 

62 

63 Parameters 

64 ---------- 

65 config : `ButlerConfig`, `Config` or `str`, optional 

66 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

67 If a directory path is given the configuration will be read from a 

68 ``butler.yaml`` file in that location. If `None` is given default 

69 values will be used. If ``config`` contains a "cls" key, its value is 

70 used as the name of the butler class and it must be a sub-class of this 

71 class; otherwise `DirectButler` is instantiated. 

72 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

73 An expression specifying the collections to be searched (in order) when 

74 reading datasets. 

75 This may be a `str` collection name or an iterable thereof. 

76 See :ref:`daf_butler_collection_expressions` for more information. 

77 These collections are not registered automatically and must be 

78 registered manually before they are used by any method; they may, 

79 however, be registered after the `Butler` is initialized. 

80 run : `str`, optional 

81 Name of the `~CollectionType.RUN` collection new datasets should be 

82 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

83 ``collections`` will be set to ``[run]``. If not `None`, this 

84 collection will automatically be registered. If this is not set (and 

85 ``writeable`` is not set either), a read-only butler will be created. 

86 searchPaths : `list` of `str`, optional 

87 Directory paths to search when calculating the full Butler 

88 configuration. Not used if the supplied config is already a 

89 `ButlerConfig`. 

90 writeable : `bool`, optional 

91 Explicitly sets whether the butler supports write operations. If not 

92 provided, a read-write butler is created if any of ``run``, ``tags``, 

93 or ``chains`` is non-empty. 

94 inferDefaults : `bool`, optional 

95 If `True` (default) infer default data ID values from the values 

96 present in the datasets in ``collections``: if all collections have the 

97 same value (or no value) for a governor dimension, that value will be 

98 the default for that dimension. Nonexistent collections are ignored. 

99 If a default value is provided explicitly for a governor dimension via 

100 ``**kwargs``, no default will be inferred for that dimension. 

101 **kwargs : `Any` 

102 Additional keyword arguments passed to a constructor of actual butler 

103 class. 

104 

105 Notes 

106 ----- 

107 The preferred way to instantiate Butler is via the `from_config` method. 

108 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

109 but ``mypy`` will complain about the former. 

110 """ 

111 

112 def __new__( 

113 cls, 

114 config: Config | ResourcePathExpression | None = None, 

115 *, 

116 collections: Any = None, 

117 run: str | None = None, 

118 searchPaths: Sequence[ResourcePathExpression] | None = None, 

119 writeable: bool | None = None, 

120 inferDefaults: bool = True, 

121 **kwargs: Any, 

122 ) -> Butler: 

123 if cls is Butler: 

124 cls = cls._find_butler_class(config, searchPaths) 

125 # Note: we do not pass any parameters to __new__; Python will pass them 

126 # to __init__ after __new__ returns the sub-class instance. 

127 return super().__new__(cls) 

128 

129 @staticmethod 

130 def _find_butler_class( 

131 config: Config | ResourcePathExpression | None = None, 

132 searchPaths: Sequence[ResourcePathExpression] | None = None, 

133 ) -> type[Butler]: 

134 """Find actual class to instantiate.""" 

135 butler_class_name: str | None = None 

136 if config is not None: 

137 # Check for optional "cls" key in config. 

138 if not isinstance(config, Config): 

139 config = ButlerConfig(config, searchPaths=searchPaths) 

140 butler_class_name = config.get("cls") 

141 

142 # Make DirectButler if class is not specified. 

143 butler_class: type[Butler] 

144 if butler_class_name is None: 

145 from .direct_butler import DirectButler 

146 

147 butler_class = DirectButler 

148 else: 

149 butler_class = doImportType(butler_class_name) 

150 if not issubclass(butler_class, Butler): 

151 raise TypeError(f"{butler_class_name} is not a subclass of Butler") 

152 return butler_class 

153 

154 @classmethod 

155 def from_config( 

156 cls, 

157 config: Config | ResourcePathExpression | None = None, 

158 *, 

159 collections: Any = None, 

160 run: str | None = None, 

161 searchPaths: Sequence[ResourcePathExpression] | None = None, 

162 writeable: bool | None = None, 

163 inferDefaults: bool = True, 

164 **kwargs: Any, 

165 ) -> Butler: 

166 """Create butler instance from configuration. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `Config` or `str`, optional 

171 Configuration. Anything acceptable to the `ButlerConfig` 

172 constructor. If a directory path is given the configuration will be 

173 read from a ``butler.yaml`` file in that location. If `None` is 

174 given default values will be used. If ``config`` contains a "cls" key, 

175 its value is used as the name of the butler class and it must be a 

176 sub-class of this class; otherwise `DirectButler` is instantiated. 

177 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

178 An expression specifying the collections to be searched (in order) 

179 when reading datasets. 

180 This may be a `str` collection name or an iterable thereof. 

181 See :ref:`daf_butler_collection_expressions` for more information. 

182 These collections are not registered automatically and must be 

183 registered manually before they are used by any method; they may, 

184 however, be registered after the `Butler` is initialized. 

185 run : `str`, optional 

186 Name of the `~CollectionType.RUN` collection new datasets should be 

187 inserted into. If ``collections`` is `None` and ``run`` is not 

188 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

189 this collection will automatically be registered. If this is not 

190 set (and ``writeable`` is not set either), a read-only butler will 

191 be created. 

192 searchPaths : `list` of `str`, optional 

193 Directory paths to search when calculating the full Butler 

194 configuration. Not used if the supplied config is already a 

195 `ButlerConfig`. 

196 writeable : `bool`, optional 

197 Explicitly sets whether the butler supports write operations. If 

198 not provided, a read-write butler is created if any of ``run``, 

199 ``tags``, or ``chains`` is non-empty. 

200 inferDefaults : `bool`, optional 

201 If `True` (default) infer default data ID values from the values 

202 present in the datasets in ``collections``: if all collections have 

203 the same value (or no value) for a governor dimension, that value 

204 will be the default for that dimension. Nonexistent collections 

205 are ignored. If a default value is provided explicitly for a 

206 governor dimension via ``**kwargs``, no default will be inferred 

207 for that dimension. 

208 **kwargs : `Any` 

209 Additional keyword arguments passed to a constructor of actual 

210 butler class. 

211 

212 Notes 

213 ----- 

214 Calling this factory method is identical to calling 

215 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

216 complains about a ``Butler()`` call. 

217 

218 Examples 

219 -------- 

220 While there are many ways to control exactly how a `Butler` interacts 

221 with the collections in its `Registry`, the most common cases are still 

222 simple. 

223 

224 For a read-only `Butler` that searches one collection, do:: 

225 

226 butler = Butler.from_config( 

227 "/path/to/repo", collections=["u/alice/DM-50000"] 

228 ) 

229 

230 For a read-write `Butler` that writes to and reads from a 

231 `~CollectionType.RUN` collection:: 

232 

233 butler = Butler.from_config( 

234 "/path/to/repo", run="u/alice/DM-50000/a" 

235 ) 

236 

237 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

238 because we want to write to one `~CollectionType.RUN` collection but 

239 read from several others as well:: 

240 

241 butler = Butler.from_config( 

242 "/path/to/repo", 

243 run="u/alice/DM-50000/a", 

244 collections=[ 

245 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

246 ] 

247 ) 

248 

249 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

250 Datasets will be read first from that run (since it appears first in 

251 the chain), and then from ``u/bob/DM-49998`` and finally 

252 ``HSC/defaults``. 

253 

254 Finally, one can always create a `Butler` with no collections:: 

255 

256 butler = Butler.from_config("/path/to/repo", writeable=True) 

257 

258 This can be extremely useful when you just want to use 

259 ``butler.registry``, e.g. for inserting dimension data or managing 

260 collections, or when the collections you want to use with the butler 

261 are not consistent. Passing ``writeable`` explicitly here is only 

262 necessary if you want to be able to make changes to the repo; usually 

263 the value for ``writeable`` can be guessed from the collection 

264 arguments provided, but it defaults to `False` when there are no 

265 collection arguments. 

266 """ 

267 cls = cls._find_butler_class(config, searchPaths) 

268 return cls( 

269 config, 

270 collections=collections, 

271 run=run, 

272 searchPaths=searchPaths, 

273 writeable=writeable, 

274 inferDefaults=inferDefaults, 

275 **kwargs, 

276 ) 

277 

278 @staticmethod 

279 def makeRepo( 

280 root: ResourcePathExpression, 

281 config: Config | str | None = None, 

282 dimensionConfig: Config | str | None = None, 

283 standalone: bool = False, 

284 searchPaths: list[str] | None = None, 

285 forceConfigRoot: bool = True, 

286 outfile: ResourcePathExpression | None = None, 

287 overwrite: bool = False, 

288 ) -> Config: 

289 """Create an empty data repository by adding a butler.yaml config 

290 to a repository root directory. 

291 

292 Parameters 

293 ---------- 

294 root : `lsst.resources.ResourcePathExpression` 

295 Path or URI to the root location of the new repository. Will be 

296 created if it does not exist. 

297 config : `Config` or `str`, optional 

298 Configuration to write to the repository, after setting any 

299 root-dependent Registry or Datastore config options. Can not 

300 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

301 configuration will be used. Root-dependent config options 

302 specified in this config are overwritten if ``forceConfigRoot`` 

303 is `True`. 

304 dimensionConfig : `Config` or `str`, optional 

305 Configuration for dimensions, will be used to initialize registry 

306 database. 

307 standalone : `bool`, optional 

308 If `True`, write all expanded defaults, not just customized or 

309 repository-specific settings. 

310 This (mostly) decouples the repository from the default 

311 configuration, insulating it from changes to the defaults (which 

312 may be good or bad, depending on the nature of the changes). 

313 Future *additions* to the defaults will still be picked up when 

314 initializing `Butlers` to repos created with ``standalone=True``. 

315 searchPaths : `list` of `str`, optional 

316 Directory paths to search when calculating the full butler 

317 configuration. 

318 forceConfigRoot : `bool`, optional 

319 If `False`, any values present in the supplied ``config`` that 

320 would normally be reset are not overridden and will appear 

321 directly in the output config. This allows non-standard overrides 

322 of the root directory for a datastore or registry to be given. 

323 If this parameter is `True` the values for ``root`` will be 

324 forced into the resulting config if appropriate. 

325 outfile : `lsst.resources.ResourcePathExpression`, optional 

326 If not `None`, the output configuration will be written to this 

327 location rather than into the repository itself. Can be a URI 

328 string. Can refer to a directory that will be used to write 

329 ``butler.yaml``. 

330 overwrite : `bool`, optional 

331 Create a new configuration file even if one already exists 

332 in the specified output location. Default is to raise 

333 an exception. 

334 

335 Returns 

336 ------- 

337 config : `Config` 

338 The updated `Config` instance written to the repo. 

339 

340 Raises 

341 ------ 

342 ValueError 

343 Raised if a `ButlerConfig` or `ConfigSubset` is passed instead of a 

344 regular `Config` (as these subclasses would make it impossible to 

345 support ``standalone=False``). 

346 FileExistsError 

347 Raised if the output config file already exists. 

348 os.error 

349 Raised if the directory does not exist, exists but is not a 

350 directory, or cannot be created. 

351 

352 Notes 

353 ----- 

354 Note that when ``standalone=False`` (the default), the configuration 

355 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

356 construct the repository should also be used to construct any Butlers 

357 to avoid configuration inconsistencies. 
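
Examples
--------
A minimal sketch of creating a new repository and then constructing a
writeable butler against it; the path is illustrative::

    Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)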

358 """ 

359 if isinstance(config, ButlerConfig | ConfigSubset): 

360 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

361 

362 # Ensure that the root of the repository exists or can be made 

363 root_uri = ResourcePath(root, forceDirectory=True) 

364 root_uri.mkdir() 

365 

366 config = Config(config) 

367 

368 # If we are creating a new repo from scratch with relative roots, 

369 # do not propagate an explicit root from the config file 

370 if "root" in config: 

371 del config["root"] 

372 

373 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

374 imported_class = doImportType(full["datastore", "cls"]) 

375 if not issubclass(imported_class, Datastore): 

376 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

377 datastoreClass: type[Datastore] = imported_class 

378 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

379 

380 # If the key exists in the given config, parse it; otherwise parse the 

381 # defaults in the expanded config. 

382 if config.get(("registry", "db")): 

383 registryConfig = RegistryConfig(config) 

384 else: 

385 registryConfig = RegistryConfig(full) 

386 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

387 if defaultDatabaseUri is not None: 

388 Config.updateParameters( 

389 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

390 ) 

391 else: 

392 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

393 

394 if standalone: 

395 config.merge(full) 

396 else: 

397 # Always expand the registry.managers section into the per-repo 

398 # config, because after the database schema is created, it's not 

399 # allowed to change anymore. Note that in the standalone=True 

400 # branch, _everything_ in the config is expanded, so there's no 

401 # need to special case this. 

402 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

403 configURI: ResourcePathExpression 

404 if outfile is not None: 

405 # When writing to a separate location we must include 

406 # the root of the butler repo in the config else it won't know 

407 # where to look. 

408 config["root"] = root_uri.geturl() 

409 configURI = outfile 

410 else: 

411 configURI = root_uri 

412 # Strip the obscore configuration, if present, before writing the config 

413 # to a file; the obscore config will be stored in the registry. 

414 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

415 config_to_write = config.copy() 

416 del config_to_write[obscore_config_key] 

417 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

418 # The configFile attribute is updated; copy it back to the original config. 

419 config.configFile = config_to_write.configFile 

420 else: 

421 config.dumpToUri(configURI, overwrite=overwrite) 

422 

423 # Create Registry and populate tables 

424 registryConfig = RegistryConfig(config.get("registry")) 

425 dimensionConfig = DimensionConfig(dimensionConfig) 

426 _RegistryFactory(registryConfig).create_from_config( 

427 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

428 ) 

429 

430 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

431 

432 return config 

433 

434 @classmethod 

435 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

436 """Look up the label in a butler repository index. 

437 

438 Parameters 

439 ---------- 

440 label : `str` 

441 Label of the Butler repository to look up. 

442 return_label : `bool`, optional 

443 If ``label`` cannot be found in the repository index (either 

444 because index is not defined or ``label`` is not in the index) and 

445 ``return_label`` is `True` then return ``ResourcePath(label)``. 

446 If ``return_label`` is `False` (default) then an exception will be 

447 raised instead. 

448 

449 Returns 

450 ------- 

451 uri : `lsst.resources.ResourcePath` 

452 URI to the Butler repository associated with the given label, or 

453 ``ResourcePath(label)`` if ``return_label`` is `True` and the label is not found. 

454 

455 Raises 

456 ------ 

457 KeyError 

458 Raised if the label is not found in the index, or if an index 

459 is not defined, and ``return_label`` is `False`. 

460 

461 Notes 

462 ----- 

463 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

464 information is discovered. 
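
Examples
--------
A minimal sketch; the label "main" is illustrative and must appear in
the repository index for the first call to succeed::

    uri = Butler.get_repo_uri("main")
    # With return_label=True an unknown label is returned as
    # ``ResourcePath(label)`` instead of raising.
    uri = Butler.get_repo_uri("/path/to/repo", return_label=True)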

465 """ 

466 return ButlerRepoIndex.get_repo_uri(label, return_label) 

467 

468 @classmethod 

469 def get_known_repos(cls) -> set[str]: 

470 """Retrieve the list of known repository labels. 

471 

472 Returns 

473 ------- 

474 repos : `set` of `str` 

475 All the known labels. Can be empty if no index can be found. 

476 

477 Notes 

478 ----- 

479 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

480 information is discovered. 

481 """ 

482 return ButlerRepoIndex.get_known_repos() 

483 

484 @abstractmethod 

485 def transaction(self) -> AbstractContextManager[None]: 

486 """Context manager supporting `Butler` transactions. 

487 

488 Transactions can be nested. 

489 """ 

490 raise NotImplementedError() 

491 

492 @abstractmethod 

493 def put( 

494 self, 

495 obj: Any, 

496 datasetRefOrType: DatasetRef | DatasetType | str, 

497 /, 

498 dataId: DataId | None = None, 

499 *, 

500 run: str | None = None, 

501 **kwargs: Any, 

502 ) -> DatasetRef: 

503 """Store and register a dataset. 

504 

505 Parameters 

506 ---------- 

507 obj : `object` 

508 The dataset. 

509 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

510 When `DatasetRef` is provided, ``dataId`` should be `None`. 

511 Otherwise the `DatasetType` or name thereof. If a fully resolved 

512 `DatasetRef` is given, the run and ID are used directly. 

513 dataId : `dict` or `DataCoordinate` 

514 A `dict` of `Dimension` link name, value pairs that label the 

515 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

516 should be provided as the second argument. 

517 run : `str`, optional 

518 The name of the run the dataset should be added to, overriding 

519 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

520 **kwargs 

521 Additional keyword arguments used to augment or construct a 

522 `DataCoordinate`. See `DataCoordinate.standardize` 

523 parameters. Not used if a resolved `DatasetRef` is provided. 

524 

525 Returns 

526 ------- 

527 ref : `DatasetRef` 

528 A reference to the stored dataset, updated with the correct id if 

529 given. 

530 

531 Raises 

532 ------ 

533 TypeError 

534 Raised if the butler is read-only or if no run has been provided. 
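
Examples
--------
A minimal sketch, assuming ``bias_image`` holds the object to store;
the dataset type, data ID values, and run name are illustrative::

    ref = butler.put(
        bias_image, "bias", instrument="HSC", detector=42,
        run="u/alice/DM-50000/a",
    )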

535 """ 

536 raise NotImplementedError() 

537 

538 @abstractmethod 

539 def getDeferred( 

540 self, 

541 datasetRefOrType: DatasetRef | DatasetType | str, 

542 /, 

543 dataId: DataId | None = None, 

544 *, 

545 parameters: dict | None = None, 

546 collections: Any = None, 

547 storageClass: str | StorageClass | None = None, 

548 **kwargs: Any, 

549 ) -> DeferredDatasetHandle: 

550 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

551 after an immediate registry lookup. 

552 

553 Parameters 

554 ---------- 

555 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

556 When a `DatasetRef` is provided, ``dataId`` should be `None`. 

557 Otherwise the `DatasetType` or name thereof. 

558 dataId : `dict` or `DataCoordinate`, optional 

559 A `dict` of `Dimension` link name, value pairs that label the 

560 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

561 should be provided as the first argument. 

562 parameters : `dict` 

563 Additional StorageClass-defined options to control reading, 

564 typically used to efficiently read only a subset of the dataset. 

565 collections : Any, optional 

566 Collections to be searched, overriding ``self.collections``. 

567 Can be any of the types supported by the ``collections`` argument 

568 to butler construction. 

569 storageClass : `StorageClass` or `str`, optional 

570 The storage class to be used to override the Python type 

571 returned by this method. By default the returned type matches 

572 the dataset type definition for this dataset. Specifying a 

573 read `StorageClass` can force a different type to be returned. 

574 This type must be compatible with the original type. 

575 **kwargs 

576 Additional keyword arguments used to augment or construct a 

577 `DataId`. See `DataId` parameters. 

578 

579 Returns 

580 ------- 

581 obj : `DeferredDatasetHandle` 

582 A handle which can be used to retrieve a dataset at a later time. 

583 

584 Raises 

585 ------ 

586 LookupError 

587 Raised if no matching dataset exists in the `Registry` or 

588 datastore. 

589 ValueError 

590 Raised if a resolved `DatasetRef` was passed as an input, but it 

591 differs from the one found in the registry. 

592 TypeError 

593 Raised if no collections were provided. 
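
Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative
and default collections are assumed to be set on the butler::

    handle = butler.getDeferred("bias", instrument="HSC", detector=42)
    # Later, when the dataset is actually needed:
    bias = handle.get()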

594 """ 

595 raise NotImplementedError() 

596 

597 @abstractmethod 

598 def get( 

599 self, 

600 datasetRefOrType: DatasetRef | DatasetType | str, 

601 /, 

602 dataId: DataId | None = None, 

603 *, 

604 parameters: dict[str, Any] | None = None, 

605 collections: Any = None, 

606 storageClass: StorageClass | str | None = None, 

607 **kwargs: Any, 

608 ) -> Any: 

609 """Retrieve a stored dataset. 

610 

611 Parameters 

612 ---------- 

613 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

614 When a `DatasetRef` is provided, ``dataId`` should be `None`. 

615 Otherwise the `DatasetType` or name thereof. 

616 If a resolved `DatasetRef`, the associated dataset 

617 is returned directly without additional querying. 

618 dataId : `dict` or `DataCoordinate` 

619 A `dict` of `Dimension` link name, value pairs that label the 

620 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

621 should be provided as the first argument. 

622 parameters : `dict` 

623 Additional StorageClass-defined options to control reading, 

624 typically used to efficiently read only a subset of the dataset. 

625 collections : Any, optional 

626 Collections to be searched, overriding ``self.collections``. 

627 Can be any of the types supported by the ``collections`` argument 

628 to butler construction. 

629 storageClass : `StorageClass` or `str`, optional 

630 The storage class to be used to override the Python type 

631 returned by this method. By default the returned type matches 

632 the dataset type definition for this dataset. Specifying a 

633 read `StorageClass` can force a different type to be returned. 

634 This type must be compatible with the original type. 

635 **kwargs 

636 Additional keyword arguments used to augment or construct a 

637 `DataCoordinate`. See `DataCoordinate.standardize` 

638 parameters. 

639 

640 Returns 

641 ------- 

642 obj : `object` 

643 The dataset. 

644 

645 Raises 

646 ------ 

647 LookupError 

648 Raised if no matching dataset exists in the `Registry`. 

649 TypeError 

650 Raised if no collections were provided. 

651 

652 Notes 

653 ----- 

654 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

655 this method requires that the given data ID include temporal dimensions 

656 beyond the dimensions of the dataset type itself, in order to find the 

657 dataset with the appropriate validity range. For example, a "bias" 

658 dataset with native dimensions ``{instrument, detector}`` could be 

659 fetched with a ``{instrument, detector, exposure}`` data ID, because 

660 ``exposure`` is a temporal dimension. 
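
Examples
--------
A minimal sketch, fetching the "bias" dataset described above; the data
ID values are illustrative and default collections are assumed to be
set on the butler::

    bias = butler.get("bias", instrument="HSC", detector=42, exposure=1234)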

661 """ 

662 raise NotImplementedError() 

663 

664 @abstractmethod 

665 def getURIs( 

666 self, 

667 datasetRefOrType: DatasetRef | DatasetType | str, 

668 /, 

669 dataId: DataId | None = None, 

670 *, 

671 predict: bool = False, 

672 collections: Any = None, 

673 run: str | None = None, 

674 **kwargs: Any, 

675 ) -> DatasetRefURIs: 

676 """Return the URIs associated with the dataset. 

677 

678 Parameters 

679 ---------- 

680 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

681 When a `DatasetRef` is provided, ``dataId`` should be `None`. 

682 Otherwise the `DatasetType` or name thereof. 

683 dataId : `dict` or `DataCoordinate` 

684 A `dict` of `Dimension` link name, value pairs that label the 

685 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

686 should be provided as the first argument. 

687 predict : `bool` 

688 If `True`, allow URIs to be returned of datasets that have not 

689 been written. 

690 collections : Any, optional 

691 Collections to be searched, overriding ``self.collections``. 

692 Can be any of the types supported by the ``collections`` argument 

693 to butler construction. 

694 run : `str`, optional 

695 Run to use for predictions, overriding ``self.run``. 

696 **kwargs 

697 Additional keyword arguments used to augment or construct a 

698 `DataCoordinate`. See `DataCoordinate.standardize` 

699 parameters. 

700 

701 Returns 

702 ------- 

703 uris : `DatasetRefURIs` 

704 The URI to the primary artifact associated with this dataset (if 

705 the dataset was disassembled within the datastore this may be 

706 `None`), and the URIs to any components associated with the dataset 

707 artifact (can be empty if there are no components). 

708 """ 

709 raise NotImplementedError() 

710 

711 @abstractmethod 

712 def getURI( 

713 self, 

714 datasetRefOrType: DatasetRef | DatasetType | str, 

715 /, 

716 dataId: DataId | None = None, 

717 *, 

718 predict: bool = False, 

719 collections: Any = None, 

720 run: str | None = None, 

721 **kwargs: Any, 

722 ) -> ResourcePath: 

723 """Return the URI to the Dataset. 

724 

725 Parameters 

726 ---------- 

727 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

728 When a `DatasetRef` is provided, ``dataId`` should be `None`. 

729 Otherwise the `DatasetType` or name thereof. 

730 dataId : `dict` or `DataCoordinate` 

731 A `dict` of `Dimension` link name, value pairs that label the 

732 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

733 should be provided as the first argument. 

734 predict : `bool` 

735 If `True`, allow URIs to be returned of datasets that have not 

736 been written. 

737 collections : Any, optional 

738 Collections to be searched, overriding ``self.collections``. 

739 Can be any of the types supported by the ``collections`` argument 

740 to butler construction. 

741 run : `str`, optional 

742 Run to use for predictions, overriding ``self.run``. 

743 **kwargs 

744 Additional keyword arguments used to augment or construct a 

745 `DataCoordinate`. See `DataCoordinate.standardize` 

746 parameters. 

747 

748 Returns 

749 ------- 

750 uri : `lsst.resources.ResourcePath` 

751 URI pointing to the Dataset within the datastore. If the 

752 Dataset does not exist in the datastore, and if ``predict`` is 

753 `True`, the URI will be a prediction and will include a URI 

754 fragment "#predicted". 

755 If the datastore does not have entities that relate well 

756 to the concept of a URI, the returned URI string will be 

757 descriptive. The returned URI is not guaranteed to be obtainable. 

758 

759 Raises 

760 ------ 

761 LookupError 

762 Raised if a URI has been requested for a dataset that does not 

763 exist and guessing is not allowed. 

764 ValueError 

765 Raised if a resolved `DatasetRef` was passed as an input, but it 

766 differs from the one found in the registry. 

767 TypeError 

768 Raised if no collections were provided. 

769 RuntimeError 

770 Raised if a URI is requested for a dataset that consists of 

771 multiple artifacts. 
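
Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative::

    uri = butler.getURI("bias", instrument="HSC", detector=42)
    print(uri.geturl())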

772 """ 

773 raise NotImplementedError() 

774 

775 @abstractmethod 

776 def retrieveArtifacts( 

777 self, 

778 refs: Iterable[DatasetRef], 

779 destination: ResourcePathExpression, 

780 transfer: str = "auto", 

781 preserve_path: bool = True, 

782 overwrite: bool = False, 

783 ) -> list[ResourcePath]: 

784 """Retrieve the artifacts associated with the supplied refs. 

785 

786 Parameters 

787 ---------- 

788 refs : iterable of `DatasetRef` 

789 The datasets for which artifacts are to be retrieved. 

790 A single ref can result in multiple artifacts. The refs must 

791 be resolved. 

792 destination : `lsst.resources.ResourcePath` or `str` 

793 Location to write the artifacts. 

794 transfer : `str`, optional 

795 Method to use to transfer the artifacts. Must be one of the options 

796 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

797 "move" is not allowed. 

798 preserve_path : `bool`, optional 

799 If `True` the full path of the artifact within the datastore 

800 is preserved. If `False` the final file component of the path 

801 is used. 

802 overwrite : `bool`, optional 

803 If `True` allow transfers to overwrite existing files at the 

804 destination. 

805 

806 Returns 

807 ------- 

808 targets : `list` of `lsst.resources.ResourcePath` 

809 URIs of file artifacts in destination location. Order is not 

810 preserved. 

811 

812 Notes 

813 ----- 

814 For non-file datastores the artifacts written to the destination 

815 may not match the representation inside the datastore. For example 

816 a hierarchical data structure in a NoSQL database may well be stored 

817 as a JSON file. 
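
Examples
--------
A minimal sketch, retrieving the artifacts for the results of a registry
query; the dataset type and destination are illustrative::

    refs = butler.registry.queryDatasets("flat")
    paths = butler.retrieveArtifacts(
        refs, destination="/tmp/flats", transfer="copy"
    )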

818 """ 

819 raise NotImplementedError() 

820 

821 @abstractmethod 

822 def exists( 

823 self, 

824 dataset_ref_or_type: DatasetRef | DatasetType | str, 

825 /, 

826 data_id: DataId | None = None, 

827 *, 

828 full_check: bool = True, 

829 collections: Any = None, 

830 **kwargs: Any, 

831 ) -> DatasetExistence: 

832 """Indicate whether a dataset is known to Butler registry and 

833 datastore. 

834 

835 Parameters 

836 ---------- 

837 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

838 When a `DatasetRef` is provided, ``data_id`` should be `None`. 

839 Otherwise the `DatasetType` or name thereof. 

840 data_id : `dict` or `DataCoordinate` 

841 A `dict` of `Dimension` link name, value pairs that label the 

842 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

843 should be provided as the first argument. 

844 full_check : `bool`, optional 

845 If `True`, an additional check will be made for dataset artifact 

846 existence. This will involve additional overhead due to the need 

847 to query an external system. If `False`, the registry and datastore 

848 will only be asked whether they know about the dataset; no 

849 check for the artifact will be performed. 

850 collections : Any, optional 

851 Collections to be searched, overriding ``self.collections``. 

852 Can be any of the types supported by the ``collections`` argument 

853 to butler construction. 

854 **kwargs 

855 Additional keyword arguments used to augment or construct a 

856 `DataCoordinate`. See `DataCoordinate.standardize` 

857 parameters. 

858 

859 Returns 

860 ------- 

861 existence : `DatasetExistence` 

862 Object indicating whether the dataset is known to registry and 

863 datastore. Evaluates to `True` if the dataset is present and known 

864 to both. 
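
Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative::

    existence = butler.exists("bias", instrument="HSC", detector=42)
    if existence:
        # The dataset is known to both registry and datastore.
        ...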

865 """ 

866 raise NotImplementedError() 

867 

868 @abstractmethod 

869 def _exists_many( 

870 self, 

871 refs: Iterable[DatasetRef], 

872 /, 

873 *, 

874 full_check: bool = True, 

875 ) -> dict[DatasetRef, DatasetExistence]: 

876 """Indicate whether multiple datasets are known to Butler registry and 

877 datastore. 

878 

879 This is an experimental API that may change at any moment. 

880 

881 Parameters 

882 ---------- 

883 refs : iterable of `DatasetRef` 

884 The datasets to be checked. 

885 full_check : `bool`, optional 

886 If `True`, an additional check will be made for dataset artifact 

887 existence. This will involve additional overhead due to the need 

888 to query an external system. If `False`, the registry and datastore 

889 will only be asked whether they know about the dataset; no 

890 check for the artifact will be performed. 

891 

892 Returns 

893 ------- 

894 existence : dict of [`DatasetRef`, `DatasetExistence`] 

895 Mapping from the given dataset refs to an enum indicating the 

896 status of the dataset in registry and datastore. 

897 Each value evaluates to `True` if the dataset is present and known 

898 to both. 

899 """ 

900 raise NotImplementedError() 

901 

902 @abstractmethod 

903 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

904 """Remove one or more `~CollectionType.RUN` collections and the 

905 datasets within them. 

906 

907 Parameters 

908 ---------- 

909 names : `~collections.abc.Iterable` [ `str` ] 

910 The names of the collections to remove. 

911 unstore : `bool`, optional 

912 If `True` (default), delete datasets from all datastores in which 

913 they are present, and attempt to rollback the registry deletions if 

914 datastore deletions fail (which may not always be possible). If 

915 `False`, datastore records for these datasets are still removed, 

916 but any artifacts (e.g. files) will not be. 

917 

918 Raises 

919 ------ 

920 TypeError 

921 Raised if one or more collections are not of type 

922 `~CollectionType.RUN`. 
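
Examples
--------
A minimal sketch; the run name is illustrative::

    butler.removeRuns(["u/alice/DM-50000/a"], unstore=True)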

923 """ 

924 raise NotImplementedError() 

925 

926 @abstractmethod 

927 def ingest( 

928 self, 

929 *datasets: FileDataset, 

930 transfer: str | None = "auto", 

931 run: str | None = None, 

932 idGenerationMode: DatasetIdGenEnum | None = None, 

933 record_validation_info: bool = True, 

934 ) -> None: 

935 """Store and register one or more datasets that already exist on disk. 

936 

937 Parameters 

938 ---------- 

939 datasets : `FileDataset` 

940 Each positional argument is a struct containing information about 

941 a file to be ingested, including its URI (either absolute or 

942 relative to the datastore root, if applicable), a resolved 

943 `DatasetRef`, and optionally a formatter class or its 

944 fully-qualified string name. If a formatter is not provided, the 

945 formatter that would be used for `put` is assumed. On successful 

946 ingest all `FileDataset.formatter` attributes will be set to the 

947 formatter class used. `FileDataset.path` attributes may be modified 

948 to put paths in whatever the datastore considers a standardized 

949 form. 

950 transfer : `str`, optional 

951 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

952 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

953 transfer the file. 

954 run : `str`, optional 

955 The name of the run ingested datasets should be added to, 

956 overriding ``self.run``. This parameter is now deprecated since 

957 the run is encoded in the ``FileDataset``. 

958 idGenerationMode : `DatasetIdGenEnum`, optional 

959 Specifies option for generating dataset IDs. Parameter is 

960 deprecated. 

961 record_validation_info : `bool`, optional 

962 If `True`, the default, the datastore can record validation 

963 information associated with the file. If `False` the datastore 

964 will not attempt to track any information such as checksums 

965 or file sizes. This can be useful if such information is tracked 

966 in an external system or if the file is to be compressed in place. 

967 It is up to the datastore whether this parameter is relevant. 

968 

969 Raises 

970 ------ 

971 TypeError 

972 Raised if the butler is read-only or if no run was provided. 

973 NotImplementedError 

974 Raised if the `Datastore` does not support the given transfer mode. 

975 DatasetTypeNotSupportedError 

976 Raised if one or more files to be ingested have a dataset type that 

977 is not supported by the `Datastore`. 

978 FileNotFoundError 

979 Raised if one of the given files does not exist. 

980 FileExistsError 

981 Raised if transfer is not `None` but the (internal) location the 

982 file would be moved to is already occupied. 

983 

984 Notes 

985 ----- 

986 This operation is not fully exception safe: if a database operation 

987 fails, the given `FileDataset` instances may be only partially updated. 

988 

989 It is atomic in terms of database operations (they will either all 

990 succeed or all fail) providing the database engine implements 

991 transactions correctly. It will attempt to be atomic in terms of 

992 filesystem operations as well, but this cannot be implemented 

993 rigorously for most datastores. 
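
Examples
--------
A minimal sketch, assuming ``ref`` is a resolved `DatasetRef` describing
the file being ingested; the path is illustrative::

    butler.ingest(
        FileDataset(path="/data/raw_000001.fits", refs=ref),
        transfer="copy",
    )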

994 """ 

995 raise NotImplementedError() 

996 

997 @abstractmethod 

998 def export( 

999 self, 

1000 *, 

1001 directory: str | None = None, 

1002 filename: str | None = None, 

1003 format: str | None = None, 

1004 transfer: str | None = None, 

1005 ) -> AbstractContextManager[RepoExportContext]: 

1006 """Export datasets from the repository represented by this `Butler`. 

1007 

1008 This method is a context manager that returns a helper object 

1009 (`RepoExportContext`) that is used to indicate what information from 

1010 the repository should be exported. 

1011 

1012 Parameters 

1013 ---------- 

1014 directory : `str`, optional 

1015 Directory dataset files should be written to if ``transfer`` is not 

1016 `None`. 

1017 filename : `str`, optional 

1018 Name for the file that will include database information associated 

1019 with the exported datasets. If this is not an absolute path and 

1020 ``directory`` is not `None`, it will be written to ``directory`` 

1021 instead of the current working directory. Defaults to 

1022 "export.{format}". 

1023 format : `str`, optional 

1024 File format for the database information file. If `None`, the 

1025 extension of ``filename`` will be used. 

1026 transfer : `str`, optional 

1027 Transfer mode passed to `Datastore.export`. 

1028 

1029 Raises 

1030 ------ 

1031 TypeError 

1032 Raised if the set of arguments passed is inconsistent. 

1033 

1034 Examples 

1035 -------- 

1036 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1037 methods are used to provide the iterables over data IDs and/or datasets 

1038 to be exported:: 

1039 

1040 with butler.export(filename="exports.yaml") as export: 

1041 # Export all flats, but none of the dimension element rows 

1042 # (i.e. data ID information) associated with them. 

1043 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1044 elements=()) 

1045 # Export all datasets that start with "deepCoadd_" and all of 

1046 # their associated data ID information. 

1047 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1048 """ 

1049 raise NotImplementedError() 

1050 

1051 @abstractmethod 

1052 def import_( 

1053 self, 

1054 *, 

1055 directory: ResourcePathExpression | None = None, 

1056 filename: ResourcePathExpression | TextIO | None = None, 

1057 format: str | None = None, 

1058 transfer: str | None = None, 

1059 skip_dimensions: set | None = None, 

1060 ) -> None: 

1061 """Import datasets into this repository that were exported from a 

1062 different butler repository via `~lsst.daf.butler.Butler.export`. 

1063 

1064 Parameters 

1065 ---------- 

1066 directory : `~lsst.resources.ResourcePathExpression`, optional 

1067 Directory containing dataset files to import from. If `None`, 

1068 ``filename`` and all dataset file paths specified therein must 

1069 be absolute. 

1070 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1071 A stream or name of file that contains database information 

1072 associated with the exported datasets, typically generated by 

1073 `~lsst.daf.butler.Butler.export`. If this is a string (name) or 

1074 `~lsst.resources.ResourcePath` and is not an absolute path, 

1075 it will first be looked for relative to ``directory`` and if not 

1076 found there it will be looked for in the current working 

1077 directory. Defaults to "export.{format}". 

1078 format : `str`, optional 

1079 File format for ``filename``. If `None`, the extension of 

1080 ``filename`` will be used. 

1081 transfer : `str`, optional 

1082 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1083 skip_dimensions : `set`, optional 

1084 Names of dimensions that should be skipped and not imported. 

1085 

1086 Raises 

1087 ------ 

1088 TypeError 

1089 Raised if the set of arguments passed is inconsistent, or if the 

1090 butler is read-only. 
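
Examples
--------
A minimal sketch, importing an export file produced by
`~lsst.daf.butler.Butler.export`; the paths are illustrative::

    butler.import_(
        directory="/path/to/exported/data",
        filename="exports.yaml",
        transfer="copy",
    )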

1091 """ 

1092 raise NotImplementedError() 

1093 

1094 @abstractmethod 

1095 def transfer_from( 

1096 self, 

1097 source_butler: LimitedButler, 

1098 source_refs: Iterable[DatasetRef], 

1099 transfer: str = "auto", 

1100 skip_missing: bool = True, 

1101 register_dataset_types: bool = False, 

1102 transfer_dimensions: bool = False, 

1103 ) -> Collection[DatasetRef]: 

1104 """Transfer datasets to this Butler from a run in another Butler. 

1105 

1106 Parameters 

1107 ---------- 

1108 source_butler : `LimitedButler` 

1109 Butler from which the datasets are to be transferred. If data IDs 

1110 in ``source_refs`` are not expanded then this has to be a full 

1111 `Butler` whose registry will be used to expand data IDs. 

1112 source_refs : iterable of `DatasetRef` 

1113 Datasets defined in the source butler that should be transferred to 

1114 this butler. In most circumstances, ``transfer_from`` is faster if 

1115 the dataset refs are expanded. 

1116 transfer : `str`, optional 

1117 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1118 skip_missing : `bool` 

1119 If `True`, datasets with no datastore artifact associated with 

1120 them are not transferred. If `False` a registry entry will be 

1121 created even if no datastore record is created (and so will 

1122 look equivalent to the dataset being unstored). 

1123 register_dataset_types : `bool` 

1124 If `True` any missing dataset types are registered. Otherwise 

1125 an exception is raised. 

1126 transfer_dimensions : `bool`, optional 

1127 If `True`, dimension record data associated with the new datasets 

1128 will be transferred. 

1129 

1130 Returns 

1131 ------- 

1132 refs : `list` of `DatasetRef` 

1133 The refs added to this Butler. 

1134 

1135 Notes 

1136 ----- 

1137 The datastore artifact has to exist for a transfer 

1138 to be made, but non-existence is not an error. 

1139 

1140 Datasets that already exist in this run will be skipped. 

1141 

1142 The datasets are imported as part of a transaction, although 

1143 dataset types are registered before the transaction is started. 

1144 This means that it is possible for a dataset type to be registered 

1145 even though transfer has failed. 
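
Examples
--------
A minimal sketch, assuming ``source_butler`` is a full `Butler`; the
dataset type and collection names are illustrative::

    refs = source_butler.registry.queryDatasets(
        "flat", collections="HSC/defaults"
    )
    transferred = butler.transfer_from(
        source_butler, refs, transfer="copy", register_dataset_types=True
    )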

1146 """ 

1147 raise NotImplementedError() 

1148 

1149 @abstractmethod 

1150 def validateConfiguration( 

1151 self, 

1152 logFailures: bool = False, 

1153 datasetTypeNames: Iterable[str] | None = None, 

1154 ignore: Iterable[str] | None = None, 

1155 ) -> None: 

1156 """Validate butler configuration. 

1157 

1158 Checks that each `DatasetType` can be stored in the `Datastore`. 

1159 

1160 Parameters 

1161 ---------- 

1162 logFailures : `bool`, optional 

1163 If `True`, output a log message for every validation error 

1164 detected. 

1165 datasetTypeNames : iterable of `str`, optional 

1166 The `DatasetType` names that should be checked. This allows 

1167 only a subset to be selected. 

1168 ignore : iterable of `str`, optional 

1169 Names of DatasetTypes to skip over. This can be used to skip 

1170 known problems. If a named `DatasetType` corresponds to a 

1171 composite, all components of that `DatasetType` will also be 

1172 ignored. 

1173 

1174 Raises 

1175 ------ 

1176 ButlerValidationError 

1177 Raised if there is some inconsistency with how this Butler 

1178 is configured. 
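
Examples
--------
A minimal sketch, checking a single dataset type and logging any
problems found; the dataset type name is illustrative::

    butler.validateConfiguration(logFailures=True, datasetTypeNames=["bias"])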

1179 """ 

1180 raise NotImplementedError() 

1181 

1182 @property 

1183 @abstractmethod 

1184 def collections(self) -> Sequence[str]: 

1185 """The collections to search by default, in order 

1186 (`~collections.abc.Sequence` [ `str` ]). 

1187 """ 

1188 raise NotImplementedError() 

1189 

1190 @property 

1191 @abstractmethod 

1192 def run(self) -> str | None: 

1193 """Name of the run this butler writes outputs to by default (`str` or 

1194 `None`). 

1195 """ 

1196 raise NotImplementedError() 

1197 

1198 @property 

1199 @abstractmethod 

1200 def registry(self) -> Registry: 

1201 """The object that manages dataset metadata and relationships 

1202 (`Registry`). 

1203 

1204 Many operations that don't involve reading or writing butler datasets 

1205 are accessible only via `Registry` methods. Eventually these methods 

1206 will be replaced by equivalent `Butler` methods. 

1207 """ 

1208 raise NotImplementedError()