Coverage for python/lsst/daf/butler/_butler.py: 65%

142 statements  

coverage.py v7.3.2, created at 2023-12-06 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Sequence 

34from contextlib import AbstractContextManager 

35from typing import Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.logging import getLogger 

40 

41from ._butler_config import ButlerConfig 

42from ._butler_repo_index import ButlerRepoIndex 

43from ._config import Config, ConfigSubset 

44from ._dataset_existence import DatasetExistence 

45from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

46from ._dataset_type import DatasetType 

47from ._deferredDatasetHandle import DeferredDatasetHandle 

48from ._file_dataset import FileDataset 

49from ._limited_butler import LimitedButler 

50from ._storage_class import StorageClass 

51from ._timespan import Timespan 

52from .datastore import DatasetRefURIs, Datastore 

53from .dimensions import DataId, DimensionConfig 

54from .registry import Registry, RegistryConfig, _RegistryFactory 

55from .repo_relocation import BUTLER_ROOT_TAG 

56from .transfers import RepoExportContext 

57 

58_LOG = getLogger(__name__) 

59 

60 

61class Butler(LimitedButler): 

62 """Interface for data butler and factory for Butler instances. 

63 

64 Parameters 

65 ---------- 

66 config : `ButlerConfig`, `Config` or `str`, optional 

67 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

68 If a directory path is given the configuration will be read from a 

69 ``butler.yaml`` file in that location. If `None` is given default 

70 values will be used. If ``config`` contains a "cls" key then its value 

71 is used as the name of the butler class, which must be a sub-class of 

72 this class; otherwise `DirectButler` is instantiated. 

73 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

74 An expression specifying the collections to be searched (in order) when 

75 reading datasets. 

76 This may be a `str` collection name or an iterable thereof. 

77 See :ref:`daf_butler_collection_expressions` for more information. 

78 These collections are not registered automatically and must be 

79 registered manually before they are used by any method, though they 

80 may be registered after the `Butler` is initialized. 

81 run : `str`, optional 

82 Name of the `~CollectionType.RUN` collection new datasets should be 

83 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

84 ``collections`` will be set to ``[run]``. If not `None`, this 

85 collection will automatically be registered. If this is not set (and 

86 ``writeable`` is not set either), a read-only butler will be created. 

87 searchPaths : `list` of `str`, optional 

88 Directory paths to search when calculating the full Butler 

89 configuration. Not used if the supplied config is already a 

90 `ButlerConfig`. 

91 writeable : `bool`, optional 

92 Explicitly sets whether the butler supports write operations. If not 

93 provided, a read-write butler is created if any of ``run``, ``tags``, 

94 or ``chains`` is non-empty. 

95 inferDefaults : `bool`, optional 

96 If `True` (default) infer default data ID values from the values 

97 present in the datasets in ``collections``: if all collections have the 

98 same value (or no value) for a governor dimension, that value will be 

99 the default for that dimension. Nonexistent collections are ignored. 

100 If a default value is provided explicitly for a governor dimension via 

101 ``**kwargs``, no default will be inferred for that dimension. 

102 **kwargs : `Any` 

103 Additional keyword arguments passed to a constructor of actual butler 

104 class. 

105 

106 Notes 

107 ----- 

108 The preferred way to instantiate Butler is via the `from_config` method. 

109 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

110 but ``mypy`` will complain about the former. 

111 """ 

112 

113 def __new__( 

114 cls, 

115 config: Config | ResourcePathExpression | None = None, 

116 *, 

117 collections: Any = None, 

118 run: str | None = None, 

119 searchPaths: Sequence[ResourcePathExpression] | None = None, 

120 writeable: bool | None = None, 

121 inferDefaults: bool = True, 

122 **kwargs: Any, 

123 ) -> Butler: 

124 if cls is Butler: 

125 cls = cls._find_butler_class(config, searchPaths) 

126 # Note: we do not pass any parameters to __new__, Python will pass them 

127 # to __init__ after __new__ returns sub-class instance. 

128 return super().__new__(cls) 

129 

130 @staticmethod 

131 def _find_butler_class( 

132 config: Config | ResourcePathExpression | None = None, 

133 searchPaths: Sequence[ResourcePathExpression] | None = None, 

134 ) -> type[Butler]: 

135 """Find actual class to instantiate.""" 

136 butler_class_name: str | None = None 

137 if config is not None: 

138 # Check for optional "cls" key in config. 

139 if not isinstance(config, Config): 

140 config = ButlerConfig(config, searchPaths=searchPaths) 

141 butler_class_name = config.get("cls") 

142 

143 # Make DirectButler if class is not specified. 

144 butler_class: type[Butler] 

145 if butler_class_name is None: 

146 from .direct_butler import DirectButler 

147 

148 butler_class = DirectButler 

149 else: 

150 butler_class = doImportType(butler_class_name) 

151 if not issubclass(butler_class, Butler): 

152 raise TypeError(f"{butler_class_name} is not a subclass of Butler") 

153 return butler_class 

154 

155 @classmethod 

156 def from_config( 

157 cls, 

158 config: Config | ResourcePathExpression | None = None, 

159 *, 

160 collections: Any = None, 

161 run: str | None = None, 

162 searchPaths: Sequence[ResourcePathExpression] | None = None, 

163 writeable: bool | None = None, 

164 inferDefaults: bool = True, 

165 **kwargs: Any, 

166 ) -> Butler: 

167 """Create butler instance from configuration. 

168 

169 Parameters 

170 ---------- 

171 config : `ButlerConfig`, `Config` or `str`, optional 

172 Configuration. Anything acceptable to the `ButlerConfig` 

173 constructor. If a directory path is given the configuration will be 

174 read from a ``butler.yaml`` file in that location. If `None` is 

175 given default values will be used. If ``config`` contains a "cls" key 

176 then its value is used as the name of the butler class, which must be 

177 a sub-class of this class; otherwise `DirectButler` is instantiated. 

178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

179 An expression specifying the collections to be searched (in order) 

180 when reading datasets. 

181 This may be a `str` collection name or an iterable thereof. 

182 See :ref:`daf_butler_collection_expressions` for more information. 

183 These collections are not registered automatically and must be 

184 registered manually before they are used by any method, though 

185 they may be registered after the `Butler` is initialized. 

186 run : `str`, optional 

187 Name of the `~CollectionType.RUN` collection new datasets should be 

188 inserted into. If ``collections`` is `None` and ``run`` is not 

189 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

190 this collection will automatically be registered. If this is not 

191 set (and ``writeable`` is not set either), a read-only butler will 

192 be created. 

193 searchPaths : `list` of `str`, optional 

194 Directory paths to search when calculating the full Butler 

195 configuration. Not used if the supplied config is already a 

196 `ButlerConfig`. 

197 writeable : `bool`, optional 

198 Explicitly sets whether the butler supports write operations. If 

199 not provided, a read-write butler is created if any of ``run``, 

200 ``tags``, or ``chains`` is non-empty. 

201 inferDefaults : `bool`, optional 

202 If `True` (default) infer default data ID values from the values 

203 present in the datasets in ``collections``: if all collections have 

204 the same value (or no value) for a governor dimension, that value 

205 will be the default for that dimension. Nonexistent collections 

206 are ignored. If a default value is provided explicitly for a 

207 governor dimension via ``**kwargs``, no default will be inferred 

208 for that dimension. 

209 **kwargs : `Any` 

210 Additional keyword arguments passed to a constructor of actual 

211 butler class. 

212 

213 Notes 

214 ----- 

215 Calling this factory method is identical to calling 

216 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

217 complains about a direct ``Butler()`` call. 

218 

219 Examples 

220 -------- 

221 While there are many ways to control exactly how a `Butler` interacts 

222 with the collections in its `Registry`, the most common cases are still 

223 simple. 

224 

225 For a read-only `Butler` that searches one collection, do:: 

226 

227 butler = Butler.from_config( 

228 "/path/to/repo", collections=["u/alice/DM-50000"] 

229 ) 

230 

231 For a read-write `Butler` that writes to and reads from a 

232 `~CollectionType.RUN` collection:: 

233 

234 butler = Butler.from_config( 

235 "/path/to/repo", run="u/alice/DM-50000/a" 

236 ) 

237 

238 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

239 because we want to write to one `~CollectionType.RUN` collection but 

240 read from several others (as well):: 

241 

242 butler = Butler.from_config( 

243 "/path/to/repo", 

244 run="u/alice/DM-50000/a", 

245 collections=[ 

246 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

247 ] 

248 ) 

249 

250 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

251 Datasets will be read first from that run (since it appears first in 

252 the chain), and then from ``u/bob/DM-49998`` and finally 

253 ``HSC/defaults``. 

254 

255 Finally, one can always create a `Butler` with no collections:: 

256 

257 butler = Butler.from_config("/path/to/repo", writeable=True) 

258 

259 This can be extremely useful when you just want to use 

260 ``butler.registry``, e.g. for inserting dimension data or managing 

261 collections, or when the collections you want to use with the butler 

262 are not consistent. Passing ``writeable`` explicitly here is only 

263 necessary if you want to be able to make changes to the repo; usually 

264 the value for ``writeable`` can be guessed from the collection 

265 arguments provided, but it defaults to `False` when there are no 

266 collection arguments. 

267 """ 

268 cls = cls._find_butler_class(config, searchPaths) 

269 return cls( 

270 config, 

271 collections=collections, 

272 run=run, 

273 searchPaths=searchPaths, 

274 writeable=writeable, 

275 inferDefaults=inferDefaults, 

276 **kwargs, 

277 ) 

278 

279 @staticmethod 

280 def makeRepo( 

281 root: ResourcePathExpression, 

282 config: Config | str | None = None, 

283 dimensionConfig: Config | str | None = None, 

284 standalone: bool = False, 

285 searchPaths: list[str] | None = None, 

286 forceConfigRoot: bool = True, 

287 outfile: ResourcePathExpression | None = None, 

288 overwrite: bool = False, 

289 ) -> Config: 

290 """Create an empty data repository by adding a butler.yaml config 

291 to a repository root directory. 

292 

293 Parameters 

294 ---------- 

295 root : `lsst.resources.ResourcePathExpression` 

296 Path or URI to the root location of the new repository. Will be 

297 created if it does not exist. 

298 config : `Config` or `str`, optional 

299 Configuration to write to the repository, after setting any 

300 root-dependent Registry or Datastore config options. Cannot 

301 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

302 configuration will be used. Root-dependent config options 

303 specified in this config are overwritten if ``forceConfigRoot`` 

304 is `True`. 

305 dimensionConfig : `Config` or `str`, optional 

306 Configuration for dimensions, will be used to initialize registry 

307 database. 

308 standalone : `bool` 

309 If `True`, write all expanded defaults, not just customized or 

310 repository-specific settings. 

311 This (mostly) decouples the repository from the default 

312 configuration, insulating it from changes to the defaults (which 

313 may be good or bad, depending on the nature of the changes). 

314 Future *additions* to the defaults will still be picked up when 

315 initializing `Butlers` to repos created with ``standalone=True``. 

316 searchPaths : `list` of `str`, optional 

317 Directory paths to search when calculating the full butler 

318 configuration. 

319 forceConfigRoot : `bool`, optional 

320 If `False`, any values present in the supplied ``config`` that 

321 would normally be reset are not overridden and will appear 

322 directly in the output config. This allows non-standard overrides 

323 of the root directory for a datastore or registry to be given. 

324 If this parameter is `True` the values for ``root`` will be 

325 forced into the resulting config if appropriate. 

326 outfile : `lsst.resources.ResourcePathExpression`, optional 

327 If not-`None`, the output configuration will be written to this 

328 location rather than into the repository itself. Can be a URI 

329 string. Can refer to a directory that will be used to write 

330 ``butler.yaml``. 

331 overwrite : `bool`, optional 

332 Create a new configuration file even if one already exists 

333 in the specified output location. Default is to raise 

334 an exception. 

335 

336 Returns 

337 ------- 

338 config : `Config` 

339 The updated `Config` instance written to the repo. 

340 

341 Raises 

342 ------ 

343 ValueError 

344 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

345 regular Config (as these subclasses would make it impossible to 

346 support ``standalone=False``). 

347 FileExistsError 

348 Raised if the output config file already exists. 

349 os.error 

350 Raised if the directory does not exist, exists but is not a 

351 directory, or cannot be created. 

352 

353 Notes 

354 ----- 

355 Note that when ``standalone=False`` (the default), the configuration 

356 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

357 construct the repository should also be used to construct any Butlers 

358 to avoid configuration inconsistencies. 

359 """ 

360 if isinstance(config, ButlerConfig | ConfigSubset): 

361 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

362 

363 # Ensure that the root of the repository exists or can be made 

364 root_uri = ResourcePath(root, forceDirectory=True) 

365 root_uri.mkdir() 

366 

367 config = Config(config) 

368 

369 # If we are creating a new repo from scratch with relative roots, 

370 # do not propagate an explicit root from the config file 

371 if "root" in config: 

372 del config["root"] 

373 

374 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

375 imported_class = doImportType(full["datastore", "cls"]) 

376 if not issubclass(imported_class, Datastore): 

377 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

378 datastoreClass: type[Datastore] = imported_class 

379 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

380 

381 # if key exists in given config, parse it, otherwise parse the defaults 

382 # in the expanded config 

383 if config.get(("registry", "db")): 

384 registryConfig = RegistryConfig(config) 

385 else: 

386 registryConfig = RegistryConfig(full) 

387 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

388 if defaultDatabaseUri is not None: 

389 Config.updateParameters( 

390 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

391 ) 

392 else: 

393 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

394 

395 if standalone: 

396 config.merge(full) 

397 else: 

398 # Always expand the registry.managers section into the per-repo 

399 # config, because after the database schema is created, it's not 

400 # allowed to change anymore. Note that in the standalone=True 

401 # branch, _everything_ in the config is expanded, so there's no 

402 # need to special case this. 

403 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

404 configURI: ResourcePathExpression 

405 if outfile is not None: 

406 # When writing to a separate location we must include 

407 # the root of the butler repo in the config else it won't know 

408 # where to look. 

409 config["root"] = root_uri.geturl() 

410 configURI = outfile 

411 else: 

412 configURI = root_uri 

413 # Strip obscore configuration, if it is present, before writing config 

414 # to a file, obscore config will be stored in registry. 

415 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

416 config_to_write = config.copy() 

417 del config_to_write[obscore_config_key] 

418 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

419 # configFile attribute is updated, need to copy it to original. 

420 config.configFile = config_to_write.configFile 

421 else: 

422 config.dumpToUri(configURI, overwrite=overwrite) 

423 

424 # Create Registry and populate tables 

425 registryConfig = RegistryConfig(config.get("registry")) 

426 dimensionConfig = DimensionConfig(dimensionConfig) 

427 _RegistryFactory(registryConfig).create_from_config( 

428 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

429 ) 

430 

431 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

432 

433 return config 
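
# Illustrative usage (editor's sketch, not part of the upstream code; the
# repository path and run name below are hypothetical):
#
#     Butler.makeRepo("/data/my_repo")
#     butler = Butler.from_config("/data/my_repo", run="u/alice/ingest")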

434 

435 @classmethod 

436 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

437 """Look up the label in a butler repository index. 

438 

439 Parameters 

440 ---------- 

441 label : `str` 

442 Label of the Butler repository to look up. 

443 return_label : `bool`, optional 

444 If ``label`` cannot be found in the repository index (either 

445 because index is not defined or ``label`` is not in the index) and 

446 ``return_label`` is `True` then return ``ResourcePath(label)``. 

447 If ``return_label`` is `False` (default) then an exception will be 

448 raised instead. 

449 

450 Returns 

451 ------- 

452 uri : `lsst.resources.ResourcePath` 

453 URI to the Butler repository associated with the given label or 

454 default value if it is provided. 

455 

456 Raises 

457 ------ 

458 KeyError 

459 Raised if the label is not found in the index, or if an index 

460 is not defined, and ``return_label`` is `False`. 

461 

462 Notes 

463 ----- 

464 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

465 information is discovered. 

466 """ 

467 return ButlerRepoIndex.get_repo_uri(label, return_label) 
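
# Illustrative usage (editor's sketch; the "dc2" label is hypothetical and
# would have to exist in the repository index):
#
#     uri = Butler.get_repo_uri("dc2")
#     # Fall back to treating the argument as a plain path/URI if it is not
#     # present in the index:
#     uri = Butler.get_repo_uri("/data/my_repo", return_label=True)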

468 

469 @classmethod 

470 def get_known_repos(cls) -> set[str]: 

471 """Retrieve the list of known repository labels. 

472 

473 Returns 

474 ------- 

475 repos : `set` of `str` 

476 All the known labels. Can be empty if no index can be found. 

477 

478 Notes 

479 ----- 

480 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

481 information is discovered. 

482 """ 

483 return ButlerRepoIndex.get_known_repos() 

484 

485 @abstractmethod 

486 def _caching_context(self) -> AbstractContextManager[None]: 

487 """Context manager that enables caching.""" 

488 raise NotImplementedError() 

489 

490 @abstractmethod 

491 def transaction(self) -> AbstractContextManager[None]: 

492 """Context manager supporting `Butler` transactions. 

493 

494 Transactions can be nested. 

495 """ 

496 raise NotImplementedError() 

497 

498 @abstractmethod 

499 def put( 

500 self, 

501 obj: Any, 

502 datasetRefOrType: DatasetRef | DatasetType | str, 

503 /, 

504 dataId: DataId | None = None, 

505 *, 

506 run: str | None = None, 

507 **kwargs: Any, 

508 ) -> DatasetRef: 

509 """Store and register a dataset. 

510 

511 Parameters 

512 ---------- 

513 obj : `object` 

514 The dataset. 

515 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

516 When `DatasetRef` is provided, ``dataId`` should be `None`. 

517 Otherwise the `DatasetType` or name thereof. If a fully resolved 

518 `DatasetRef` is given the run and ID are used directly. 

519 dataId : `dict` or `DataCoordinate` 

520 A `dict` of `Dimension` link name, value pairs that label the 

521 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

522 should be provided as the second argument. 

523 run : `str`, optional 

524 The name of the run the dataset should be added to, overriding 

525 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

526 **kwargs 

527 Additional keyword arguments used to augment or construct a 

528 `DataCoordinate`. See `DataCoordinate.standardize` 

529 parameters. Not used if a resolved `DatasetRef` is provided. 

530 

531 Returns 

532 ------- 

533 ref : `DatasetRef` 

534 A reference to the stored dataset, updated with the correct id if 

535 given. 

536 

537 Raises 

538 ------ 

539 TypeError 

540 Raised if the butler is read-only or if no run has been provided. 

541 """ 

542 raise NotImplementedError() 
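
# Illustrative usage (editor's sketch; the dataset type name, data ID keys,
# and the ``exposure``/``existing_ref`` variables are hypothetical):
#
#     ref = butler.put(exposure, "calexp",
#                      instrument="HSC", visit=903334, detector=42)
#     # Or with a resolved DatasetRef, whose run and dataset ID are used
#     # directly:
#     ref = butler.put(exposure, existing_ref)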

543 

544 @abstractmethod 

545 def getDeferred( 

546 self, 

547 datasetRefOrType: DatasetRef | DatasetType | str, 

548 /, 

549 dataId: DataId | None = None, 

550 *, 

551 parameters: dict | None = None, 

552 collections: Any = None, 

553 storageClass: str | StorageClass | None = None, 

554 **kwargs: Any, 

555 ) -> DeferredDatasetHandle: 

556 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

557 after an immediate registry lookup. 

558 

559 Parameters 

560 ---------- 

561 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

562 When `DatasetRef` the `dataId` should be `None`. 

563 Otherwise the `DatasetType` or name thereof. 

564 dataId : `dict` or `DataCoordinate`, optional 

565 A `dict` of `Dimension` link name, value pairs that label the 

566 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

567 should be provided as the first argument. 

568 parameters : `dict` 

569 Additional StorageClass-defined options to control reading, 

570 typically used to efficiently read only a subset of the dataset. 

571 collections : Any, optional 

572 Collections to be searched, overriding ``self.collections``. 

573 Can be any of the types supported by the ``collections`` argument 

574 to butler construction. 

575 storageClass : `StorageClass` or `str`, optional 

576 The storage class to be used to override the Python type 

577 returned by this method. By default the returned type matches 

578 the dataset type definition for this dataset. Specifying a 

579 read `StorageClass` can force a different type to be returned. 

580 This type must be compatible with the original type. 

581 **kwargs 

582 Additional keyword arguments used to augment or construct a 

583 `DataId`. See `DataId` parameters. 

584 

585 Returns 

586 ------- 

587 obj : `DeferredDatasetHandle` 

588 A handle which can be used to retrieve a dataset at a later time. 

589 

590 Raises 

591 ------ 

592 LookupError 

593 Raised if no matching dataset exists in the `Registry` or 

594 datastore. 

595 ValueError 

596 Raised if a resolved `DatasetRef` was passed as an input, but it 

597 differs from the one found in the registry. 

598 TypeError 

599 Raised if no collections were provided. 

600 """ 

601 raise NotImplementedError() 
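
# Illustrative usage (editor's sketch; the dataset type, data ID values, and
# the "bbox" read parameter are hypothetical and assume the handle's ``get``
# accepts StorageClass read parameters):
#
#     handle = butler.getDeferred("calexp", instrument="HSC", visit=903334,
#                                 detector=42)
#     subimage = handle.get(parameters={"bbox": bbox})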

602 

603 @abstractmethod 

604 def get( 

605 self, 

606 datasetRefOrType: DatasetRef | DatasetType | str, 

607 /, 

608 dataId: DataId | None = None, 

609 *, 

610 parameters: dict[str, Any] | None = None, 

611 collections: Any = None, 

612 storageClass: StorageClass | str | None = None, 

613 **kwargs: Any, 

614 ) -> Any: 

615 """Retrieve a stored dataset. 

616 

617 Parameters 

618 ---------- 

619 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

620 When `DatasetRef` the `dataId` should be `None`. 

621 Otherwise the `DatasetType` or name thereof. 

622 If a resolved `DatasetRef`, the associated dataset 

623 is returned directly without additional querying. 

624 dataId : `dict` or `DataCoordinate` 

625 A `dict` of `Dimension` link name, value pairs that label the 

626 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

627 should be provided as the first argument. 

628 parameters : `dict` 

629 Additional StorageClass-defined options to control reading, 

630 typically used to efficiently read only a subset of the dataset. 

631 collections : Any, optional 

632 Collections to be searched, overriding ``self.collections``. 

633 Can be any of the types supported by the ``collections`` argument 

634 to butler construction. 

635 storageClass : `StorageClass` or `str`, optional 

636 The storage class to be used to override the Python type 

637 returned by this method. By default the returned type matches 

638 the dataset type definition for this dataset. Specifying a 

639 read `StorageClass` can force a different type to be returned. 

640 This type must be compatible with the original type. 

641 **kwargs 

642 Additional keyword arguments used to augment or construct a 

643 `DataCoordinate`. See `DataCoordinate.standardize` 

644 parameters. 

645 

646 Returns 

647 ------- 

648 obj : `object` 

649 The dataset. 

650 

651 Raises 

652 ------ 

653 LookupError 

654 Raised if no matching dataset exists in the `Registry`. 

655 TypeError 

656 Raised if no collections were provided. 

657 

658 Notes 

659 ----- 

660 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

661 this method requires that the given data ID include temporal dimensions 

662 beyond the dimensions of the dataset type itself, in order to find the 

663 dataset with the appropriate validity range. For example, a "bias" 

664 dataset with native dimensions ``{instrument, detector}`` could be 

665 fetched with a ``{instrument, detector, exposure}`` data ID, because 

666 ``exposure`` is a temporal dimension. 

667 """ 

668 raise NotImplementedError() 
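
# Illustrative usage (editor's sketch; the dataset type name and data ID
# values are hypothetical):
#
#     calexp = butler.get("calexp", instrument="HSC", visit=903334,
#                         detector=42)
#     # Equivalent call using an explicit data ID dict:
#     calexp = butler.get(
#         "calexp", {"instrument": "HSC", "visit": 903334, "detector": 42}
#     )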

669 

670 @abstractmethod 

671 def getURIs( 

672 self, 

673 datasetRefOrType: DatasetRef | DatasetType | str, 

674 /, 

675 dataId: DataId | None = None, 

676 *, 

677 predict: bool = False, 

678 collections: Any = None, 

679 run: str | None = None, 

680 **kwargs: Any, 

681 ) -> DatasetRefURIs: 

682 """Return the URIs associated with the dataset. 

683 

684 Parameters 

685 ---------- 

686 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

687 When `DatasetRef` the `dataId` should be `None`. 

688 Otherwise the `DatasetType` or name thereof. 

689 dataId : `dict` or `DataCoordinate` 

690 A `dict` of `Dimension` link name, value pairs that label the 

691 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

692 should be provided as the first argument. 

693 predict : `bool` 

694 If `True`, allow URIs to be returned of datasets that have not 

695 been written. 

696 collections : Any, optional 

697 Collections to be searched, overriding ``self.collections``. 

698 Can be any of the types supported by the ``collections`` argument 

699 to butler construction. 

700 run : `str`, optional 

701 Run to use for predictions, overriding ``self.run``. 

702 **kwargs 

703 Additional keyword arguments used to augment or construct a 

704 `DataCoordinate`. See `DataCoordinate.standardize` 

705 parameters. 

706 

707 Returns 

708 ------- 

709 uris : `DatasetRefURIs` 

710 The URI to the primary artifact associated with this dataset (if 

711 the dataset was disassembled within the datastore this may be 

712 `None`), and the URIs to any components associated with the dataset 

713 artifact (this can be empty if there are no components). 

714 """ 

715 raise NotImplementedError() 

716 

717 @abstractmethod 

718 def getURI( 

719 self, 

720 datasetRefOrType: DatasetRef | DatasetType | str, 

721 /, 

722 dataId: DataId | None = None, 

723 *, 

724 predict: bool = False, 

725 collections: Any = None, 

726 run: str | None = None, 

727 **kwargs: Any, 

728 ) -> ResourcePath: 

729 """Return the URI to the Dataset. 

730 

731 Parameters 

732 ---------- 

733 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

734 When `DatasetRef` the `dataId` should be `None`. 

735 Otherwise the `DatasetType` or name thereof. 

736 dataId : `dict` or `DataCoordinate` 

737 A `dict` of `Dimension` link name, value pairs that label the 

738 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

739 should be provided as the first argument. 

740 predict : `bool` 

741 If `True`, allow URIs to be returned of datasets that have not 

742 been written. 

743 collections : Any, optional 

744 Collections to be searched, overriding ``self.collections``. 

745 Can be any of the types supported by the ``collections`` argument 

746 to butler construction. 

747 run : `str`, optional 

748 Run to use for predictions, overriding ``self.run``. 

749 **kwargs 

750 Additional keyword arguments used to augment or construct a 

751 `DataCoordinate`. See `DataCoordinate.standardize` 

752 parameters. 

753 

754 Returns 

755 ------- 

756 uri : `lsst.resources.ResourcePath` 

757 URI pointing to the Dataset within the datastore. If the 

758 Dataset does not exist in the datastore, and if ``predict`` is 

759 `True`, the URI will be a prediction and will include a URI 

760 fragment "#predicted". 

761 If the datastore does not have entities that relate well 

762 to the concept of a URI the returned URI string will be 

763 descriptive. The returned URI is not guaranteed to be obtainable. 

764 

765 Raises 

766 ------ 

767 LookupError 

768 Raised if a URI has been requested for a dataset that does not 

769 exist and guessing is not allowed. 

770 ValueError 

771 Raised if a resolved `DatasetRef` was passed as an input, but it 

772 differs from the one found in the registry. 

773 TypeError 

774 Raised if no collections were provided. 

775 RuntimeError 

776 Raised if a URI is requested for a dataset that consists of 

777 multiple artifacts. 

778 """ 

779 raise NotImplementedError() 
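
# Illustrative usage (editor's sketch; dataset type, data ID values, and run
# name are hypothetical):
#
#     uri = butler.getURI("calexp", instrument="HSC", visit=903334,
#                         detector=42)
#     # Predict the URI for a dataset that has not been written yet:
#     uri = butler.getURI("calexp", instrument="HSC", visit=903334,
#                         detector=42, predict=True, run="u/alice/run")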

780 

781 @abstractmethod 

782 def get_dataset_type(self, name: str) -> DatasetType: 

783 """Get the `DatasetType`. 

784 

785 Parameters 

786 ---------- 

787 name : `str` 

788 Name of the type. 

789 

790 Returns 

791 ------- 

792 type : `DatasetType` 

793 The `DatasetType` associated with the given name. 

794 

795 Raises 

796 ------ 

797 lsst.daf.butler.MissingDatasetTypeError 

798 Raised if the requested dataset type has not been registered. 

799 

800 Notes 

801 ----- 

802 This method handles component dataset types automatically, though most 

803 other operations do not. 

804 """ 

805 raise NotImplementedError() 

806 

807 @abstractmethod 

808 def get_dataset( 

809 self, 

810 id: DatasetId, 

811 storage_class: str | StorageClass | None, 

812 dimension_records: bool = False, 

813 datastore_records: bool = False, 

814 ) -> DatasetRef | None: 

815 """Retrieve a Dataset entry. 

816 

817 Parameters 

818 ---------- 

819 id : `DatasetId` 

820 The unique identifier for the dataset. 

821 storage_class : `str` or `StorageClass` or `None` 

822 A storage class to use when creating the returned entry. If given 

823 it must be compatible with the default storage class. 

824 dimension_records : `bool`, optional 

825 If `True` the ref will be expanded and contain dimension records. 

826 datastore_records : `bool`, optional 

827 If `True` the ref will contain associated datastore records. 

828 

829 Returns 

830 ------- 

831 ref : `DatasetRef` or `None` 

832 A ref to the Dataset, or `None` if no matching Dataset 

833 was found. 

834 """ 

835 raise NotImplementedError() 

836 

837 @abstractmethod 

838 def find_dataset( 

839 self, 

840 dataset_type: DatasetType | str, 

841 data_id: DataId | None = None, 

842 *, 

843 collections: str | Sequence[str] | None = None, 

844 timespan: Timespan | None = None, 

845 storage_class: str | StorageClass | None = None, 

846 dimension_records: bool = False, 

847 datastore_records: bool = False, 

848 **kwargs: Any, 

849 ) -> DatasetRef | None: 

850 """Find a dataset given its `DatasetType` and data ID. 

851 

852 This can be used to obtain a `DatasetRef` that permits the dataset to 

853 be read from a `Datastore`. If the dataset is a component and can not 

854 be found using the provided dataset type, a dataset ref for the parent 

855 will be returned instead but with the correct dataset type. 

856 

857 Parameters 

858 ---------- 

859 dataset_type : `DatasetType` or `str` 

860 A `DatasetType` or the name of one. If this is a `DatasetType` 

861 instance, its storage class will be respected and propagated to 

862 the output, even if it differs from the dataset type definition 

863 in the registry, as long as the storage classes are convertible. 

864 data_id : `dict` or `DataCoordinate`, optional 

865 A `dict`-like object containing the `Dimension` links that identify 

866 the dataset within a collection. If it is a `dict` the dataId 

867 can include dimension record values such as ``day_obs`` and 

868 ``seq_num`` or ``full_name`` that can be used to derive the 

869 primary dimension. 

870 collections : `str` or `list` [`str`], optional 

871 An ordered list of collections to search for the dataset. 

872 Defaults to ``self.defaults.collections``. 

873 timespan : `Timespan`, optional 

874 A timespan that the validity range of the dataset must overlap. 

875 If not provided, any `~CollectionType.CALIBRATION` collections 

876 matched by the ``collections`` argument will not be searched. 

877 storage_class : `str` or `StorageClass` or `None` 

878 A storage class to use when creating the returned entry. If given 

879 it must be compatible with the default storage class. 

880 dimension_records : `bool`, optional 

881 If `True` the ref will be expanded and contain dimension records. 

882 datastore_records : `bool`, optional 

883 If `True` the ref will contain associated datastore records. 

884 **kwargs 

885 Additional keyword arguments passed to 

886 `DataCoordinate.standardize` to convert ``dataId`` to a true 

887 `DataCoordinate` or augment an existing one. This can also include 

888 dimension record metadata that can be used to derive a primary 

889 dimension value. 

890 

891 Returns 

892 ------- 

893 ref : `DatasetRef` or `None` 

894 A reference to the dataset, or `None` if no matching Dataset 

895 was found. 

896 

897 Raises 

898 ------ 

899 lsst.daf.butler.NoDefaultCollectionError 

900 Raised if ``collections`` is `None` and 

901 ``self.collections`` is `None`. 

902 LookupError 

903 Raised if one or more data ID keys are missing. 

904 lsst.daf.butler.MissingDatasetTypeError 

905 Raised if the dataset type does not exist. 

906 lsst.daf.butler.MissingCollectionError 

907 Raised if any of ``collections`` does not exist in the registry. 

908 

909 Notes 

910 ----- 

911 This method simply returns `None` and does not raise an exception even 

912 when the set of collections searched is intrinsically incompatible with 

913 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

914 only `~CollectionType.CALIBRATION` collections are being searched. 

915 This may make it harder to debug some lookup failures, but the behavior 

916 is intentional; we consider it more important that failed searches are 

917 reported consistently, regardless of the reason, and that adding 

918 additional collections that do not contain a match to the search path 

919 never changes the behavior. 

920 

921 This method handles component dataset types automatically, though most 

922 other query operations do not. 

923 """ 

924 raise NotImplementedError() 
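
# Illustrative usage (editor's sketch; the dataset type name, collection, and
# data ID values are hypothetical):
#
#     ref = butler.find_dataset("bias", instrument="HSC", detector=42,
#                               collections="HSC/calib")
#     if ref is None:
#         ...  # no matching dataset in the searched collections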

925 

926 @abstractmethod 

927 def retrieveArtifacts( 

928 self, 

929 refs: Iterable[DatasetRef], 

930 destination: ResourcePathExpression, 

931 transfer: str = "auto", 

932 preserve_path: bool = True, 

933 overwrite: bool = False, 

934 ) -> list[ResourcePath]: 

935 """Retrieve the artifacts associated with the supplied refs. 

936 

937 Parameters 

938 ---------- 

939 refs : iterable of `DatasetRef` 

940 The datasets for which artifacts are to be retrieved. 

941 A single ref can result in multiple artifacts. The refs must 

942 be resolved. 

943 destination : `lsst.resources.ResourcePath` or `str` 

944 Location to write the artifacts. 

945 transfer : `str`, optional 

946 Method to use to transfer the artifacts. Must be one of the options 

947 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

948 "move" is not allowed. 

949 preserve_path : `bool`, optional 

950 If `True` the full path of the artifact within the datastore 

951 is preserved. If `False` the final file component of the path 

952 is used. 

953 overwrite : `bool`, optional 

954 If `True` allow transfers to overwrite existing files at the 

955 destination. 

956 

957 Returns 

958 ------- 

959 targets : `list` of `lsst.resources.ResourcePath` 

960 URIs of file artifacts in destination location. Order is not 

961 preserved. 

962 

963 Notes 

964 ----- 

965 For non-file datastores the artifacts written to the destination 

966 may not match the representation inside the datastore. For example 

967 a hierarchical data structure in a NoSQL database may well be stored 

968 as a JSON file. 

969 """ 

970 raise NotImplementedError() 
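
# Illustrative usage (editor's sketch; the dataset type, collection, and
# destination directory are hypothetical):
#
#     refs = butler.registry.queryDatasets("calexp",
#                                          collections="HSC/runs/example")
#     paths = butler.retrieveArtifacts(refs, destination="/tmp/export",
#                                      transfer="copy")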

971 

972 @abstractmethod 

973 def exists( 

974 self, 

975 dataset_ref_or_type: DatasetRef | DatasetType | str, 

976 /, 

977 data_id: DataId | None = None, 

978 *, 

979 full_check: bool = True, 

980 collections: Any = None, 

981 **kwargs: Any, 

982 ) -> DatasetExistence: 

983 """Indicate whether a dataset is known to Butler registry and 

984 datastore. 

985 

986 Parameters 

987 ---------- 

988 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

989 When `DatasetRef` the `dataId` should be `None`. 

990 Otherwise the `DatasetType` or name thereof. 

991 data_id : `dict` or `DataCoordinate` 

992 A `dict` of `Dimension` link name, value pairs that label the 

993 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

994 should be provided as the first argument. 

995 full_check : `bool`, optional 

996 If `True`, an additional check will be made for dataset artifact 

997 existence. This will involve additional overhead due to the need 

998 to query an external system. If `False` registry and datastore 

999 will solely be asked if they know about the dataset but no 

1000 check for the artifact will be performed. 

1001 collections : Any, optional 

1002 Collections to be searched, overriding ``self.collections``. 

1003 Can be any of the types supported by the ``collections`` argument 

1004 to butler construction. 

1005 **kwargs 

1006 Additional keyword arguments used to augment or construct a 

1007 `DataCoordinate`. See `DataCoordinate.standardize` 

1008 parameters. 

1009 

1010 Returns 

1011 ------- 

1012 existence : `DatasetExistence` 

1013 Object indicating whether the dataset is known to registry and 

1014 datastore. Evaluates to `True` if the dataset is present and known 

1015 to both. 

1016 """ 

1017 raise NotImplementedError() 
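
# Illustrative usage (editor's sketch; the dataset type name and data ID
# values are hypothetical):
#
#     existence = butler.exists("calexp", instrument="HSC", visit=903334,
#                               detector=42)
#     if existence:  # true only if known to both registry and datastore
#         ...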

1018 

1019 @abstractmethod 

1020 def _exists_many( 

1021 self, 

1022 refs: Iterable[DatasetRef], 

1023 /, 

1024 *, 

1025 full_check: bool = True, 

1026 ) -> dict[DatasetRef, DatasetExistence]: 

1027 """Indicate whether multiple datasets are known to Butler registry and 

1028 datastore. 

1029 

1030 This is an experimental API that may change at any moment. 

1031 

1032 Parameters 

1033 ---------- 

1034 refs : iterable of `DatasetRef` 

1035 The datasets to be checked. 

1036 full_check : `bool`, optional 

1037 If `True`, an additional check will be made for dataset artifact 

1038 existence. This will involve additional overhead due to the need 

1039 to query an external system. If `False` registry and datastore 

1040 will solely be asked if they know about the dataset but no 

1041 check for the artifact will be performed. 

1042 

1043 Returns 

1044 ------- 

1045 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1046 Mapping from the given dataset refs to an enum indicating the 

1047 status of the dataset in registry and datastore. 

1048 Each value evaluates to `True` if the dataset is present and known 

1049 to both. 

1050 """ 

1051 raise NotImplementedError() 

1052 

1053 @abstractmethod 

1054 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1055 """Remove one or more `~CollectionType.RUN` collections and the 

1056 datasets within them. 

1057 

1058 Parameters 

1059 ---------- 

1060 names : `~collections.abc.Iterable` [ `str` ] 

1061 The names of the collections to remove. 

1062 unstore : `bool`, optional 

1063 If `True` (default), delete datasets from all datastores in which 

1064 they are present, and attempt to roll back the registry deletions if 

1065 datastore deletions fail (which may not always be possible). If 

1066 `False`, datastore records for these datasets are still removed, 

1067 but any artifacts (e.g. files) will not be. 

1068 

1069 Raises 

1070 ------ 

1071 TypeError 

1072 Raised if one or more collections are not of type 

1073 `~CollectionType.RUN`. 

1074 """ 

1075 raise NotImplementedError() 

1076 

1077 @abstractmethod 

1078 def ingest( 

1079 self, 

1080 *datasets: FileDataset, 

1081 transfer: str | None = "auto", 

1082 run: str | None = None, 

1083 idGenerationMode: DatasetIdGenEnum | None = None, 

1084 record_validation_info: bool = True, 

1085 ) -> None: 

1086 """Store and register one or more datasets that already exist on disk. 

1087 

1088 Parameters 

1089 ---------- 

1090 datasets : `FileDataset` 

1091 Each positional argument is a struct containing information about 

1092 a file to be ingested, including its URI (either absolute or 

1093 relative to the datastore root, if applicable), a resolved 

1094 `DatasetRef`, and optionally a formatter class or its 

1095 fully-qualified string name. If a formatter is not provided, the 

1096 formatter that would be used for `put` is assumed. On successful 

1097 ingest all `FileDataset.formatter` attributes will be set to the 

1098 formatter class used. `FileDataset.path` attributes may be modified 

1099 to put paths in whatever the datastore considers a standardized 

1100 form. 

1101 transfer : `str`, optional 

1102 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1103 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1104 transfer the file. 

1105 run : `str`, optional 

1106 The name of the run ingested datasets should be added to, 

1107 overriding ``self.run``. This parameter is now deprecated since 

1108 the run is encoded in the ``FileDataset``. 

1109 idGenerationMode : `DatasetIdGenEnum`, optional 

1110 Specifies option for generating dataset IDs. Parameter is 

1111 deprecated. 

1112 record_validation_info : `bool`, optional 

1113 If `True`, the default, the datastore can record validation 

1114 information associated with the file. If `False` the datastore 

1115 will not attempt to track any information such as checksums 

1116 or file sizes. This can be useful if such information is tracked 

1117 in an external system or if the file is to be compressed in place. 

1118 It is up to the datastore whether this parameter is relevant. 

1119 

1120 Raises 

1121 ------ 

1122 TypeError 

1123 Raised if the butler is read-only or if no run was provided. 

1124 NotImplementedError 

1125 Raised if the `Datastore` does not support the given transfer mode. 

1126 DatasetTypeNotSupportedError 

1127 Raised if one or more files to be ingested have a dataset type that 

1128 is not supported by the `Datastore`. 

1129 FileNotFoundError 

1130 Raised if one of the given files does not exist. 

1131 FileExistsError 

1132 Raised if transfer is not `None` but the (internal) location the 

1133 file would be moved to is already occupied. 

1134 

1135 Notes 

1136 ----- 

1137 This operation is not fully exception safe: if a database operation 

1138 fails, the given `FileDataset` instances may be only partially updated. 

1139 

1140 It is atomic in terms of database operations (they will either all 

1141 succeed or all fail) providing the database engine implements 

1142 transactions correctly. It will attempt to be atomic in terms of 

1143 filesystem operations as well, but this cannot be implemented 

1144 rigorously for most datastores. 

1145 """ 

1146 raise NotImplementedError() 
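
# Illustrative usage (editor's sketch; the file path is hypothetical and the
# FileDataset construction assumes a resolved DatasetRef in ``ref``):
#
#     dataset = FileDataset(path="/data/raw/file.fits", refs=[ref])
#     butler.ingest(dataset, transfer="copy")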

1147 

1148 @abstractmethod 

1149 def export( 

1150 self, 

1151 *, 

1152 directory: str | None = None, 

1153 filename: str | None = None, 

1154 format: str | None = None, 

1155 transfer: str | None = None, 

1156 ) -> AbstractContextManager[RepoExportContext]: 

1157 """Export datasets from the repository represented by this `Butler`. 

1158 

1159 This method is a context manager that returns a helper object 

1160 (`RepoExportContext`) that is used to indicate what information from 

1161 the repository should be exported. 

1162 

1163 Parameters 

1164 ---------- 

1165 directory : `str`, optional 

1166 Directory dataset files should be written to if ``transfer`` is not 

1167 `None`. 

1168 filename : `str`, optional 

1169 Name for the file that will include database information associated 

1170 with the exported datasets. If this is not an absolute path and 

1171 ``directory`` is not `None`, it will be written to ``directory`` 

1172 instead of the current working directory. Defaults to 

1173 "export.{format}". 

1174 format : `str`, optional 

1175 File format for the database information file. If `None`, the 

1176 extension of ``filename`` will be used. 

1177 transfer : `str`, optional 

1178 Transfer mode passed to `Datastore.export`. 

1179 

1180 Raises 

1181 ------ 

1182 TypeError 

1183 Raised if the set of arguments passed is inconsistent. 

1184 

1185 Examples 

1186 -------- 

1187 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1188 methods are used to provide the iterables over data IDs and/or datasets 

1189 to be exported:: 

1190 

1191 with butler.export("exports.yaml") as export: 

1192 # Export all flats, but none of the dimension element rows 

1193 # (i.e. data ID information) associated with them. 

1194 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1195 elements=()) 

1196 # Export all datasets that start with "deepCoadd_" and all of 

1197 # their associated data ID information. 

1198 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1199 """ 

1200 raise NotImplementedError() 

1201 

1202 @abstractmethod 

1203 def import_( 

1204 self, 

1205 *, 

1206 directory: ResourcePathExpression | None = None, 

1207 filename: ResourcePathExpression | TextIO | None = None, 

1208 format: str | None = None, 

1209 transfer: str | None = None, 

1210 skip_dimensions: set | None = None, 

1211 ) -> None: 

1212 """Import datasets into this repository that were exported from a 

1213 different butler repository via `~lsst.daf.butler.Butler.export`. 

1214 

1215 Parameters 

1216 ---------- 

1217 directory : `~lsst.resources.ResourcePathExpression`, optional 

1218 Directory containing dataset files to import from. If `None`, 

1219 ``filename`` and all dataset file paths specified therein must 

1220 be absolute. 

1221 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1222 A stream or name of file that contains database information 

1223 associated with the exported datasets, typically generated by 

1224 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1225 `~lsst.resources.ResourcePath` and is not an absolute path, 

1226 it will first be looked for relative to ``directory`` and if not 

1227 found there it will be looked for in the current working 

1228 directory. Defaults to "export.{format}". 

1229 format : `str`, optional 

1230 File format for ``filename``. If `None`, the extension of 

1231 ``filename`` will be used. 

1232 transfer : `str`, optional 

1233 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1234 skip_dimensions : `set`, optional 

1235 Names of dimensions that should be skipped and not imported. 

1236 

1237 Raises 

1238 ------ 

1239 TypeError 

1240 Raised if the set of arguments passed is inconsistent, or if the 

1241 butler is read-only. 

1242 """ 

1243 raise NotImplementedError() 
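
# Illustrative usage (editor's sketch; the directory and file name are
# hypothetical and would normally come from a previous ``Butler.export``):
#
#     butler.import_(directory="/data/exports", filename="export.yaml",
#                    transfer="copy")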

1244 

1245 @abstractmethod 

1246 def transfer_from( 

1247 self, 

1248 source_butler: LimitedButler, 

1249 source_refs: Iterable[DatasetRef], 

1250 transfer: str = "auto", 

1251 skip_missing: bool = True, 

1252 register_dataset_types: bool = False, 

1253 transfer_dimensions: bool = False, 

1254 ) -> Collection[DatasetRef]: 

1255 """Transfer datasets to this Butler from a run in another Butler. 

1256 

1257 Parameters 

1258 ---------- 

1259 source_butler : `LimitedButler` 

1260 Butler from which the datasets are to be transferred. If data IDs 

1261 in ``source_refs`` are not expanded then this has to be a full 

1262 `Butler` whose registry will be used to expand data IDs. 

1263 source_refs : iterable of `DatasetRef` 

1264 Datasets defined in the source butler that should be transferred to 

1265 this butler. In most circumstances, ``transfer_from`` is faster if 

1266 the dataset refs are expanded. 

1267 transfer : `str`, optional 

1268 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1269 skip_missing : `bool` 

1270 If `True`, datasets with no datastore artifact associated with 

1271 them are not transferred. If `False` a registry entry will be 

1272 created even if no datastore record is created (and so will 

1273 look equivalent to the dataset being unstored). 

1274 register_dataset_types : `bool` 

1275 If `True` any missing dataset types are registered. Otherwise 

1276 an exception is raised. 

1277 transfer_dimensions : `bool`, optional 

1278 If `True`, dimension record data associated with the new datasets 

1279 will be transferred. 

1280 

1281 Returns 

1282 ------- 

1283 refs : `list` of `DatasetRef` 

1284 The refs added to this Butler. 

1285 

1286 Notes 

1287 ----- 

1288 The datastore artifact has to exist for a transfer 

1289 to be made but non-existence is not an error. 

1290 

1291 Datasets that already exist in this run will be skipped. 

1292 

1293 The datasets are imported as part of a transaction, although 

1294 dataset types are registered before the transaction is started. 

1295 This means that it is possible for a dataset type to be registered 

1296 even though transfer has failed. 

1297 """ 

1298 raise NotImplementedError() 
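
# Illustrative usage (editor's sketch; the source repository path, dataset
# type, and collection are hypothetical):
#
#     source = Butler.from_config("/data/source_repo")
#     refs = source.registry.queryDatasets("calexp",
#                                          collections="HSC/runs/example")
#     transferred = butler.transfer_from(source, refs, transfer="copy",
#                                        register_dataset_types=True)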

1299 

1300 @abstractmethod 

1301 def validateConfiguration( 

1302 self, 

1303 logFailures: bool = False, 

1304 datasetTypeNames: Iterable[str] | None = None, 

1305 ignore: Iterable[str] | None = None, 

1306 ) -> None: 

1307 """Validate butler configuration. 

1308 

1309 Checks that each `DatasetType` can be stored in the `Datastore`. 

1310 

1311 Parameters 

1312 ---------- 

1313 logFailures : `bool`, optional 

1314 If `True`, output a log message for every validation error 

1315 detected. 

1316 datasetTypeNames : iterable of `str`, optional 

1317 The `DatasetType` names that should be checked. This allows 

1318 only a subset to be selected. 

1319 ignore : iterable of `str`, optional 

1320 Names of DatasetTypes to skip over. This can be used to skip 

1321 known problems. If a named `DatasetType` corresponds to a 

1322 composite, all components of that `DatasetType` will also be 

1323 ignored. 

1324 

1325 Raises 

1326 ------ 

1327 ButlerValidationError 

1328 Raised if there is some inconsistency with how this Butler 

1329 is configured. 

1330 """ 

1331 raise NotImplementedError() 

1332 

1333 @property 

1334 @abstractmethod 

1335 def collections(self) -> Sequence[str]: 

1336 """The collections to search by default, in order 

1337 (`~collections.abc.Sequence` [ `str` ]). 

1338 """ 

1339 raise NotImplementedError() 

1340 

1341 @property 

1342 @abstractmethod 

1343 def run(self) -> str | None: 

1344 """Name of the run this butler writes outputs to by default (`str` or 

1345 `None`). 

1346 """ 

1347 raise NotImplementedError() 

1348 

1349 @property 

1350 @abstractmethod 

1351 def registry(self) -> Registry: 

1352 """The object that manages dataset metadata and relationships 

1353 (`Registry`). 

1354 

1355 Many operations that don't involve reading or writing butler datasets 

1356 are accessible only via `Registry` methods. Eventually these methods 

1357 will be replaced by equivalent `Butler` methods. 

1358 """ 

1359 raise NotImplementedError()