Coverage for python/lsst/daf/butler/_butler.py: 64%

140 statements  

coverage.py v7.3.2, created at 2023-11-04 09:46 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Sequence 

34from contextlib import AbstractContextManager 

35from typing import Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.logging import getLogger 

40 

41from ._butler_config import ButlerConfig 

42from ._butler_repo_index import ButlerRepoIndex 

43from ._config import Config, ConfigSubset 

44from ._dataset_existence import DatasetExistence 

45from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

46from ._dataset_type import DatasetType 

47from ._deferredDatasetHandle import DeferredDatasetHandle 

48from ._file_dataset import FileDataset 

49from ._limited_butler import LimitedButler 

50from ._storage_class import StorageClass 

51from ._timespan import Timespan 

52from .datastore import DatasetRefURIs, Datastore 

53from .dimensions import DataId, DimensionConfig 

54from .registry import Registry, RegistryConfig, _RegistryFactory 

55from .repo_relocation import BUTLER_ROOT_TAG 

56from .transfers import RepoExportContext 

57 

58_LOG = getLogger(__name__) 

59 

60 

61class Butler(LimitedButler): 

62 """Interface for data butler and factory for Butler instances. 

63 

64 Parameters 

65 ---------- 

66 config : `ButlerConfig`, `Config` or `str`, optional

67 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

68 If a directory path is given the configuration will be read from a 

69 ``butler.yaml`` file in that location. If `None` is given, default

70 values will be used. If ``config`` contains a "cls" key then its value

71 is used as the name of the butler class, which must be a subclass of

72 this class; otherwise `DirectButler` is instantiated.

73 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

74 An expression specifying the collections to be searched (in order) when 

75 reading datasets. 

76 This may be a `str` collection name or an iterable thereof. 

77 See :ref:`daf_butler_collection_expressions` for more information. 

78 These collections are not registered automatically and must be 

79 manually registered before they are used by any method, but they may be 

80 manually registered after the `Butler` is initialized. 

81 run : `str`, optional 

82 Name of the `~CollectionType.RUN` collection new datasets should be 

83 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

84 ``collections`` will be set to ``[run]``. If not `None`, this 

85 collection will automatically be registered. If this is not set (and 

86 ``writeable`` is not set either), a read-only butler will be created. 

87 searchPaths : `list` of `str`, optional 

88 Directory paths to search when calculating the full Butler 

89 configuration. Not used if the supplied config is already a 

90 `ButlerConfig`. 

91 writeable : `bool`, optional 

92 Explicitly sets whether the butler supports write operations. If not 

93 provided, a read-write butler is created if any of ``run``, ``tags``, 

94 or ``chains`` is non-empty. 

95 inferDefaults : `bool`, optional 

96 If `True` (default) infer default data ID values from the values 

97 present in the datasets in ``collections``: if all collections have the 

98 same value (or no value) for a governor dimension, that value will be 

99 the default for that dimension. Nonexistent collections are ignored. 

100 If a default value is provided explicitly for a governor dimension via 

101 ``**kwargs``, no default will be inferred for that dimension. 

102 **kwargs : `Any` 

103 Additional keyword arguments passed to a constructor of actual butler 

104 class. 

105 

106 Notes 

107 ----- 

108 The preferred way to instantiate Butler is via the `from_config` method. 

109 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

110 but ``mypy`` will complain about the former. 

111 """ 

112 

113 def __new__( 

114 cls, 

115 config: Config | ResourcePathExpression | None = None, 

116 *, 

117 collections: Any = None, 

118 run: str | None = None, 

119 searchPaths: Sequence[ResourcePathExpression] | None = None, 

120 writeable: bool | None = None, 

121 inferDefaults: bool = True, 

122 **kwargs: Any, 

123 ) -> Butler: 

124 if cls is Butler: 

125 cls = cls._find_butler_class(config, searchPaths) 

126 # Note: we do not pass any parameters to __new__, Python will pass them 

127 # to __init__ after __new__ returns sub-class instance. 

128 return super().__new__(cls) 

129 

130 @staticmethod 

131 def _find_butler_class( 

132 config: Config | ResourcePathExpression | None = None, 

133 searchPaths: Sequence[ResourcePathExpression] | None = None, 

134 ) -> type[Butler]: 

135 """Find actual class to instantiate.""" 

136 butler_class_name: str | None = None 

137 if config is not None: 

138 # Check for optional "cls" key in config. 

139 if not isinstance(config, Config): 

140 config = ButlerConfig(config, searchPaths=searchPaths) 

141 butler_class_name = config.get("cls") 

142 

143 # Make DirectButler if class is not specified. 

144 butler_class: type[Butler] 

145 if butler_class_name is None: 

146 from .direct_butler import DirectButler 

147 

148 butler_class = DirectButler 

149 else: 

150 butler_class = doImportType(butler_class_name) 

151 if not issubclass(butler_class, Butler): 

152 raise TypeError(f"{butler_class_name} is not a subclass of Butler") 

153 return butler_class 

154 

155 @classmethod 

156 def from_config( 

157 cls, 

158 config: Config | ResourcePathExpression | None = None, 

159 *, 

160 collections: Any = None, 

161 run: str | None = None, 

162 searchPaths: Sequence[ResourcePathExpression] | None = None, 

163 writeable: bool | None = None, 

164 inferDefaults: bool = True, 

165 **kwargs: Any, 

166 ) -> Butler: 

167 """Create butler instance from configuration. 

168 

169 Parameters 

170 ---------- 

171 config : `ButlerConfig`, `Config` or `str`, optional

172 Configuration. Anything acceptable to the `ButlerConfig` 

173 constructor. If a directory path is given the configuration will be 

174 read from a ``butler.yaml`` file in that location. If `None` is 

175 given, default values will be used. If ``config`` contains a "cls"

176 key then its value is used as the name of the butler class, which

177 must be a subclass of this class; otherwise `DirectButler` is instantiated.

178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

179 An expression specifying the collections to be searched (in order) 

180 when reading datasets. 

181 This may be a `str` collection name or an iterable thereof. 

182 See :ref:`daf_butler_collection_expressions` for more information. 

183 These collections are not registered automatically and must be 

184 manually registered before they are used by any method, but they 

185 may be manually registered after the `Butler` is initialized. 

186 run : `str`, optional 

187 Name of the `~CollectionType.RUN` collection new datasets should be 

188 inserted into. If ``collections`` is `None` and ``run`` is not 

189 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

190 this collection will automatically be registered. If this is not 

191 set (and ``writeable`` is not set either), a read-only butler will 

192 be created. 

193 searchPaths : `list` of `str`, optional 

194 Directory paths to search when calculating the full Butler 

195 configuration. Not used if the supplied config is already a 

196 `ButlerConfig`. 

197 writeable : `bool`, optional 

198 Explicitly sets whether the butler supports write operations. If 

199 not provided, a read-write butler is created if any of ``run``, 

200 ``tags``, or ``chains`` is non-empty. 

201 inferDefaults : `bool`, optional 

202 If `True` (default) infer default data ID values from the values 

203 present in the datasets in ``collections``: if all collections have 

204 the same value (or no value) for a governor dimension, that value 

205 will be the default for that dimension. Nonexistent collections 

206 are ignored. If a default value is provided explicitly for a 

207 governor dimension via ``**kwargs``, no default will be inferred 

208 for that dimension. 

209 **kwargs : `Any` 

210 Additional keyword arguments passed to a constructor of actual 

211 butler class. 

212 

213 Notes 

214 ----- 

215 Calling this factory method is identical to calling 

216 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

217 complains about a ``Butler()`` call.

218 

219 Examples 

220 -------- 

221 While there are many ways to control exactly how a `Butler` interacts 

222 with the collections in its `Registry`, the most common cases are still 

223 simple. 

224 

225 For a read-only `Butler` that searches one collection, do:: 

226 

227 butler = Butler.from_config( 

228 "/path/to/repo", collections=["u/alice/DM-50000"] 

229 ) 

230 

231 For a read-write `Butler` that writes to and reads from a 

232 `~CollectionType.RUN` collection:: 

233 

234 butler = Butler.from_config( 

235 "/path/to/repo", run="u/alice/DM-50000/a" 

236 ) 

237 

238 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

239 because we want to write to one `~CollectionType.RUN` collection but 

240 read from several others (as well):: 

241 

242 butler = Butler.from_config( 

243 "/path/to/repo", 

244 run="u/alice/DM-50000/a", 

245 collections=[ 

246 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

247 ] 

248 ) 

249 

250 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

251 Datasets will be read first from that run (since it appears first in 

252 the chain), and then from ``u/bob/DM-49998`` and finally 

253 ``HSC/defaults``. 

254 

255 Finally, one can always create a `Butler` with no collections:: 

256 

257 butler = Butler.from_config("/path/to/repo", writeable=True) 

258 

259 This can be extremely useful when you just want to use 

260 ``butler.registry``, e.g. for inserting dimension data or managing 

261 collections, or when the collections you want to use with the butler 

262 are not consistent. Passing ``writeable`` explicitly here is only

263 necessary if you want to be able to make changes to the repo; usually

264 the value for ``writeable`` can be guessed from the collection

265 arguments provided, but it defaults to `False` when there are no

266 collection arguments.

267 """ 

268 cls = cls._find_butler_class(config, searchPaths) 

269 return cls( 

270 config, 

271 collections=collections, 

272 run=run, 

273 searchPaths=searchPaths, 

274 writeable=writeable, 

275 inferDefaults=inferDefaults, 

276 **kwargs, 

277 ) 

278 

279 @staticmethod 

280 def makeRepo( 

281 root: ResourcePathExpression, 

282 config: Config | str | None = None, 

283 dimensionConfig: Config | str | None = None, 

284 standalone: bool = False, 

285 searchPaths: list[str] | None = None, 

286 forceConfigRoot: bool = True, 

287 outfile: ResourcePathExpression | None = None, 

288 overwrite: bool = False, 

289 ) -> Config: 

290 """Create an empty data repository by adding a butler.yaml config 

291 to a repository root directory. 

292 

293 Parameters 

294 ---------- 

295 root : `lsst.resources.ResourcePathExpression` 

296 Path or URI to the root location of the new repository. Will be 

297 created if it does not exist. 

298 config : `Config` or `str`, optional 

299 Configuration to write to the repository, after setting any 

300 root-dependent Registry or Datastore config options. Can not 

301 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

302 configuration will be used. Root-dependent config options 

303 specified in this config are overwritten if ``forceConfigRoot`` 

304 is `True`. 

305 dimensionConfig : `Config` or `str`, optional 

306 Configuration for dimensions, will be used to initialize registry 

307 database. 

308 standalone : `bool` 

309 If True, write all expanded defaults, not just customized or 

310 repository-specific settings. 

311 This (mostly) decouples the repository from the default 

312 configuration, insulating it from changes to the defaults (which 

313 may be good or bad, depending on the nature of the changes). 

314 Future *additions* to the defaults will still be picked up when 

315 initializing `Butlers` to repos created with ``standalone=True``. 

316 searchPaths : `list` of `str`, optional 

317 Directory paths to search when calculating the full butler 

318 configuration. 

319 forceConfigRoot : `bool`, optional 

320 If `False`, any values present in the supplied ``config`` that 

321 would normally be reset are not overridden and will appear 

322 directly in the output config. This allows non-standard overrides 

323 of the root directory for a datastore or registry to be given. 

324 If this parameter is `True` the values for ``root`` will be 

325 forced into the resulting config if appropriate. 

326 outfile : `lsst.resources.ResourcePathExpression`, optional

327 If not-`None`, the output configuration will be written to this 

328 location rather than into the repository itself. Can be a URI 

329 string. Can refer to a directory that will be used to write 

330 ``butler.yaml``. 

331 overwrite : `bool`, optional 

332 Create a new configuration file even if one already exists 

333 in the specified output location. Default is to raise 

334 an exception. 

335 

336 Returns 

337 ------- 

338 config : `Config` 

339 The updated `Config` instance written to the repo. 

340 

341 Raises 

342 ------ 

343 ValueError 

344 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

345 regular Config (as these subclasses would make it impossible to 

346 support ``standalone=False``). 

347 FileExistsError 

348 Raised if the output config file already exists. 

349 os.error 

350 Raised if the directory does not exist, exists but is not a 

351 directory, or cannot be created. 

352 

353 Notes 

354 ----- 

355 Note that when ``standalone=False`` (the default), the configuration 

356 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

357 construct the repository should also be used to construct any Butlers 

358 to avoid configuration inconsistencies. 
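
Examples
--------
A minimal sketch of creating a repository and then connecting to it;
the repository root and the decision to pass ``writeable=True`` are
illustrative assumptions::

    from lsst.daf.butler import Butler

    # Write butler.yaml (and create the registry) under the root.
    config = Butler.makeRepo("/tmp/demo_repo")

    # The new repository can then be opened like any other.
    butler = Butler.from_config("/tmp/demo_repo", writeable=True)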

359 """ 

360 if isinstance(config, ButlerConfig | ConfigSubset): 

361 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

362 

363 # Ensure that the root of the repository exists or can be made 

364 root_uri = ResourcePath(root, forceDirectory=True) 

365 root_uri.mkdir() 

366 

367 config = Config(config) 

368 

369 # If we are creating a new repo from scratch with relative roots, 

370 # do not propagate an explicit root from the config file 

371 if "root" in config: 

372 del config["root"] 

373 

374 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

375 imported_class = doImportType(full["datastore", "cls"]) 

376 if not issubclass(imported_class, Datastore): 

377 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

378 datastoreClass: type[Datastore] = imported_class 

379 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

380 

381 # if key exists in given config, parse it, otherwise parse the defaults 

382 # in the expanded config 

383 if config.get(("registry", "db")): 

384 registryConfig = RegistryConfig(config) 

385 else: 

386 registryConfig = RegistryConfig(full) 

387 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

388 if defaultDatabaseUri is not None: 

389 Config.updateParameters( 

390 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

391 ) 

392 else: 

393 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

394 

395 if standalone: 

396 config.merge(full) 

397 else: 

398 # Always expand the registry.managers section into the per-repo 

399 # config, because after the database schema is created, it's not 

400 # allowed to change anymore. Note that in the standalone=True 

401 # branch, _everything_ in the config is expanded, so there's no 

402 # need to special case this. 

403 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

404 configURI: ResourcePathExpression 

405 if outfile is not None: 

406 # When writing to a separate location we must include 

407 # the root of the butler repo in the config else it won't know 

408 # where to look. 

409 config["root"] = root_uri.geturl() 

410 configURI = outfile 

411 else: 

412 configURI = root_uri 

413 # Strip obscore configuration, if it is present, before writing config 

414 # to a file, obscore config will be stored in registry. 

415 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

416 config_to_write = config.copy() 

417 del config_to_write[obscore_config_key] 

418 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

419 # configFile attribute is updated, need to copy it to original. 

420 config.configFile = config_to_write.configFile 

421 else: 

422 config.dumpToUri(configURI, overwrite=overwrite) 

423 

424 # Create Registry and populate tables 

425 registryConfig = RegistryConfig(config.get("registry")) 

426 dimensionConfig = DimensionConfig(dimensionConfig) 

427 _RegistryFactory(registryConfig).create_from_config( 

428 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

429 ) 

430 

431 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

432 

433 return config 

434 

435 @classmethod 

436 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

437 """Look up the label in a butler repository index. 

438 

439 Parameters 

440 ---------- 

441 label : `str` 

442 Label of the Butler repository to look up. 

443 return_label : `bool`, optional 

444 If ``label`` cannot be found in the repository index (either 

445 because index is not defined or ``label`` is not in the index) and 

446 ``return_label`` is `True` then return ``ResourcePath(label)``. 

447 If ``return_label`` is `False` (default) then an exception will be 

448 raised instead. 

449 

450 Returns 

451 ------- 

452 uri : `lsst.resources.ResourcePath` 

453 URI to the Butler repository associated with the given label or 

454 default value if it is provided. 

455 

456 Raises 

457 ------ 

458 KeyError 

459 Raised if the label is not found in the index, or if an index 

460 is not defined, and ``return_label`` is `False`. 

461 

462 Notes 

463 ----- 

464 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

465 information is discovered. 
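
Examples
--------
A hedged sketch; the label ``"main"`` is an assumed entry in a site
repository index and is purely illustrative::

    from lsst.daf.butler import Butler

    uri = Butler.get_repo_uri("main")

    # With return_label=True an unknown label is handed back as a
    # ResourcePath instead of raising KeyError.
    uri = Butler.get_repo_uri("/some/local/repo", return_label=True)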

466 """ 

467 return ButlerRepoIndex.get_repo_uri(label, return_label) 

468 

469 @classmethod 

470 def get_known_repos(cls) -> set[str]: 

471 """Retrieve the list of known repository labels. 

472 

473 Returns 

474 ------- 

475 repos : `set` of `str` 

476 All the known labels. Can be empty if no index can be found. 

477 

478 Notes 

479 ----- 

480 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

481 information is discovered. 

482 """ 

483 return ButlerRepoIndex.get_known_repos() 

484 

485 @abstractmethod 

486 def transaction(self) -> AbstractContextManager[None]: 

487 """Context manager supporting `Butler` transactions. 

488 

489 Transactions can be nested. 
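
Examples
--------
A minimal sketch of grouping related writes so that a failure rolls
them back together; the dataset type names and data ID values are
illustrative assumptions::

    with butler.transaction():
        butler.put(catalog, "src", visit=903334, detector=42)
        butler.put(summary, "src_summary", visit=903334, detector=42)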

490 """ 

491 raise NotImplementedError() 

492 

493 @abstractmethod 

494 def put( 

495 self, 

496 obj: Any, 

497 datasetRefOrType: DatasetRef | DatasetType | str, 

498 /, 

499 dataId: DataId | None = None, 

500 *, 

501 run: str | None = None, 

502 **kwargs: Any, 

503 ) -> DatasetRef: 

504 """Store and register a dataset. 

505 

506 Parameters 

507 ---------- 

508 obj : `object` 

509 The dataset. 

510 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

511 When `DatasetRef` is provided, ``dataId`` should be `None`. 

512 Otherwise the `DatasetType` or name thereof. If a fully resolved 

513 `DatasetRef` is given the run and ID are used directly. 

514 dataId : `dict` or `DataCoordinate` 

515 A `dict` of `Dimension` link name, value pairs that label the 

516 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

517 should be provided as the second argument. 

518 run : `str`, optional 

519 The name of the run the dataset should be added to, overriding 

520 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

521 **kwargs 

522 Additional keyword arguments used to augment or construct a 

523 `DataCoordinate`. See `DataCoordinate.standardize` 

524 parameters. Not used if a resolved `DatasetRef` is provided.

525 

526 Returns 

527 ------- 

528 ref : `DatasetRef` 

529 A reference to the stored dataset, updated with the correct id if 

530 given. 

531 

532 Raises 

533 ------ 

534 TypeError 

535 Raised if the butler is read-only or if no run has been provided. 
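
Examples
--------
A hedged sketch; the in-memory object ``exposure``, the dataset type
name "calexp", and the data ID values are illustrative assumptions::

    butler = Butler.from_config("/path/to/repo", run="u/alice/tests")
    ref = butler.put(exposure, "calexp",
                     instrument="HSC", visit=903334, detector=42)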

536 """ 

537 raise NotImplementedError() 

538 

539 @abstractmethod 

540 def getDeferred( 

541 self, 

542 datasetRefOrType: DatasetRef | DatasetType | str, 

543 /, 

544 dataId: DataId | None = None, 

545 *, 

546 parameters: dict | None = None, 

547 collections: Any = None, 

548 storageClass: str | StorageClass | None = None, 

549 **kwargs: Any, 

550 ) -> DeferredDatasetHandle: 

551 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

552 after an immediate registry lookup. 

553 

554 Parameters 

555 ---------- 

556 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

557 When a `DatasetRef` is provided, ``dataId`` should be `None`.

558 Otherwise the `DatasetType` or name thereof. 

559 dataId : `dict` or `DataCoordinate`, optional 

560 A `dict` of `Dimension` link name, value pairs that label the 

561 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

562 should be provided as the first argument. 

563 parameters : `dict` 

564 Additional StorageClass-defined options to control reading, 

565 typically used to efficiently read only a subset of the dataset. 

566 collections : Any, optional 

567 Collections to be searched, overriding ``self.collections``. 

568 Can be any of the types supported by the ``collections`` argument 

569 to butler construction. 

570 storageClass : `StorageClass` or `str`, optional 

571 The storage class to be used to override the Python type 

572 returned by this method. By default the returned type matches 

573 the dataset type definition for this dataset. Specifying a 

574 read `StorageClass` can force a different type to be returned. 

575 This type must be compatible with the original type. 

576 **kwargs 

577 Additional keyword arguments used to augment or construct a 

578 `DataId`. See `DataId` parameters. 

579 

580 Returns 

581 ------- 

582 obj : `DeferredDatasetHandle` 

583 A handle which can be used to retrieve a dataset at a later time. 

584 

585 Raises 

586 ------ 

587 LookupError 

588 Raised if no matching dataset exists in the `Registry` or 

589 datastore. 

590 ValueError 

591 Raised if a resolved `DatasetRef` was passed as an input, but it 

592 differs from the one found in the registry. 

593 TypeError 

594 Raised if no collections were provided. 
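
Examples
--------
A sketch of deferring the read until the data are actually needed; the
dataset type, data ID, and the ``parameters`` passed to the handle are
illustrative assumptions::

    handle = butler.getDeferred("calexp", instrument="HSC",
                                visit=903334, detector=42)
    # Nothing is read from the datastore until the handle is used.
    subimage = handle.get(parameters={"bbox": bbox})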

595 """ 

596 raise NotImplementedError() 

597 

598 @abstractmethod 

599 def get( 

600 self, 

601 datasetRefOrType: DatasetRef | DatasetType | str, 

602 /, 

603 dataId: DataId | None = None, 

604 *, 

605 parameters: dict[str, Any] | None = None, 

606 collections: Any = None, 

607 storageClass: StorageClass | str | None = None, 

608 **kwargs: Any, 

609 ) -> Any: 

610 """Retrieve a stored dataset. 

611 

612 Parameters 

613 ---------- 

614 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

615 When `DatasetRef` the `dataId` should be `None`. 

616 Otherwise the `DatasetType` or name thereof. 

617 If a resolved `DatasetRef`, the associated dataset 

618 is returned directly without additional querying. 

619 dataId : `dict` or `DataCoordinate` 

620 A `dict` of `Dimension` link name, value pairs that label the 

621 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

622 should be provided as the first argument. 

623 parameters : `dict` 

624 Additional StorageClass-defined options to control reading, 

625 typically used to efficiently read only a subset of the dataset. 

626 collections : Any, optional 

627 Collections to be searched, overriding ``self.collections``. 

628 Can be any of the types supported by the ``collections`` argument 

629 to butler construction. 

630 storageClass : `StorageClass` or `str`, optional 

631 The storage class to be used to override the Python type 

632 returned by this method. By default the returned type matches 

633 the dataset type definition for this dataset. Specifying a 

634 read `StorageClass` can force a different type to be returned. 

635 This type must be compatible with the original type. 

636 **kwargs 

637 Additional keyword arguments used to augment or construct a 

638 `DataCoordinate`. See `DataCoordinate.standardize` 

639 parameters. 

640 

641 Returns 

642 ------- 

643 obj : `object` 

644 The dataset. 

645 

646 Raises 

647 ------ 

648 LookupError 

649 Raised if no matching dataset exists in the `Registry`. 

650 TypeError 

651 Raised if no collections were provided. 

652 

653 Notes 

654 ----- 

655 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

656 this method requires that the given data ID include temporal dimensions 

657 beyond the dimensions of the dataset type itself, in order to find the 

658 dataset with the appropriate validity range. For example, a "bias" 

659 dataset with native dimensions ``{instrument, detector}`` could be 

660 fetched with a ``{instrument, detector, exposure}`` data ID, because 

661 ``exposure`` is a temporal dimension. 
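
Examples
--------
A minimal sketch; the collection, dataset type, and data ID values are
illustrative assumptions. The ``exposure`` key illustrates supplying a
temporal dimension when reading from a calibration collection::

    butler = Butler.from_config("/path/to/repo",
                                collections=["HSC/defaults"])
    bias = butler.get("bias", instrument="HSC", detector=42,
                      exposure=903334)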

662 """ 

663 raise NotImplementedError() 

664 

665 @abstractmethod 

666 def getURIs( 

667 self, 

668 datasetRefOrType: DatasetRef | DatasetType | str, 

669 /, 

670 dataId: DataId | None = None, 

671 *, 

672 predict: bool = False, 

673 collections: Any = None, 

674 run: str | None = None, 

675 **kwargs: Any, 

676 ) -> DatasetRefURIs: 

677 """Return the URIs associated with the dataset. 

678 

679 Parameters 

680 ---------- 

681 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

682 When a `DatasetRef` is provided, ``dataId`` should be `None`.

683 Otherwise the `DatasetType` or name thereof. 

684 dataId : `dict` or `DataCoordinate` 

685 A `dict` of `Dimension` link name, value pairs that label the 

686 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

687 should be provided as the first argument. 

688 predict : `bool` 

689 If `True`, allow URIs to be returned of datasets that have not 

690 been written. 

691 collections : Any, optional 

692 Collections to be searched, overriding ``self.collections``. 

693 Can be any of the types supported by the ``collections`` argument 

694 to butler construction. 

695 run : `str`, optional 

696 Run to use for predictions, overriding ``self.run``. 

697 **kwargs 

698 Additional keyword arguments used to augment or construct a 

699 `DataCoordinate`. See `DataCoordinate.standardize` 

700 parameters. 

701 

702 Returns 

703 ------- 

704 uris : `DatasetRefURIs` 

705 The URI to the primary artifact associated with this dataset (if 

706 the dataset was disassembled within the datastore this may be 

707 `None`), and the URIs to any components associated with the dataset 

708 artifact (can be empty if there are no components).

709 """ 

710 raise NotImplementedError() 

711 

712 @abstractmethod 

713 def getURI( 

714 self, 

715 datasetRefOrType: DatasetRef | DatasetType | str, 

716 /, 

717 dataId: DataId | None = None, 

718 *, 

719 predict: bool = False, 

720 collections: Any = None, 

721 run: str | None = None, 

722 **kwargs: Any, 

723 ) -> ResourcePath: 

724 """Return the URI to the Dataset. 

725 

726 Parameters 

727 ---------- 

728 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

729 When a `DatasetRef` is provided, ``dataId`` should be `None`.

730 Otherwise the `DatasetType` or name thereof. 

731 dataId : `dict` or `DataCoordinate` 

732 A `dict` of `Dimension` link name, value pairs that label the 

733 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

734 should be provided as the first argument. 

735 predict : `bool` 

736 If `True`, allow URIs to be returned of datasets that have not 

737 been written. 

738 collections : Any, optional 

739 Collections to be searched, overriding ``self.collections``. 

740 Can be any of the types supported by the ``collections`` argument 

741 to butler construction. 

742 run : `str`, optional 

743 Run to use for predictions, overriding ``self.run``. 

744 **kwargs 

745 Additional keyword arguments used to augment or construct a 

746 `DataCoordinate`. See `DataCoordinate.standardize` 

747 parameters. 

748 

749 Returns 

750 ------- 

751 uri : `lsst.resources.ResourcePath` 

752 URI pointing to the Dataset within the datastore. If the 

753 Dataset does not exist in the datastore, and if ``predict`` is 

754 `True`, the URI will be a prediction and will include a URI 

755 fragment "#predicted". 

756 If the datastore does not have entities that relate well 

757 to the concept of a URI the returned URI string will be 

758 descriptive. The returned URI is not guaranteed to be obtainable. 

759 

760 Raises 

761 ------ 

762 LookupError 

763 Raised if a URI has been requested for a dataset that does not

764 exist and guessing is not allowed.

765 ValueError 

766 Raised if a resolved `DatasetRef` was passed as an input, but it 

767 differs from the one found in the registry. 

768 TypeError 

769 Raised if no collections were provided. 

770 RuntimeError 

771 Raised if a URI is requested for a dataset that consists of 

772 multiple artifacts. 
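
Examples
--------
A hedged sketch; dataset type, data ID, and run names are illustrative
assumptions::

    uri = butler.getURI("calexp", instrument="HSC", visit=903334,
                        detector=42)

    # With predict=True a URI can be obtained before the dataset is
    # written; it will carry a "#predicted" fragment.
    future_uri = butler.getURI("calexp", instrument="HSC", visit=903334,
                               detector=42, predict=True,
                               run="u/alice/tests")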

773 """ 

774 raise NotImplementedError() 

775 

776 @abstractmethod 

777 def get_dataset_type(self, name: str) -> DatasetType: 

778 """Get the `DatasetType`. 

779 

780 Parameters 

781 ---------- 

782 name : `str` 

783 Name of the type. 

784 

785 Returns 

786 ------- 

787 type : `DatasetType` 

788 The `DatasetType` associated with the given name. 

789 

790 Raises 

791 ------ 

792 lsst.daf.butler.MissingDatasetTypeError 

793 Raised if the requested dataset type has not been registered. 

794 

795 Notes 

796 ----- 

797 This method handles component dataset types automatically, though most 

798 other operations do not. 

799 """ 

800 raise NotImplementedError() 

801 

802 @abstractmethod 

803 def get_dataset( 

804 self, 

805 id: DatasetId, 

806 storage_class: str | StorageClass | None, 

807 dimension_records: bool = False, 

808 datastore_records: bool = False, 

809 ) -> DatasetRef | None: 

810 """Retrieve a Dataset entry. 

811 

812 Parameters 

813 ---------- 

814 id : `DatasetId` 

815 The unique identifier for the dataset. 

816 storage_class : `str` or `StorageClass` or `None` 

817 A storage class to use when creating the returned entry. If given 

818 it must be compatible with the default storage class. 

819 dimension_records : `bool`, optional

820 If `True` the ref will be expanded and contain dimension records. 

821 datastore_records : `bool`, optional

822 If `True` the ref will contain associated datastore records. 

823 

824 Returns 

825 ------- 

826 ref : `DatasetRef` or `None` 

827 A ref to the Dataset, or `None` if no matching Dataset 

828 was found. 
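
Examples
--------
A hedged sketch, assuming ``dataset_id`` is a `DatasetId` (UUID)
obtained elsewhere, e.g. from provenance metadata::

    ref = butler.get_dataset(dataset_id, storage_class=None,
                             dimension_records=True)
    if ref is None:
        print("no dataset with that ID in this repository")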

829 """ 

830 raise NotImplementedError() 

831 

832 @abstractmethod 

833 def find_dataset( 

834 self, 

835 dataset_type: DatasetType | str, 

836 data_id: DataId | None = None, 

837 *, 

838 collections: str | Sequence[str] | None = None, 

839 timespan: Timespan | None = None, 

840 storage_class: str | StorageClass | None = None, 

841 dimension_records: bool = False, 

842 datastore_records: bool = False, 

843 **kwargs: Any, 

844 ) -> DatasetRef | None: 

845 """Find a dataset given its `DatasetType` and data ID. 

846 

847 This can be used to obtain a `DatasetRef` that permits the dataset to 

848 be read from a `Datastore`. If the dataset is a component and can not 

849 be found using the provided dataset type, a dataset ref for the parent 

850 will be returned instead but with the correct dataset type. 

851 

852 Parameters 

853 ---------- 

854 dataset_type : `DatasetType` or `str` 

855 A `DatasetType` or the name of one. If this is a `DatasetType` 

856 instance, its storage class will be respected and propagated to 

857 the output, even if it differs from the dataset type definition 

858 in the registry, as long as the storage classes are convertible. 

859 data_id : `dict` or `DataCoordinate`, optional 

860 A `dict`-like object containing the `Dimension` links that identify 

861 the dataset within a collection. If it is a `dict` the dataId 

862 can include dimension record values such as ``day_obs`` and 

863 ``seq_num`` or ``full_name`` that can be used to derive the 

864 primary dimension. 

865 collections : `str` or `list` [`str`], optional 

866 An ordered list of collections to search for the dataset.

867 Defaults to ``self.defaults.collections``. 

868 timespan : `Timespan`, optional 

869 A timespan that the validity range of the dataset must overlap. 

870 If not provided, any `~CollectionType.CALIBRATION` collections 

871 matched by the ``collections`` argument will not be searched. 

872 storage_class : `str` or `StorageClass` or `None` 

873 A storage class to use when creating the returned entry. If given 

874 it must be compatible with the default storage class. 

875 dimension_records : `bool`, optional

876 If `True` the ref will be expanded and contain dimension records. 

877 datastore_records : `bool`, optional

878 If `True` the ref will contain associated datastore records. 

879 **kwargs 

880 Additional keyword arguments passed to 

881 `DataCoordinate.standardize` to convert ``dataId`` to a true 

882 `DataCoordinate` or augment an existing one. This can also include 

883 dimension record metadata that can be used to derive a primary 

884 dimension value. 

885 

886 Returns 

887 ------- 

888 ref : `DatasetRef` 

889 A reference to the dataset, or `None` if no matching Dataset 

890 was found. 

891 

892 Raises 

893 ------ 

894 lsst.daf.butler.NoDefaultCollectionError 

895 Raised if ``collections`` is `None` and 

896 ``self.collections`` is `None`. 

897 LookupError 

898 Raised if one or more data ID keys are missing. 

899 lsst.daf.butler.MissingDatasetTypeError 

900 Raised if the dataset type does not exist. 

901 lsst.daf.butler.MissingCollectionError 

902 Raised if any of ``collections`` does not exist in the registry. 

903 

904 Notes 

905 ----- 

906 This method simply returns `None` and does not raise an exception even 

907 when the set of collections searched is intrinsically incompatible with 

908 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

909 only `~CollectionType.CALIBRATION` collections are being searched. 

910 This may make it harder to debug some lookup failures, but the behavior 

911 is intentional; we consider it more important that failed searches are 

912 reported consistently, regardless of the reason, and that adding 

913 additional collections that do not contain a match to the search path 

914 never changes the behavior. 

915 

916 This method handles component dataset types automatically, though most 

917 other query operations do not. 
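
Examples
--------
A hedged sketch; the dataset type, collection, and data ID values are
illustrative assumptions::

    ref = butler.find_dataset(
        "calexp",
        collections="HSC/runs/RC2",
        instrument="HSC", visit=903334, detector=42,
    )
    if ref is None:
        print("not found in the searched collections")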

918 """ 

919 raise NotImplementedError() 

920 

921 @abstractmethod 

922 def retrieveArtifacts( 

923 self, 

924 refs: Iterable[DatasetRef], 

925 destination: ResourcePathExpression, 

926 transfer: str = "auto", 

927 preserve_path: bool = True, 

928 overwrite: bool = False, 

929 ) -> list[ResourcePath]: 

930 """Retrieve the artifacts associated with the supplied refs. 

931 

932 Parameters 

933 ---------- 

934 refs : iterable of `DatasetRef` 

935 The datasets for which artifacts are to be retrieved. 

936 A single ref can result in multiple artifacts. The refs must 

937 be resolved. 

938 destination : `lsst.resources.ResourcePath` or `str` 

939 Location to write the artifacts. 

940 transfer : `str`, optional 

941 Method to use to transfer the artifacts. Must be one of the options 

942 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

943 "move" is not allowed. 

944 preserve_path : `bool`, optional 

945 If `True` the full path of the artifact within the datastore 

946 is preserved. If `False` the final file component of the path 

947 is used. 

948 overwrite : `bool`, optional 

949 If `True` allow transfers to overwrite existing files at the 

950 destination. 

951 

952 Returns 

953 ------- 

954 targets : `list` of `lsst.resources.ResourcePath` 

955 URIs of file artifacts in destination location. Order is not 

956 preserved. 

957 

958 Notes 

959 ----- 

960 For non-file datastores the artifacts written to the destination 

961 may not match the representation inside the datastore. For example 

962 a hierarchical data structure in a NoSQL database may well be stored 

963 as a JSON file. 
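
Examples
--------
A hedged sketch; the query, destination directory, and transfer mode
are illustrative assumptions::

    refs = butler.registry.queryDatasets("calexp",
                                         collections="HSC/runs/RC2")
    paths = butler.retrieveArtifacts(refs, "/tmp/calexp_copies",
                                     transfer="copy",
                                     preserve_path=False)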

964 """ 

965 raise NotImplementedError() 

966 

967 @abstractmethod 

968 def exists( 

969 self, 

970 dataset_ref_or_type: DatasetRef | DatasetType | str, 

971 /, 

972 data_id: DataId | None = None, 

973 *, 

974 full_check: bool = True, 

975 collections: Any = None, 

976 **kwargs: Any, 

977 ) -> DatasetExistence: 

978 """Indicate whether a dataset is known to Butler registry and 

979 datastore. 

980 

981 Parameters 

982 ---------- 

983 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

984 When a `DatasetRef` is provided, ``data_id`` should be `None`.

985 Otherwise the `DatasetType` or name thereof. 

986 data_id : `dict` or `DataCoordinate` 

987 A `dict` of `Dimension` link name, value pairs that label the 

988 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

989 should be provided as the first argument. 

990 full_check : `bool`, optional 

991 If `True`, an additional check will be made for dataset artifact 

992 existence. This will involve additional overhead due to the need 

993 to query an external system. If `False` registry and datastore 

994 will solely be asked if they know about the dataset but no 

995 check for the artifact will be performed. 

996 collections : Any, optional 

997 Collections to be searched, overriding ``self.collections``. 

998 Can be any of the types supported by the ``collections`` argument 

999 to butler construction. 

1000 **kwargs 

1001 Additional keyword arguments used to augment or construct a 

1002 `DataCoordinate`. See `DataCoordinate.standardize` 

1003 parameters. 

1004 

1005 Returns 

1006 ------- 

1007 existence : `DatasetExistence` 

1008 Object indicating whether the dataset is known to registry and 

1009 datastore. Evaluates to `True` if the dataset is present and known 

1010 to both. 
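
Examples
--------
A hedged sketch (data ID values are illustrative); the returned
`DatasetExistence` is truthy only when both registry and datastore
know the dataset::

    existence = butler.exists("calexp", instrument="HSC", visit=903334,
                              detector=42, full_check=False)
    if not existence:
        print("dataset missing or incomplete")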

1011 """ 

1012 raise NotImplementedError() 

1013 

1014 @abstractmethod 

1015 def _exists_many( 

1016 self, 

1017 refs: Iterable[DatasetRef], 

1018 /, 

1019 *, 

1020 full_check: bool = True, 

1021 ) -> dict[DatasetRef, DatasetExistence]: 

1022 """Indicate whether multiple datasets are known to Butler registry and 

1023 datastore. 

1024 

1025 This is an experimental API that may change at any moment. 

1026 

1027 Parameters 

1028 ---------- 

1029 refs : iterable of `DatasetRef` 

1030 The datasets to be checked. 

1031 full_check : `bool`, optional 

1032 If `True`, an additional check will be made for dataset artifact 

1033 existence. This will involve additional overhead due to the need 

1034 to query an external system. If `False` registry and datastore 

1035 will solely be asked if they know about the dataset but no 

1036 check for the artifact will be performed. 

1037 

1038 Returns 

1039 ------- 

1040 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1041 Mapping from the given dataset refs to an enum indicating the 

1042 status of the dataset in registry and datastore. 

1043 Each value evaluates to `True` if the dataset is present and known 

1044 to both. 

1045 """ 

1046 raise NotImplementedError() 

1047 

1048 @abstractmethod 

1049 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1050 """Remove one or more `~CollectionType.RUN` collections and the 

1051 datasets within them. 

1052 

1053 Parameters 

1054 ---------- 

1055 names : `~collections.abc.Iterable` [ `str` ] 

1056 The names of the collections to remove. 

1057 unstore : `bool`, optional 

1058 If `True` (default), delete datasets from all datastores in which 

1059 they are present, and attempt to rollback the registry deletions if 

1060 datastore deletions fail (which may not always be possible). If 

1061 `False`, datastore records for these datasets are still removed, 

1062 but any artifacts (e.g. files) will not be. 

1063 

1064 Raises 

1065 ------ 

1066 TypeError 

1067 Raised if one or more collections are not of type 

1068 `~CollectionType.RUN`. 
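
Examples
--------
A minimal sketch of deleting a scratch run and its artifacts; the
repository path and run name are illustrative assumptions::

    butler = Butler.from_config("/path/to/repo", writeable=True)
    butler.removeRuns(["u/alice/scratch"], unstore=True)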

1069 """ 

1070 raise NotImplementedError() 

1071 

1072 @abstractmethod 

1073 def ingest( 

1074 self, 

1075 *datasets: FileDataset, 

1076 transfer: str | None = "auto", 

1077 run: str | None = None, 

1078 idGenerationMode: DatasetIdGenEnum | None = None, 

1079 record_validation_info: bool = True, 

1080 ) -> None: 

1081 """Store and register one or more datasets that already exist on disk. 

1082 

1083 Parameters 

1084 ---------- 

1085 datasets : `FileDataset` 

1086 Each positional argument is a struct containing information about 

1087 a file to be ingested, including its URI (either absolute or 

1088 relative to the datastore root, if applicable), a resolved 

1089 `DatasetRef`, and optionally a formatter class or its 

1090 fully-qualified string name. If a formatter is not provided, the 

1091 formatter that would be used for `put` is assumed. On successful 

1092 ingest all `FileDataset.formatter` attributes will be set to the 

1093 formatter class used. `FileDataset.path` attributes may be modified 

1094 to put paths in whatever the datastore considers a standardized 

1095 form. 

1096 transfer : `str`, optional 

1097 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1098 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1099 transfer the file. 

1100 run : `str`, optional 

1101 The name of the run ingested datasets should be added to, 

1102 overriding ``self.run``. This parameter is now deprecated since 

1103 the run is encoded in the ``FileDataset``. 

1104 idGenerationMode : `DatasetIdGenEnum`, optional 

1105 Specifies option for generating dataset IDs. Parameter is 

1106 deprecated. 

1107 record_validation_info : `bool`, optional 

1108 If `True`, the default, the datastore can record validation 

1109 information associated with the file. If `False` the datastore 

1110 will not attempt to track any information such as checksums 

1111 or file sizes. This can be useful if such information is tracked 

1112 in an external system or if the file is to be compressed in place. 

1113 It is up to the datastore whether this parameter is relevant. 

1114 

1115 Raises 

1116 ------ 

1117 TypeError 

1118 Raised if the butler is read-only or if no run was provided. 

1119 NotImplementedError 

1120 Raised if the `Datastore` does not support the given transfer mode. 

1121 DatasetTypeNotSupportedError 

1122 Raised if one or more files to be ingested have a dataset type that 

1123 is not supported by the `Datastore`.

1124 FileNotFoundError 

1125 Raised if one of the given files does not exist. 

1126 FileExistsError 

1127 Raised if transfer is not `None` but the (internal) location the 

1128 file would be moved to is already occupied. 

1129 

1130 Notes 

1131 ----- 

1132 This operation is not fully exception safe: if a database operation 

1133 fails, the given `FileDataset` instances may be only partially updated. 

1134 

1135 It is atomic in terms of database operations (they will either all 

1136 succeed or all fail) providing the database engine implements 

1137 transactions correctly. It will attempt to be atomic in terms of 

1138 filesystem operations as well, but this cannot be implemented 

1139 rigorously for most datastores. 
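
Examples
--------
A hedged sketch, assuming ``datasets`` is a previously prepared
sequence of `FileDataset` instances whose refs already carry their run
(preparing those structs is outside the scope of this example)::

    butler = Butler.from_config("/path/to/repo", writeable=True)
    butler.ingest(*datasets, transfer="copy")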

1140 """ 

1141 raise NotImplementedError() 

1142 

1143 @abstractmethod 

1144 def export( 

1145 self, 

1146 *, 

1147 directory: str | None = None, 

1148 filename: str | None = None, 

1149 format: str | None = None, 

1150 transfer: str | None = None, 

1151 ) -> AbstractContextManager[RepoExportContext]: 

1152 """Export datasets from the repository represented by this `Butler`. 

1153 

1154 This method is a context manager that returns a helper object 

1155 (`RepoExportContext`) that is used to indicate what information from 

1156 the repository should be exported. 

1157 

1158 Parameters 

1159 ---------- 

1160 directory : `str`, optional 

1161 Directory dataset files should be written to if ``transfer`` is not 

1162 `None`. 

1163 filename : `str`, optional 

1164 Name for the file that will include database information associated 

1165 with the exported datasets. If this is not an absolute path and 

1166 ``directory`` is not `None`, it will be written to ``directory`` 

1167 instead of the current working directory. Defaults to 

1168 "export.{format}". 

1169 format : `str`, optional 

1170 File format for the database information file. If `None`, the 

1171 extension of ``filename`` will be used. 

1172 transfer : `str`, optional 

1173 Transfer mode passed to `Datastore.export`. 

1174 

1175 Raises 

1176 ------ 

1177 TypeError 

1178 Raised if the set of arguments passed is inconsistent. 

1179 

1180 Examples 

1181 -------- 

1182 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1183 methods are used to provide the iterables over data IDs and/or datasets 

1184 to be exported:: 

1185 

1186 with butler.export("exports.yaml") as export: 

1187 # Export all flats, but none of the dimension element rows 

1188 # (i.e. data ID information) associated with them. 

1189 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1190 elements=()) 

1191 # Export all datasets that start with "deepCoadd_" and all of 

1192 # their associated data ID information. 

1193 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1194 """ 

1195 raise NotImplementedError() 

1196 

1197 @abstractmethod 

1198 def import_( 

1199 self, 

1200 *, 

1201 directory: ResourcePathExpression | None = None, 

1202 filename: ResourcePathExpression | TextIO | None = None, 

1203 format: str | None = None, 

1204 transfer: str | None = None, 

1205 skip_dimensions: set | None = None, 

1206 ) -> None: 

1207 """Import datasets into this repository that were exported from a 

1208 different butler repository via `~lsst.daf.butler.Butler.export`. 

1209 

1210 Parameters 

1211 ---------- 

1212 directory : `~lsst.resources.ResourcePathExpression`, optional 

1213 Directory containing dataset files to import from. If `None`, 

1214 ``filename`` and all dataset file paths specified therein must 

1215 be absolute. 

1216 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1217 A stream or name of file that contains database information 

1218 associated with the exported datasets, typically generated by 

1219 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1220 `~lsst.resources.ResourcePath` and is not an absolute path, 

1221 it will first be looked for relative to ``directory`` and if not 

1222 found there it will be looked for in the current working 

1223 directory. Defaults to "export.{format}". 

1224 format : `str`, optional 

1225 File format for ``filename``. If `None`, the extension of 

1226 ``filename`` will be used. 

1227 transfer : `str`, optional 

1228 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1229 skip_dimensions : `set`, optional 

1230 Names of dimensions that should be skipped and not imported. 

1231 

1232 Raises 

1233 ------ 

1234 TypeError 

1235 Raised if the set of arguments passed is inconsistent, or if the 

1236 butler is read-only. 
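
Examples
--------
A hedged sketch of loading an export produced by `export`; the paths
and file name are illustrative assumptions::

    butler = Butler.from_config("/path/to/new_repo", writeable=True)
    butler.import_(directory="/path/to/exported_files",
                   filename="exports.yaml",
                   transfer="copy")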

1237 """ 

1238 raise NotImplementedError() 

1239 

1240 @abstractmethod 

1241 def transfer_from( 

1242 self, 

1243 source_butler: LimitedButler, 

1244 source_refs: Iterable[DatasetRef], 

1245 transfer: str = "auto", 

1246 skip_missing: bool = True, 

1247 register_dataset_types: bool = False, 

1248 transfer_dimensions: bool = False, 

1249 ) -> Collection[DatasetRef]: 

1250 """Transfer datasets to this Butler from a run in another Butler. 

1251 

1252 Parameters 

1253 ---------- 

1254 source_butler : `LimitedButler` 

1255 Butler from which the datasets are to be transferred. If data IDs 

1256 in ``source_refs`` are not expanded then this has to be a full 

1257 `Butler` whose registry will be used to expand data IDs. 

1258 source_refs : iterable of `DatasetRef` 

1259 Datasets defined in the source butler that should be transferred to 

1260 this butler. In most circumstances, ``transfer_from`` is faster if 

1261 the dataset refs are expanded. 

1262 transfer : `str`, optional 

1263 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1264 skip_missing : `bool` 

1265 If `True`, datasets with no datastore artifact associated with 

1266 them are not transferred. If `False` a registry entry will be 

1267 created even if no datastore record is created (and so will 

1268 look equivalent to the dataset being unstored). 

1269 register_dataset_types : `bool` 

1270 If `True` any missing dataset types are registered. Otherwise 

1271 an exception is raised. 

1272 transfer_dimensions : `bool`, optional 

1273 If `True`, dimension record data associated with the new datasets 

1274 will be transferred. 

1275 

1276 Returns 

1277 ------- 

1278 refs : `list` of `DatasetRef` 

1279 The refs added to this Butler. 

1280 

1281 Notes 

1282 ----- 

1283 The datastore artifact has to exist for a transfer 

1284 to be made but non-existence is not an error. 

1285 

1286 Datasets that already exist in this run will be skipped. 

1287 

1288 The datasets are imported as part of a transaction, although 

1289 dataset types are registered before the transaction is started. 

1290 This means that it is possible for a dataset type to be registered 

1291 even though transfer has failed. 
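
Examples
--------
A hedged sketch; repository paths, the collection searched, and the
dataset type are illustrative assumptions::

    source = Butler.from_config("/path/to/source_repo")
    dest = Butler.from_config("/path/to/dest_repo", writeable=True)

    refs = source.registry.queryDatasets("calexp",
                                         collections="HSC/runs/RC2")
    transferred = dest.transfer_from(source, refs, transfer="copy",
                                     register_dataset_types=True)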

1292 """ 

1293 raise NotImplementedError() 

1294 

1295 @abstractmethod 

1296 def validateConfiguration( 

1297 self, 

1298 logFailures: bool = False, 

1299 datasetTypeNames: Iterable[str] | None = None, 

1300 ignore: Iterable[str] | None = None, 

1301 ) -> None: 

1302 """Validate butler configuration. 

1303 

1304 Checks that each `DatasetType` can be stored in the `Datastore`. 

1305 

1306 Parameters 

1307 ---------- 

1308 logFailures : `bool`, optional 

1309 If `True`, output a log message for every validation error 

1310 detected. 

1311 datasetTypeNames : iterable of `str`, optional 

1312 The `DatasetType` names that should be checked. This allows 

1313 only a subset to be selected. 

1314 ignore : iterable of `str`, optional 

1315 Names of DatasetTypes to skip over. This can be used to skip 

1316 known problems. If a named `DatasetType` corresponds to a 

1317 composite, all components of that `DatasetType` will also be 

1318 ignored. 

1319 

1320 Raises 

1321 ------ 

1322 ButlerValidationError 

1323 Raised if there is some inconsistency with how this Butler 

1324 is configured. 
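
Examples
--------
A minimal sketch restricting the check to a few dataset types; the
names are illustrative assumptions::

    butler.validateConfiguration(logFailures=True,
                                 datasetTypeNames=["calexp", "src"])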

1325 """ 

1326 raise NotImplementedError() 

1327 

1328 @property 

1329 @abstractmethod 

1330 def collections(self) -> Sequence[str]: 

1331 """The collections to search by default, in order 

1332 (`~collections.abc.Sequence` [ `str` ]). 

1333 """ 

1334 raise NotImplementedError() 

1335 

1336 @property 

1337 @abstractmethod 

1338 def run(self) -> str | None: 

1339 """Name of the run this butler writes outputs to by default (`str` or 

1340 `None`). 

1341 """ 

1342 raise NotImplementedError() 

1343 

1344 @property 

1345 @abstractmethod 

1346 def registry(self) -> Registry: 

1347 """The object that manages dataset metadata and relationships 

1348 (`Registry`). 

1349 

1350 Many operations that don't involve reading or writing butler datasets 

1351 are accessible only via `Registry` methods. Eventually these methods 

1352 will be replaced by equivalent `Butler` methods. 

1353 """ 

1354 raise NotImplementedError()