Coverage for python/lsst/daf/butler/_butler.py: 62%

154 statements  

coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from typing import TYPE_CHECKING, Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.logging import getLogger 

40 

41from ._butler_config import ButlerConfig, ButlerType 

42from ._butler_instance_options import ButlerInstanceOptions 

43from ._butler_repo_index import ButlerRepoIndex 

44from ._config import Config, ConfigSubset 

45from ._limited_butler import LimitedButler 

46from .datastore import Datastore 

47from .dimensions import DimensionConfig 

48from .registry import RegistryConfig, _RegistryFactory 

49from .repo_relocation import BUTLER_ROOT_TAG 

50 

51if TYPE_CHECKING: 

52 from ._dataset_existence import DatasetExistence 

53 from ._dataset_ref import DatasetId, DatasetRef 

54 from ._dataset_type import DatasetType 

55 from ._deferredDatasetHandle import DeferredDatasetHandle 

56 from ._file_dataset import FileDataset 

57 from ._query import Query 

58 from ._storage_class import StorageClass 

59 from ._timespan import Timespan 

60 from .datastore import DatasetRefURIs 

61 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

62 from .registry import CollectionArgType, Registry 

63 from .transfers import RepoExportContext 

64 

65_LOG = getLogger(__name__) 

66 

67 

68class Butler(LimitedButler): # numpydoc ignore=PR02 

69 """Interface for data butler and factory for Butler instances. 

70 

71 Parameters 

72 ---------- 

73 config : `ButlerConfig`, `Config` or `str`, optional 

74 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

75 If a directory path is given the configuration will be read from a 

76 ``butler.yaml`` file in that location. If `None` is given default 

77 values will be used. If ``config`` contains a "cls" key then its value

78 is used as the name of the butler class, which must be a subclass of

79 this class; otherwise `DirectButler` is instantiated.

80 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

81 An expression specifying the collections to be searched (in order) when 

82 reading datasets. 

83 This may be a `str` collection name or an iterable thereof. 

84 See :ref:`daf_butler_collection_expressions` for more information. 

85 These collections are not registered automatically and must be 

86 manually registered before they are used by any method, but they may be 

87 manually registered after the `Butler` is initialized. 

88 run : `str`, optional 

89 Name of the `~CollectionType.RUN` collection new datasets should be 

90 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

91 ``collections`` will be set to ``[run]``. If not `None`, this 

92 collection will automatically be registered. If this is not set (and 

93 ``writeable`` is not set either), a read-only butler will be created. 

94 searchPaths : `list` of `str`, optional 

95 Directory paths to search when calculating the full Butler 

96 configuration. Not used if the supplied config is already a 

97 `ButlerConfig`. 

98 writeable : `bool`, optional 

99 Explicitly sets whether the butler supports write operations. If not 

100 provided, a read-write butler is created if any of ``run``, ``tags``, 

101 or ``chains`` is non-empty. 

102 inferDefaults : `bool`, optional 

103 If `True` (default) infer default data ID values from the values 

104 present in the datasets in ``collections``: if all collections have the 

105 same value (or no value) for a governor dimension, that value will be 

106 the default for that dimension. Nonexistent collections are ignored. 

107 If a default value is provided explicitly for a governor dimension via 

108 ``**kwargs``, no default will be inferred for that dimension. 

109 without_datastore : `bool`, optional 

110 If `True` do not attach a datastore to this butler. Any attempts 

111 to use a datastore will fail. 

112 **kwargs : `Any` 

113 Additional keyword arguments passed to the constructor of the

114 actual butler class.

115 

116 Notes 

117 ----- 

118 The preferred way to instantiate Butler is via the `from_config` method. 

119 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

120 but ``mypy`` will complain about the former. 

121 """ 

122 

123 def __new__( 

124 cls, 

125 config: Config | ResourcePathExpression | None = None, 

126 *, 

127 collections: Any = None, 

128 run: str | None = None, 

129 searchPaths: Sequence[ResourcePathExpression] | None = None, 

130 writeable: bool | None = None, 

131 inferDefaults: bool = True, 

132 without_datastore: bool = False, 

133 **kwargs: Any, 

134 ) -> Butler: 

135 if cls is Butler: 

136 return Butler.from_config( 

137 config=config, 

138 collections=collections, 

139 run=run, 

140 searchPaths=searchPaths, 

141 writeable=writeable, 

142 inferDefaults=inferDefaults, 

143 without_datastore=without_datastore, 

144 **kwargs, 

145 ) 

146 

147 # Note: we do not pass any parameters to super().__new__; Python will

148 # pass them to __init__ after __new__ returns the subclass instance.

149 return super().__new__(cls) 

150 

151 @classmethod 

152 def from_config( 

153 cls, 

154 config: Config | ResourcePathExpression | None = None, 

155 *, 

156 collections: Any = None, 

157 run: str | None = None, 

158 searchPaths: Sequence[ResourcePathExpression] | None = None, 

159 writeable: bool | None = None, 

160 inferDefaults: bool = True, 

161 without_datastore: bool = False, 

162 **kwargs: Any, 

163 ) -> Butler: 

164 """Create butler instance from configuration. 

165 

166 Parameters 

167 ---------- 

168 config : `ButlerConfig`, `Config` or `str`, optional 

169 Configuration. Anything acceptable to the `ButlerConfig` 

170 constructor. If a directory path is given the configuration will be 

171 read from a ``butler.yaml`` file in that location. If `None` is 

172 given default values will be used. If ``config`` contains a "cls" key

173 then its value is used as the name of the butler class, which must be

174 a subclass of this class; otherwise `DirectButler` is instantiated.

175 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

176 An expression specifying the collections to be searched (in order) 

177 when reading datasets. 

178 This may be a `str` collection name or an iterable thereof. 

179 See :ref:`daf_butler_collection_expressions` for more information. 

180 These collections are not registered automatically and must be 

181 manually registered before they are used by any method, but they 

182 may be manually registered after the `Butler` is initialized. 

183 run : `str`, optional 

184 Name of the `~CollectionType.RUN` collection new datasets should be 

185 inserted into. If ``collections`` is `None` and ``run`` is not 

186 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

187 this collection will automatically be registered. If this is not 

188 set (and ``writeable`` is not set either), a read-only butler will 

189 be created. 

190 searchPaths : `list` of `str`, optional 

191 Directory paths to search when calculating the full Butler 

192 configuration. Not used if the supplied config is already a 

193 `ButlerConfig`. 

194 writeable : `bool`, optional 

195 Explicitly sets whether the butler supports write operations. If 

196 not provided, a read-write butler is created if any of ``run``, 

197 ``tags``, or ``chains`` is non-empty. 

198 inferDefaults : `bool`, optional 

199 If `True` (default) infer default data ID values from the values 

200 present in the datasets in ``collections``: if all collections have 

201 the same value (or no value) for a governor dimension, that value 

202 will be the default for that dimension. Nonexistent collections 

203 are ignored. If a default value is provided explicitly for a 

204 governor dimension via ``**kwargs``, no default will be inferred 

205 for that dimension. 

206 without_datastore : `bool`, optional 

207 If `True` do not attach a datastore to this butler. Any attempts 

208 to use a datastore will fail. 

209 **kwargs : `Any` 

210 Default data ID key-value pairs. These may only identify 

211 "governor" dimensions like ``instrument`` and ``skymap``. 

212 

213 Returns 

214 ------- 

215 butler : `Butler` 

216 A `Butler` constructed from the given configuration. 

217 

218 Notes 

219 ----- 

220 Calling this factory method is identical to calling 

221 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

222 complains about a direct ``Butler()`` call.

223 

224 Examples 

225 -------- 

226 While there are many ways to control exactly how a `Butler` interacts 

227 with the collections in its `Registry`, the most common cases are still 

228 simple. 

229 

230 For a read-only `Butler` that searches one collection, do:: 

231 

232 butler = Butler.from_config( 

233 "/path/to/repo", collections=["u/alice/DM-50000"] 

234 ) 

235 

236 For a read-write `Butler` that writes to and reads from a 

237 `~CollectionType.RUN` collection:: 

238 

239 butler = Butler.from_config( 

240 "/path/to/repo", run="u/alice/DM-50000/a" 

241 ) 

242 

243 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

244 because we want to write to one `~CollectionType.RUN` collection but 

245 read from several others (as well):: 

246 

247 butler = Butler.from_config( 

248 "/path/to/repo", 

249 run="u/alice/DM-50000/a", 

250 collections=[ 

251 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

252 ] 

253 ) 

254 

255 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

256 Datasets will be read first from that run (since it appears first in 

257 the chain), and then from ``u/bob/DM-49998`` and finally 

258 ``HSC/defaults``. 

259 

260 Finally, one can always create a `Butler` with no collections:: 

261 

262 butler = Butler.from_config("/path/to/repo", writeable=True) 

263 

264 This can be extremely useful when you just want to use 

265 ``butler.registry``, e.g. for inserting dimension data or managing 

266 collections, or when the collections you want to use with the butler 

267 are not consistent. Passing ``writeable`` explicitly here is only 

268 necessary if you want to be able to make changes to the repo; usually

269 the value for ``writeable`` can be guessed from the collection

270 arguments provided, but it defaults to `False` when no collection

271 arguments are given.

272 """ 

273 # DirectButler used to have a way to specify a "copy constructor" by 

274 # passing the "butler" parameter to its constructor. This 

275 # functionality has been moved out of the constructor into 

276 # Butler._clone(), but the new interface is not public yet. 

277 butler = kwargs.pop("butler", None) 

278 if butler is not None: 

279 if not isinstance(butler, Butler): 

280 raise TypeError("'butler' parameter must be a Butler instance") 

281 if config is not None or searchPaths is not None or writeable is not None: 

282 raise TypeError( 

283 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

284 ) 

285 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) 

286 

287 options = ButlerInstanceOptions( 

288 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs 

289 ) 

290 

291 # Load the Butler configuration. This may involve searching the 

292 # environment to locate a configuration file. 

293 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

294 butler_type = butler_config.get_butler_type() 

295 

296 # Make DirectButler if class is not specified. 

297 match butler_type: 

298 case ButlerType.DIRECT: 

299 from .direct_butler import DirectButler 

300 

301 return DirectButler.create_from_config( 

302 butler_config, 

303 options=options, 

304 without_datastore=without_datastore, 

305 ) 

306 case ButlerType.REMOTE: 

307 from .remote_butler import RemoteButlerFactory 

308 

309 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

310 return factory.create_butler_with_credentials_from_environment(butler_options=options) 

311 case _: 

312 raise TypeError(f"Unknown Butler type '{butler_type}'") 

313 

314 @staticmethod 

315 def makeRepo( 

316 root: ResourcePathExpression, 

317 config: Config | str | None = None, 

318 dimensionConfig: Config | str | None = None, 

319 standalone: bool = False, 

320 searchPaths: list[str] | None = None, 

321 forceConfigRoot: bool = True, 

322 outfile: ResourcePathExpression | None = None, 

323 overwrite: bool = False, 

324 ) -> Config: 

325 """Create an empty data repository by adding a butler.yaml config 

326 to a repository root directory. 

327 

328 Parameters 

329 ---------- 

330 root : `lsst.resources.ResourcePathExpression` 

331 Path or URI to the root location of the new repository. Will be 

332 created if it does not exist. 

333 config : `Config` or `str`, optional 

334 Configuration to write to the repository, after setting any 

335 root-dependent Registry or Datastore config options. Can not 

336 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

337 configuration will be used. Root-dependent config options 

338 specified in this config are overwritten if ``forceConfigRoot`` 

339 is `True`. 

340 dimensionConfig : `Config` or `str`, optional 

341 Configuration for dimensions, will be used to initialize registry 

342 database. 

343 standalone : `bool` 

344 If True, write all expanded defaults, not just customized or 

345 repository-specific settings. 

346 This (mostly) decouples the repository from the default 

347 configuration, insulating it from changes to the defaults (which 

348 may be good or bad, depending on the nature of the changes). 

349 Future *additions* to the defaults will still be picked up when 

350 initializing `Butlers` to repos created with ``standalone=True``. 

351 searchPaths : `list` of `str`, optional 

352 Directory paths to search when calculating the full butler 

353 configuration. 

354 forceConfigRoot : `bool`, optional 

355 If `False`, any values present in the supplied ``config`` that 

356 would normally be reset are not overridden and will appear 

357 directly in the output config. This allows non-standard overrides 

358 of the root directory for a datastore or registry to be given. 

359 If this parameter is `True` the values for ``root`` will be 

360 forced into the resulting config if appropriate. 

361 outfile : `lsst.resources.ResourcePathExpression`, optional

362 If not-`None`, the output configuration will be written to this 

363 location rather than into the repository itself. Can be a URI 

364 string. Can refer to a directory that will be used to write 

365 ``butler.yaml``. 

366 overwrite : `bool`, optional 

367 Create a new configuration file even if one already exists 

368 in the specified output location. Default is to raise 

369 an exception. 

370 

371 Returns 

372 ------- 

373 config : `Config` 

374 The updated `Config` instance written to the repo. 

375 

376 Raises 

377 ------ 

378 ValueError 

379 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

380 regular Config (as these subclasses would make it impossible to 

381 support ``standalone=False``). 

382 FileExistsError 

383 Raised if the output config file already exists. 

384 os.error 

385 Raised if the directory does not exist, exists but is not a 

386 directory, or cannot be created. 

387 

388 Notes 

389 ----- 

390 Note that when ``standalone=False`` (the default), the configuration 

391 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

392 construct the repository should also be used to construct any Butlers 

393 to avoid configuration inconsistencies. 
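
Examples
--------
A minimal sketch of creating a repository and then constructing a butler
against it (the path and run name below are purely illustrative)::

    Butler.makeRepo("/tmp/example_repo")
    butler = Butler.from_config("/tmp/example_repo", run="u/alice/example")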

394 """ 

395 if isinstance(config, ButlerConfig | ConfigSubset): 

396 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

397 

398 # Ensure that the root of the repository exists or can be made 

399 root_uri = ResourcePath(root, forceDirectory=True) 

400 root_uri.mkdir() 

401 

402 config = Config(config) 

403 

404 # If we are creating a new repo from scratch with relative roots, 

405 # do not propagate an explicit root from the config file 

406 if "root" in config: 

407 del config["root"] 

408 

409 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

410 imported_class = doImportType(full["datastore", "cls"]) 

411 if not issubclass(imported_class, Datastore): 

412 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

413 datastoreClass: type[Datastore] = imported_class 

414 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

415 

416 # if key exists in given config, parse it, otherwise parse the defaults 

417 # in the expanded config 

418 if config.get(("registry", "db")): 

419 registryConfig = RegistryConfig(config) 

420 else: 

421 registryConfig = RegistryConfig(full) 

422 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

423 if defaultDatabaseUri is not None: 

424 Config.updateParameters( 

425 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

426 ) 

427 else: 

428 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

429 

430 if standalone: 

431 config.merge(full) 

432 else: 

433 # Always expand the registry.managers section into the per-repo 

434 # config, because after the database schema is created, it's not 

435 # allowed to change anymore. Note that in the standalone=True 

436 # branch, _everything_ in the config is expanded, so there's no 

437 # need to special case this. 

438 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

439 configURI: ResourcePathExpression 

440 if outfile is not None: 

441 # When writing to a separate location we must include 

442 # the root of the butler repo in the config else it won't know 

443 # where to look. 

444 config["root"] = root_uri.geturl() 

445 configURI = outfile 

446 else: 

447 configURI = root_uri 

448 # Strip obscore configuration, if it is present, before writing config 

449 # to a file; the obscore config will be stored in the registry.

450 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

451 config_to_write = config.copy() 

452 del config_to_write[obscore_config_key] 

453 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

454 # configFile attribute is updated, need to copy it to original. 

455 config.configFile = config_to_write.configFile 

456 else: 

457 config.dumpToUri(configURI, overwrite=overwrite) 

458 

459 # Create Registry and populate tables 

460 registryConfig = RegistryConfig(config.get("registry")) 

461 dimensionConfig = DimensionConfig(dimensionConfig) 

462 _RegistryFactory(registryConfig).create_from_config( 

463 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

464 ) 

465 

466 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

467 

468 return config 

469 

470 @classmethod 

471 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

472 """Look up the label in a butler repository index. 

473 

474 Parameters 

475 ---------- 

476 label : `str` 

477 Label of the Butler repository to look up. 

478 return_label : `bool`, optional 

479 If ``label`` cannot be found in the repository index (either 

480 because index is not defined or ``label`` is not in the index) and 

481 ``return_label`` is `True` then return ``ResourcePath(label)``. 

482 If ``return_label`` is `False` (default) then an exception will be 

483 raised instead. 

484 

485 Returns 

486 ------- 

487 uri : `lsst.resources.ResourcePath` 

488 URI to the Butler repository associated with the given label or 

489 default value if it is provided. 

490 

491 Raises 

492 ------ 

493 KeyError 

494 Raised if the label is not found in the index, or if an index 

495 is not defined, and ``return_label`` is `False`. 

496 

497 Notes 

498 ----- 

499 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

500 information is discovered. 

501 """ 

502 return ButlerRepoIndex.get_repo_uri(label, return_label) 

503 

504 @classmethod 

505 def get_known_repos(cls) -> set[str]: 

506 """Retrieve the list of known repository labels. 

507 

508 Returns 

509 ------- 

510 repos : `set` of `str` 

511 All the known labels. Can be empty if no index can be found. 

512 

513 Notes 

514 ----- 

515 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

516 information is discovered. 

517 """ 

518 return ButlerRepoIndex.get_known_repos() 

519 

520 @abstractmethod 

521 def _caching_context(self) -> AbstractContextManager[None]: 

522 """Context manager that enables caching.""" 

523 raise NotImplementedError() 

524 

525 @abstractmethod 

526 def transaction(self) -> AbstractContextManager[None]: 

527 """Context manager supporting `Butler` transactions. 

528 

529 Transactions can be nested. 

530 """ 

531 raise NotImplementedError() 

532 

533 @abstractmethod 

534 def put( 

535 self, 

536 obj: Any, 

537 datasetRefOrType: DatasetRef | DatasetType | str, 

538 /, 

539 dataId: DataId | None = None, 

540 *, 

541 run: str | None = None, 

542 **kwargs: Any, 

543 ) -> DatasetRef: 

544 """Store and register a dataset. 

545 

546 Parameters 

547 ---------- 

548 obj : `object` 

549 The dataset. 

550 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

551 When `DatasetRef` is provided, ``dataId`` should be `None`. 

552 Otherwise the `DatasetType` or name thereof. If a fully resolved 

553 `DatasetRef` is given the run and ID are used directly. 

554 dataId : `dict` or `DataCoordinate` 

555 A `dict` of `Dimension` link name, value pairs that label the 

556 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

557 should be provided as the second argument. 

558 run : `str`, optional 

559 The name of the run the dataset should be added to, overriding 

560 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

561 **kwargs 

562 Additional keyword arguments used to augment or construct a 

563 `DataCoordinate`. See `DataCoordinate.standardize` 

564 parameters. Not used if a resolved `DatasetRef` is provided.

565 

566 Returns 

567 ------- 

568 ref : `DatasetRef` 

569 A reference to the stored dataset, updated with the correct id if 

570 given. 

571 

572 Raises 

573 ------ 

574 TypeError 

575 Raised if the butler is read-only or if no run has been provided. 
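
Examples
--------
A hedged sketch; the dataset type name ``sourceCatalog`` and the data ID
values are hypothetical and must already be defined in the repository::

    butler = Butler.from_config("/path/to/repo", run="u/alice/example")
    # ``catalog`` is any in-memory object compatible with the dataset
    # type's storage class.
    ref = butler.put(catalog, "sourceCatalog", instrument="HSC", visit=12345)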

576 """ 

577 raise NotImplementedError() 

578 

579 @abstractmethod 

580 def getDeferred( 

581 self, 

582 datasetRefOrType: DatasetRef | DatasetType | str, 

583 /, 

584 dataId: DataId | None = None, 

585 *, 

586 parameters: dict | None = None, 

587 collections: Any = None, 

588 storageClass: str | StorageClass | None = None, 

589 **kwargs: Any, 

590 ) -> DeferredDatasetHandle: 

591 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

592 after an immediate registry lookup. 

593 

594 Parameters 

595 ---------- 

596 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

597 When `DatasetRef` the `dataId` should be `None`. 

598 Otherwise the `DatasetType` or name thereof. 

599 dataId : `dict` or `DataCoordinate`, optional 

600 A `dict` of `Dimension` link name, value pairs that label the 

601 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

602 should be provided as the first argument. 

603 parameters : `dict` 

604 Additional StorageClass-defined options to control reading, 

605 typically used to efficiently read only a subset of the dataset. 

606 collections : Any, optional 

607 Collections to be searched, overriding ``self.collections``. 

608 Can be any of the types supported by the ``collections`` argument 

609 to butler construction. 

610 storageClass : `StorageClass` or `str`, optional 

611 The storage class to be used to override the Python type 

612 returned by this method. By default the returned type matches 

613 the dataset type definition for this dataset. Specifying a 

614 read `StorageClass` can force a different type to be returned. 

615 This type must be compatible with the original type. 

616 **kwargs 

617 Additional keyword arguments used to augment or construct a 

618 `DataId`. See `DataId` parameters. 

619 

620 Returns 

621 ------- 

622 obj : `DeferredDatasetHandle` 

623 A handle which can be used to retrieve a dataset at a later time. 

624 

625 Raises 

626 ------ 

627 LookupError 

628 Raised if no matching dataset exists in the `Registry` or 

629 datastore. 

630 ValueError 

631 Raised if a resolved `DatasetRef` was passed as an input, but it 

632 differs from the one found in the registry. 

633 TypeError 

634 Raised if no collections were provided. 
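
Examples
--------
An illustrative sketch (the dataset type name and data ID keys are
hypothetical)::

    handle = butler.getDeferred(
        "calexp", instrument="HSC", visit=12345, detector=42
    )
    # The dataset is only read when the handle is used.
    exposure = handle.get()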

635 """ 

636 raise NotImplementedError() 

637 

638 @abstractmethod 

639 def get( 

640 self, 

641 datasetRefOrType: DatasetRef | DatasetType | str, 

642 /, 

643 dataId: DataId | None = None, 

644 *, 

645 parameters: dict[str, Any] | None = None, 

646 collections: Any = None, 

647 storageClass: StorageClass | str | None = None, 

648 **kwargs: Any, 

649 ) -> Any: 

650 """Retrieve a stored dataset. 

651 

652 Parameters 

653 ---------- 

654 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

655 When `DatasetRef` the `dataId` should be `None`. 

656 Otherwise the `DatasetType` or name thereof. 

657 If a resolved `DatasetRef`, the associated dataset 

658 is returned directly without additional querying. 

659 dataId : `dict` or `DataCoordinate` 

660 A `dict` of `Dimension` link name, value pairs that label the 

661 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

662 should be provided as the first argument. 

663 parameters : `dict` 

664 Additional StorageClass-defined options to control reading, 

665 typically used to efficiently read only a subset of the dataset. 

666 collections : Any, optional 

667 Collections to be searched, overriding ``self.collections``. 

668 Can be any of the types supported by the ``collections`` argument 

669 to butler construction. 

670 storageClass : `StorageClass` or `str`, optional 

671 The storage class to be used to override the Python type 

672 returned by this method. By default the returned type matches 

673 the dataset type definition for this dataset. Specifying a 

674 read `StorageClass` can force a different type to be returned. 

675 This type must be compatible with the original type. 

676 **kwargs 

677 Additional keyword arguments used to augment or construct a 

678 `DataCoordinate`. See `DataCoordinate.standardize` 

679 parameters. 

680 

681 Returns 

682 ------- 

683 obj : `object` 

684 The dataset. 

685 

686 Raises 

687 ------ 

688 LookupError 

689 Raised if no matching dataset exists in the `Registry`. 

690 TypeError 

691 Raised if no collections were provided. 

692 

693 Notes 

694 ----- 

695 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

696 this method requires that the given data ID include temporal dimensions 

697 beyond the dimensions of the dataset type itself, in order to find the 

698 dataset with the appropriate validity range. For example, a "bias" 

699 dataset with native dimensions ``{instrument, detector}`` could be 

700 fetched with a ``{instrument, detector, exposure}`` data ID, because 

701 ``exposure`` is a temporal dimension. 
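
Examples
--------
A sketch of the two common calling patterns; the dataset type name,
collection, and data ID values are illustrative only::

    # By dataset type name and data ID.
    exposure = butler.get(
        "calexp", instrument="HSC", visit=12345, detector=42,
        collections="HSC/runs/example",
    )
    # By resolved DatasetRef (no further collection search is needed).
    exposure = butler.get(ref)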

702 """ 

703 raise NotImplementedError() 

704 

705 @abstractmethod 

706 def getURIs( 

707 self, 

708 datasetRefOrType: DatasetRef | DatasetType | str, 

709 /, 

710 dataId: DataId | None = None, 

711 *, 

712 predict: bool = False, 

713 collections: Any = None, 

714 run: str | None = None, 

715 **kwargs: Any, 

716 ) -> DatasetRefURIs: 

717 """Return the URIs associated with the dataset. 

718 

719 Parameters 

720 ---------- 

721 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

722 When `DatasetRef` the `dataId` should be `None`. 

723 Otherwise the `DatasetType` or name thereof. 

724 dataId : `dict` or `DataCoordinate` 

725 A `dict` of `Dimension` link name, value pairs that label the 

726 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

727 should be provided as the first argument. 

728 predict : `bool` 

729 If `True`, allow URIs to be returned of datasets that have not 

730 been written. 

731 collections : Any, optional 

732 Collections to be searched, overriding ``self.collections``. 

733 Can be any of the types supported by the ``collections`` argument 

734 to butler construction. 

735 run : `str`, optional 

736 Run to use for predictions, overriding ``self.run``. 

737 **kwargs 

738 Additional keyword arguments used to augment or construct a 

739 `DataCoordinate`. See `DataCoordinate.standardize` 

740 parameters. 

741 

742 Returns 

743 ------- 

744 uris : `DatasetRefURIs` 

745 The URI to the primary artifact associated with this dataset (if 

746 the dataset was disassembled within the datastore this may be 

747 `None`), and the URIs to any components associated with the dataset 

748 artifact (this can be empty if there are no components).
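
Examples
--------
An illustrative sketch; whether the primary URI is set or the component
mapping is populated depends on how the datastore stored the dataset::

    primary, components = butler.getURIs(
        "calexp", instrument="HSC", visit=12345, detector=42
    )
    if primary is not None:
        print("single artifact:", primary)
    else:
        print("component artifacts:", components)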

749 """ 

750 raise NotImplementedError() 

751 

752 def getURI( 

753 self, 

754 datasetRefOrType: DatasetRef | DatasetType | str, 

755 /, 

756 dataId: DataId | None = None, 

757 *, 

758 predict: bool = False, 

759 collections: Any = None, 

760 run: str | None = None, 

761 **kwargs: Any, 

762 ) -> ResourcePath: 

763 """Return the URI to the Dataset. 

764 

765 Parameters 

766 ---------- 

767 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

768 When `DatasetRef` the `dataId` should be `None`. 

769 Otherwise the `DatasetType` or name thereof. 

770 dataId : `dict` or `DataCoordinate` 

771 A `dict` of `Dimension` link name, value pairs that label the 

772 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

773 should be provided as the first argument. 

774 predict : `bool` 

775 If `True`, allow URIs to be returned of datasets that have not 

776 been written. 

777 collections : Any, optional 

778 Collections to be searched, overriding ``self.collections``. 

779 Can be any of the types supported by the ``collections`` argument 

780 to butler construction. 

781 run : `str`, optional 

782 Run to use for predictions, overriding ``self.run``. 

783 **kwargs 

784 Additional keyword arguments used to augment or construct a 

785 `DataCoordinate`. See `DataCoordinate.standardize` 

786 parameters. 

787 

788 Returns 

789 ------- 

790 uri : `lsst.resources.ResourcePath` 

791 URI pointing to the Dataset within the datastore. If the 

792 Dataset does not exist in the datastore, and if ``predict`` is 

793 `True`, the URI will be a prediction and will include a URI 

794 fragment "#predicted". 

795 If the datastore does not have entities that relate well 

796 to the concept of a URI the returned URI string will be 

797 descriptive. The returned URI is not guaranteed to be obtainable. 

798 

799 Raises 

800 ------ 

801 LookupError 

802 Raised if a URI has been requested for a dataset that does not

803 exist and guessing is not allowed.

804 ValueError 

805 Raised if a resolved `DatasetRef` was passed as an input, but it 

806 differs from the one found in the registry. 

807 TypeError 

808 Raised if no collections were provided. 

809 RuntimeError 

810 Raised if a URI is requested for a dataset that consists of 

811 multiple artifacts. 

812 """ 

813 primary, components = self.getURIs( 

814 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

815 ) 

816 

817 if primary is None or components: 

818 raise RuntimeError( 

819 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

820 "Use Butler.getURIs() instead." 

821 ) 

822 return primary 

823 

824 @abstractmethod 

825 def get_dataset_type(self, name: str) -> DatasetType: 

826 """Get the `DatasetType`. 

827 

828 Parameters 

829 ---------- 

830 name : `str` 

831 Name of the type. 

832 

833 Returns 

834 ------- 

835 type : `DatasetType` 

836 The `DatasetType` associated with the given name. 

837 

838 Raises 

839 ------ 

840 lsst.daf.butler.MissingDatasetTypeError 

841 Raised if the requested dataset type has not been registered. 

842 

843 Notes 

844 ----- 

845 This method handles component dataset types automatically, though most 

846 other operations do not. 

847 """ 

848 raise NotImplementedError() 

849 

850 @abstractmethod 

851 def get_dataset( 

852 self, 

853 id: DatasetId, 

854 *, 

855 storage_class: str | StorageClass | None = None, 

856 dimension_records: bool = False, 

857 datastore_records: bool = False, 

858 ) -> DatasetRef | None: 

859 """Retrieve a Dataset entry. 

860 

861 Parameters 

862 ---------- 

863 id : `DatasetId` 

864 The unique identifier for the dataset. 

865 storage_class : `str` or `StorageClass` or `None` 

866 A storage class to use when creating the returned entry. If given 

867 it must be compatible with the default storage class. 

868 dimension_records : `bool`, optional 

869 If `True` the ref will be expanded and contain dimension records. 

870 datastore_records : `bool`, optional 

871 If `True` the ref will contain associated datastore records. 

872 

873 Returns 

874 ------- 

875 ref : `DatasetRef` or `None` 

876 A ref to the Dataset, or `None` if no matching Dataset 

877 was found. 

878 """ 

879 raise NotImplementedError() 

880 

881 @abstractmethod 

882 def find_dataset( 

883 self, 

884 dataset_type: DatasetType | str, 

885 data_id: DataId | None = None, 

886 *, 

887 collections: str | Sequence[str] | None = None, 

888 timespan: Timespan | None = None, 

889 storage_class: str | StorageClass | None = None, 

890 dimension_records: bool = False, 

891 datastore_records: bool = False, 

892 **kwargs: Any, 

893 ) -> DatasetRef | None: 

894 """Find a dataset given its `DatasetType` and data ID. 

895 

896 This can be used to obtain a `DatasetRef` that permits the dataset to 

897 be read from a `Datastore`. If the dataset is a component and can not 

898 be found using the provided dataset type, a dataset ref for the parent 

899 will be returned instead but with the correct dataset type. 

900 

901 Parameters 

902 ---------- 

903 dataset_type : `DatasetType` or `str` 

904 A `DatasetType` or the name of one. If this is a `DatasetType` 

905 instance, its storage class will be respected and propagated to 

906 the output, even if it differs from the dataset type definition 

907 in the registry, as long as the storage classes are convertible. 

908 data_id : `dict` or `DataCoordinate`, optional 

909 A `dict`-like object containing the `Dimension` links that identify 

910 the dataset within a collection. If it is a `dict` the dataId 

911 can include dimension record values such as ``day_obs`` and 

912 ``seq_num`` or ``full_name`` that can be used to derive the 

913 primary dimension. 

914 collections : `str` or `list` [`str`], optional 

915 An ordered list of collections to search for the dataset.

916 Defaults to ``self.defaults.collections``. 

917 timespan : `Timespan`, optional 

918 A timespan that the validity range of the dataset must overlap. 

919 If not provided, any `~CollectionType.CALIBRATION` collections 

920 matched by the ``collections`` argument will not be searched. 

921 storage_class : `str` or `StorageClass` or `None` 

922 A storage class to use when creating the returned entry. If given 

923 it must be compatible with the default storage class. 

924 dimension_records : `bool`, optional 

925 If `True` the ref will be expanded and contain dimension records. 

926 datastore_records : `bool`, optional 

927 If `True` the ref will contain associated datastore records. 

928 **kwargs 

929 Additional keyword arguments passed to 

930 `DataCoordinate.standardize` to convert ``dataId`` to a true 

931 `DataCoordinate` or augment an existing one. This can also include 

932 dimension record metadata that can be used to derive a primary 

933 dimension value. 

934 

935 Returns 

936 ------- 

937 ref : `DatasetRef` 

938 A reference to the dataset, or `None` if no matching Dataset 

939 was found. 

940 

941 Raises 

942 ------ 

943 lsst.daf.butler.NoDefaultCollectionError 

944 Raised if ``collections`` is `None` and 

945 ``self.collections`` is `None`. 

946 LookupError 

947 Raised if one or more data ID keys are missing. 

948 lsst.daf.butler.MissingDatasetTypeError 

949 Raised if the dataset type does not exist. 

950 lsst.daf.butler.MissingCollectionError 

951 Raised if any of ``collections`` does not exist in the registry. 

952 

953 Notes 

954 ----- 

955 This method simply returns `None` and does not raise an exception even 

956 when the set of collections searched is intrinsically incompatible with 

957 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

958 only `~CollectionType.CALIBRATION` collections are being searched. 

959 This may make it harder to debug some lookup failures, but the behavior 

960 is intentional; we consider it more important that failed searches are 

961 reported consistently, regardless of the reason, and that adding 

962 additional collections that do not contain a match to the search path 

963 never changes the behavior. 

964 

965 This method handles component dataset types automatically, though most 

966 other query operations do not. 
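
Examples
--------
A hedged sketch of resolving a ref before reading it (the dataset type,
data ID keys, and collection name are illustrative)::

    ref = butler.find_dataset(
        "flat", instrument="HSC", detector=42, physical_filter="HSC-R",
        collections="HSC/calib/example",
    )
    if ref is not None:
        flat = butler.get(ref)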

967 """ 

968 raise NotImplementedError() 

969 

970 @abstractmethod 

971 def retrieveArtifacts( 

972 self, 

973 refs: Iterable[DatasetRef], 

974 destination: ResourcePathExpression, 

975 transfer: str = "auto", 

976 preserve_path: bool = True, 

977 overwrite: bool = False, 

978 ) -> list[ResourcePath]: 

979 """Retrieve the artifacts associated with the supplied refs. 

980 

981 Parameters 

982 ---------- 

983 refs : iterable of `DatasetRef` 

984 The datasets for which artifacts are to be retrieved. 

985 A single ref can result in multiple artifacts. The refs must 

986 be resolved. 

987 destination : `lsst.resources.ResourcePath` or `str` 

988 Location to write the artifacts. 

989 transfer : `str`, optional 

990 Method to use to transfer the artifacts. Must be one of the options 

991 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

992 "move" is not allowed. 

993 preserve_path : `bool`, optional 

994 If `True` the full path of the artifact within the datastore 

995 is preserved. If `False` the final file component of the path 

996 is used. 

997 overwrite : `bool`, optional 

998 If `True` allow transfers to overwrite existing files at the 

999 destination. 

1000 

1001 Returns 

1002 ------- 

1003 targets : `list` of `lsst.resources.ResourcePath` 

1004 URIs of file artifacts in destination location. Order is not 

1005 preserved. 

1006 

1007 Notes 

1008 ----- 

1009 For non-file datastores the artifacts written to the destination 

1010 may not match the representation inside the datastore. For example 

1011 a hierarchical data structure in a NoSQL database may well be stored 

1012 as a JSON file. 
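
Examples
--------
A minimal sketch copying artifacts for previously resolved refs into a
local directory (``refs`` and the destination path are placeholders)::

    paths = butler.retrieveArtifacts(
        refs, "/tmp/extracted", transfer="copy", preserve_path=False
    )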

1013 """ 

1014 raise NotImplementedError() 

1015 

1016 @abstractmethod 

1017 def exists( 

1018 self, 

1019 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1020 /, 

1021 data_id: DataId | None = None, 

1022 *, 

1023 full_check: bool = True, 

1024 collections: Any = None, 

1025 **kwargs: Any, 

1026 ) -> DatasetExistence: 

1027 """Indicate whether a dataset is known to Butler registry and 

1028 datastore. 

1029 

1030 Parameters 

1031 ---------- 

1032 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1033 When `DatasetRef` the ``data_id`` should be `None`.

1034 Otherwise the `DatasetType` or name thereof. 

1035 data_id : `dict` or `DataCoordinate` 

1036 A `dict` of `Dimension` link name, value pairs that label the 

1037 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1038 should be provided as the first argument. 

1039 full_check : `bool`, optional 

1040 If `True`, a check will be made for the actual existence of a 

1041 dataset artifact. This will involve additional overhead due to 

1042 the need to query an external system. If `False`, this check will 

1043 be omitted, and the registry and datastore will solely be asked 

1044 if they know about the dataset but no direct check for the 

1045 artifact will be performed. 

1046 collections : Any, optional 

1047 Collections to be searched, overriding ``self.collections``. 

1048 Can be any of the types supported by the ``collections`` argument 

1049 to butler construction. 

1050 **kwargs 

1051 Additional keyword arguments used to augment or construct a 

1052 `DataCoordinate`. See `DataCoordinate.standardize` 

1053 parameters. 

1054 

1055 Returns 

1056 ------- 

1057 existence : `DatasetExistence` 

1058 Object indicating whether the dataset is known to registry and 

1059 datastore. Evaluates to `True` if the dataset is present and known 

1060 to both. 
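
Examples
--------
An illustrative check using the truthiness of the returned
`DatasetExistence` (dataset type and data ID are hypothetical)::

    existence = butler.exists(
        "calexp", instrument="HSC", visit=12345, detector=42
    )
    if not existence:
        print("dataset is not fully present:", existence)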

1061 """ 

1062 raise NotImplementedError() 

1063 

1064 @abstractmethod 

1065 def _exists_many( 

1066 self, 

1067 refs: Iterable[DatasetRef], 

1068 /, 

1069 *, 

1070 full_check: bool = True, 

1071 ) -> dict[DatasetRef, DatasetExistence]: 

1072 """Indicate whether multiple datasets are known to Butler registry and 

1073 datastore. 

1074 

1075 This is an experimental API that may change at any moment. 

1076 

1077 Parameters 

1078 ---------- 

1079 refs : iterable of `DatasetRef` 

1080 The datasets to be checked. 

1081 full_check : `bool`, optional 

1082 If `True`, a check will be made for the actual existence of each 

1083 dataset artifact. This will involve additional overhead due to 

1084 the need to query an external system. If `False`, this check will 

1085 be omitted, and the registry and datastore will solely be asked 

1086 if they know about the dataset(s) but no direct check for the 

1087 artifact(s) will be performed. 

1088 

1089 Returns 

1090 ------- 

1091 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1092 Mapping from the given dataset refs to an enum indicating the 

1093 status of the dataset in registry and datastore. 

1094 Each value evaluates to `True` if the dataset is present and known 

1095 to both. 

1096 """ 

1097 raise NotImplementedError() 

1098 

1099 @abstractmethod 

1100 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1101 """Remove one or more `~CollectionType.RUN` collections and the 

1102 datasets within them. 

1103 

1104 Parameters 

1105 ---------- 

1106 names : `~collections.abc.Iterable` [ `str` ] 

1107 The names of the collections to remove. 

1108 unstore : `bool`, optional 

1109 If `True` (default), delete datasets from all datastores in which 

1110 they are present, and attempt to roll back the registry deletions if

1111 datastore deletions fail (which may not always be possible). If 

1112 `False`, datastore records for these datasets are still removed, 

1113 but any artifacts (e.g. files) will not be. 

1114 

1115 Raises 

1116 ------ 

1117 TypeError 

1118 Raised if one or more collections are not of type 

1119 `~CollectionType.RUN`. 
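
Examples
--------
A hedged sketch removing a single scratch run together with its stored
artifacts (the run name is illustrative)::

    butler.removeRuns(["u/alice/scratch"], unstore=True)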

1120 """ 

1121 raise NotImplementedError() 

1122 

1123 @abstractmethod 

1124 def ingest( 

1125 self, 

1126 *datasets: FileDataset, 

1127 transfer: str | None = "auto", 

1128 record_validation_info: bool = True, 

1129 ) -> None: 

1130 """Store and register one or more datasets that already exist on disk. 

1131 

1132 Parameters 

1133 ---------- 

1134 *datasets : `FileDataset` 

1135 Each positional argument is a struct containing information about 

1136 a file to be ingested, including its URI (either absolute or 

1137 relative to the datastore root, if applicable), a resolved 

1138 `DatasetRef`, and optionally a formatter class or its 

1139 fully-qualified string name. If a formatter is not provided, the 

1140 formatter that would be used for `put` is assumed. On successful 

1141 ingest all `FileDataset.formatter` attributes will be set to the 

1142 formatter class used. `FileDataset.path` attributes may be modified 

1143 to put paths in whatever the datastore considers a standardized 

1144 form. 

1145 transfer : `str`, optional 

1146 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1147 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1148 transfer the file. 

1149 record_validation_info : `bool`, optional 

1150 If `True`, the default, the datastore can record validation 

1151 information associated with the file. If `False` the datastore 

1152 will not attempt to track any information such as checksums 

1153 or file sizes. This can be useful if such information is tracked 

1154 in an external system or if the file is to be compressed in place. 

1155 It is up to the datastore whether this parameter is relevant. 

1156 

1157 Raises 

1158 ------ 

1159 TypeError 

1160 Raised if the butler is read-only or if no run was provided. 

1161 NotImplementedError 

1162 Raised if the `Datastore` does not support the given transfer mode. 

1163 DatasetTypeNotSupportedError 

1164 Raised if one or more files to be ingested have a dataset type that 

1165 is not supported by the `Datastore`.

1166 FileNotFoundError 

1167 Raised if one of the given files does not exist. 

1168 FileExistsError 

1169 Raised if transfer is not `None` but the (internal) location the 

1170 file would be moved to is already occupied. 

1171 

1172 Notes 

1173 ----- 

1174 This operation is not fully exception safe: if a database operation 

1175 fails, the given `FileDataset` instances may be only partially updated. 

1176 

1177 It is atomic in terms of database operations (they will either all 

1178 succeed or all fail) providing the database engine implements 

1179 transactions correctly. It will attempt to be atomic in terms of 

1180 filesystem operations as well, but this cannot be implemented 

1181 rigorously for most datastores. 
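
Examples
--------
A hedged sketch; it assumes ``ref`` is a resolved `DatasetRef` built for
the target repository and that the file path exists on disk::

    from lsst.daf.butler import FileDataset

    butler.ingest(
        FileDataset(path="/data/raw_12345_42.fits", refs=[ref]),
        transfer="copy",
    )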

1182 """ 

1183 raise NotImplementedError() 

1184 

1185 @abstractmethod 

1186 def export( 

1187 self, 

1188 *, 

1189 directory: str | None = None, 

1190 filename: str | None = None, 

1191 format: str | None = None, 

1192 transfer: str | None = None, 

1193 ) -> AbstractContextManager[RepoExportContext]: 

1194 """Export datasets from the repository represented by this `Butler`. 

1195 

1196 This method is a context manager that returns a helper object 

1197 (`RepoExportContext`) that is used to indicate what information from 

1198 the repository should be exported. 

1199 

1200 Parameters 

1201 ---------- 

1202 directory : `str`, optional 

1203 Directory dataset files should be written to if ``transfer`` is not 

1204 `None`. 

1205 filename : `str`, optional 

1206 Name for the file that will include database information associated 

1207 with the exported datasets. If this is not an absolute path and 

1208 ``directory`` is not `None`, it will be written to ``directory`` 

1209 instead of the current working directory. Defaults to 

1210 "export.{format}". 

1211 format : `str`, optional 

1212 File format for the database information file. If `None`, the 

1213 extension of ``filename`` will be used. 

1214 transfer : `str`, optional 

1215 Transfer mode passed to `Datastore.export`. 

1216 

1217 Raises 

1218 ------ 

1219 TypeError 

1220 Raised if the set of arguments passed is inconsistent. 

1221 

1222 Examples 

1223 -------- 

1224 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1225 methods are used to provide the iterables over data IDs and/or datasets 

1226 to be exported:: 

1227 

1228 with butler.export(filename="exports.yaml") as export:

1229 # Export all flats, but none of the dimension element rows 

1230 # (i.e. data ID information) associated with them. 

1231 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1232 elements=()) 

1233 # Export all datasets that start with "deepCoadd_" and all of 

1234 # their associated data ID information. 

1235 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1236 """ 

1237 raise NotImplementedError() 

1238 

1239 @abstractmethod 

1240 def import_( 

1241 self, 

1242 *, 

1243 directory: ResourcePathExpression | None = None, 

1244 filename: ResourcePathExpression | TextIO | None = None, 

1245 format: str | None = None, 

1246 transfer: str | None = None, 

1247 skip_dimensions: set | None = None, 

1248 ) -> None: 

1249 """Import datasets into this repository that were exported from a 

1250 different butler repository via `~lsst.daf.butler.Butler.export`. 

1251 

1252 Parameters 

1253 ---------- 

1254 directory : `~lsst.resources.ResourcePathExpression`, optional 

1255 Directory containing dataset files to import from. If `None`, 

1256 ``filename`` and all dataset file paths specified therein must 

1257 be absolute. 

1258 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1259 A stream or name of file that contains database information 

1260 associated with the exported datasets, typically generated by 

1261 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1262 `~lsst.resources.ResourcePath` and is not an absolute path, 

1263 it will first be looked for relative to ``directory`` and if not 

1264 found there it will be looked for in the current working 

1265 directory. Defaults to "export.{format}". 

1266 format : `str`, optional 

1267 File format for ``filename``. If `None`, the extension of 

1268 ``filename`` will be used. 

1269 transfer : `str`, optional 

1270 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1271 skip_dimensions : `set`, optional 

1272 Names of dimensions that should be skipped and not imported. 

1273 

1274 Raises 

1275 ------ 

1276 TypeError 

1277 Raised if the set of arguments passed is inconsistent, or if the 

1278 butler is read-only. 
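
Examples
--------
An illustrative round trip with `export`; the butler variables, file
name, directory, and collection name are placeholders::

    with source_butler.export(
        filename="export.yaml", directory="/tmp/export", transfer="copy"
    ) as export:
        export.saveDatasets(
            source_butler.registry.queryDatasets(
                "calexp", collections="HSC/runs/example"
            )
        )

    target_butler.import_(
        filename="export.yaml", directory="/tmp/export", transfer="copy"
    )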

1279 """ 

1280 raise NotImplementedError() 

1281 

1282 @abstractmethod 

1283 def transfer_dimension_records_from( 

1284 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1285 ) -> None: 

1286 """Transfer dimension records to this Butler from another Butler. 

1287 

1288 Parameters 

1289 ---------- 

1290 source_butler : `LimitedButler` or `Butler` 

1291 Butler from which the records are to be transferred. If data IDs 

1292 in ``source_refs`` are not expanded then this has to be a full 

1293 `Butler` whose registry will be used to expand data IDs. If the 

1294 source refs contain coordinates that are used to populate other 

1295 records then this will also need to be a full `Butler`. 

1296 source_refs : iterable of `DatasetRef` 

1297 Datasets defined in the source butler whose dimension records 

1298 should be transferred to this butler. In most circumstances,

1299 transfer is faster if the dataset refs are expanded. 

1300 """ 

1301 raise NotImplementedError() 

1302 

1303 @abstractmethod 

1304 def transfer_from( 

1305 self, 

1306 source_butler: LimitedButler, 

1307 source_refs: Iterable[DatasetRef], 

1308 transfer: str = "auto", 

1309 skip_missing: bool = True, 

1310 register_dataset_types: bool = False, 

1311 transfer_dimensions: bool = False, 

1312 ) -> Collection[DatasetRef]: 

1313 """Transfer datasets to this Butler from a run in another Butler. 

1314 

1315 Parameters 

1316 ---------- 

1317 source_butler : `LimitedButler` 

1318 Butler from which the datasets are to be transferred. If data IDs 

1319 in ``source_refs`` are not expanded then this has to be a full 

1320 `Butler` whose registry will be used to expand data IDs. 

1321 source_refs : iterable of `DatasetRef` 

1322 Datasets defined in the source butler that should be transferred to 

1323 this butler. In most circumstances, ``transfer_from`` is faster if 

1324 the dataset refs are expanded. 

1325 transfer : `str`, optional 

1326 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1327 skip_missing : `bool` 

1328 If `True`, datasets with no datastore artifact associated with 

1329 them are not transferred. If `False` a registry entry will be 

1330 created even if no datastore record is created (and so will 

1331 look equivalent to the dataset being unstored). 

1332 register_dataset_types : `bool` 

1333 If `True` any missing dataset types are registered. Otherwise 

1334 an exception is raised. 

1335 transfer_dimensions : `bool`, optional 

1336 If `True`, dimension record data associated with the new datasets 

1337 will be transferred. 

1338 

1339 Returns 

1340 ------- 

1341 refs : `list` of `DatasetRef` 

1342 The refs added to this Butler. 

1343 

1344 Notes 

1345 ----- 

1346 The datastore artifact has to exist for a transfer 

1347 to be made but non-existence is not an error. 

1348 

1349 Datasets that already exist in this run will be skipped. 

1350 

1351 The datasets are imported as part of a transaction, although 

1352 dataset types are registered before the transaction is started. 

1353 This means that it is possible for a dataset type to be registered 

1354 even though transfer has failed. 
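
Examples
--------
A hedged sketch transferring previously queried datasets between two
repositories (butler variables and collection name are illustrative)::

    refs = source_butler.registry.queryDatasets(
        "calexp", collections="HSC/runs/example"
    )
    transferred = target_butler.transfer_from(
        source_butler,
        refs,
        transfer="copy",
        register_dataset_types=True,
        transfer_dimensions=True,
    )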

1355 """ 

1356 raise NotImplementedError() 

1357 

1358 @abstractmethod 

1359 def validateConfiguration( 

1360 self, 

1361 logFailures: bool = False, 

1362 datasetTypeNames: Iterable[str] | None = None, 

1363 ignore: Iterable[str] | None = None, 

1364 ) -> None: 

1365 """Validate butler configuration. 

1366 

1367 Checks that each `DatasetType` can be stored in the `Datastore`. 

1368 

1369 Parameters 

1370 ---------- 

1371 logFailures : `bool`, optional 

1372 If `True`, output a log message for every validation error 

1373 detected. 

1374 datasetTypeNames : iterable of `str`, optional 

1375 The `DatasetType` names that should be checked. This allows 

1376 only a subset to be selected. 

1377 ignore : iterable of `str`, optional 

1378 Names of DatasetTypes to skip over. This can be used to skip 

1379 known problems. If a named `DatasetType` corresponds to a 

1380 composite, all components of that `DatasetType` will also be 

1381 ignored. 

1382 

1383 Raises 

1384 ------ 

1385 ButlerValidationError 

1386 Raised if there is some inconsistency with how this Butler 

1387 is configured. 

1388 """ 

1389 raise NotImplementedError() 

1390 

1391 @property 

1392 @abstractmethod 

1393 def collections(self) -> Sequence[str]: 

1394 """The collections to search by default, in order 

1395 (`~collections.abc.Sequence` [ `str` ]). 

1396 """ 

1397 raise NotImplementedError() 

1398 

1399 @property 

1400 @abstractmethod 

1401 def run(self) -> str | None: 

1402 """Name of the run this butler writes outputs to by default (`str` or 

1403 `None`). 

1404 """ 

1405 raise NotImplementedError() 

1406 

1407 @property 

1408 @abstractmethod 

1409 def registry(self) -> Registry: 

1410 """The object that manages dataset metadata and relationships 

1411 (`Registry`). 

1412 

1413 Many operations that don't involve reading or writing butler datasets 

1414 are accessible only via `Registry` methods. Eventually these methods 

1415 will be replaced by equivalent `Butler` methods. 
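
Examples
--------
A minimal sketch; the repository path and dataset type name are
illustrative::

    from lsst.daf.butler import Butler

    butler = Butler("/repo/main")
    # Operations without a direct Butler equivalent go through the
    # registry, e.g. looking up a dataset type definition.
    dataset_type = butler.registry.getDatasetType("calexp")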

1416 """ 

1417 raise NotImplementedError() 

1418 

1419 @abstractmethod 

1420 def _query(self) -> AbstractContextManager[Query]: 

1421 """Context manager returning a `Query` object used for construction 

1422 and execution of complex queries. 

1423 """ 

1424 raise NotImplementedError() 

1425 

1426 @abstractmethod 

1427 def _query_data_ids( 

1428 self, 

1429 dimensions: DimensionGroup | Iterable[str] | str, 

1430 *, 

1431 data_id: DataId | None = None, 

1432 where: str = "", 

1433 bind: Mapping[str, Any] | None = None, 

1434 expanded: bool = False, 

1435 order_by: Iterable[str] | str | None = None, 

1436 limit: int | None = None, 

1437 offset: int | None = None, 

1438 explain: bool = True, 

1439 **kwargs: Any, 

1440 ) -> list[DataCoordinate]: 

1441 """Query for data IDs matching user-provided criteria. 

1442 

1443 Parameters 

1444 ---------- 

1445 dimensions : `DimensionGroup`, `str`, or \ 

1446 `~collections.abc.Iterable` [`str`] 

1447 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1448 instances or `str`. Will be automatically expanded to a complete 

1449 `DimensionGroup`. 

1450 data_id : `dict` or `DataCoordinate`, optional 

1451 A data ID whose key-value pairs are used as equality constraints 

1452 in the query. 

1453 where : `str`, optional 

1454 A string expression similar to a SQL WHERE clause. May involve 

1455 any column of a dimension table or (as a shortcut for the primary 

1456 key column of a dimension table) dimension name. See 

1457 :ref:`daf_butler_dimension_expressions` for more information. 

1458 bind : `~collections.abc.Mapping`, optional 

1459 Mapping containing literal values that should be injected into the 

1460 ``where`` expression, keyed by the identifiers they replace. 

1461 Values of collection type can be expanded in some cases; see 

1462 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1463 information. 

1464 expanded : `bool`, optional 

1465 If `True` (default is `False`) then returned data IDs will have 

1466 dimension records. 

1467 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1468 Names of the columns/dimensions to use for ordering returned data 

1469 IDs. A column name can be prefixed with a minus sign (``-``) to 

1470 use descending ordering. 

1471 limit : `int`, optional 

1472 Upper limit on the number of returned records. 

1473 offset : `int`, optional 

1474 The number of records to skip before returning at most ``limit`` 

1475 records. If ``offset`` is specified then ``limit`` must be 

1476 specified as well. 

1477 explain : `bool`, optional 

1478 If `True` (default), an `EmptyQueryResultError` exception is 

1479 raised when the resulting list is empty. The exception contains 

1480 a non-empty list of strings explaining possible causes of the 

1481 empty result. 

1482 **kwargs 

1483 Additional keyword arguments are forwarded to 

1484 `DataCoordinate.standardize` when processing the ``data_id`` 

1485 argument (and may be used to provide a constraining data ID even 

1486 when the ``data_id`` argument is `None`). 

1487 

1488 Returns 

1489 ------- 

1490 dataIds : `list` [`DataCoordinate`] 

1491 Data IDs matching the given query parameters. These are always 

1492 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1493 returns `True`). 

1494 

1495 Raises 

1496 ------ 

1497 lsst.daf.butler.registry.DataIdError 

1498 Raised when ``data_id`` or keyword arguments specify unknown 

1499 dimensions or values, or when they contain inconsistent values. 

1500 lsst.daf.butler.registry.UserExpressionError 

1501 Raised when ``where`` expression is invalid. 

1502 lsst.daf.butler.EmptyQueryResultError 

1503 Raised when query generates empty result and ``explain`` is set to 

1504 `True`. 

1505 TypeError 

1506 Raised when the arguments are incompatible, e.g. ``offset`` is 

1507 specified, but ``limit`` is not. 
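
Examples
--------
A minimal sketch; the repository path, instrument name, and visit
constraint are illustrative::

    from lsst.daf.butler import Butler

    butler = Butler("/repo/main")
    data_ids = butler._query_data_ids(
        ["visit", "detector"],
        where="instrument = inst AND visit > 100",
        bind={"inst": "HSC"},
        order_by="visit",
        limit=10,
    )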

1508 """ 

1509 raise NotImplementedError() 

1510 

1511 @abstractmethod 

1512 def _query_datasets( 

1513 self, 

1514 dataset_type: Any, 

1515 collections: CollectionArgType | None = None, 

1516 *, 

1517 find_first: bool = True, 

1518 data_id: DataId | None = None, 

1519 where: str = "", 

1520 bind: Mapping[str, Any] | None = None, 

1521 expanded: bool = False, 

1522 explain: bool = True, 

1523 **kwargs: Any, 

1524 ) -> list[DatasetRef]: 

1525 """Query for dataset references matching user-provided criteria. 

1526 

1527 Parameters 

1528 ---------- 

1529 dataset_type : dataset type expression 

1530 An expression that fully or partially identifies the dataset types 

1531 to be queried. Allowed types include `DatasetType`, `str`, 

1532 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1533 be used to query all dataset types. See 

1534 :ref:`daf_butler_dataset_type_expressions` for more information. 

1535 collections : collection expression, optional 

1536 An expression that identifies the collections to search, such as a 

1537 `str` (for full matches or partial matches via globs), `re.Pattern` 

1538 (for partial matches), or iterable thereof. ``...`` can be used to 

1539 search all collections (actually just all `~CollectionType.RUN` 

1540 collections, because this will still find all datasets). 

1541 If not provided, the default collections are used. See 

1542 :ref:`daf_butler_collection_expressions` for more information. 

1543 find_first : `bool`, optional 

1544 If `True` (default), for each result data ID, only yield one 

1545 `DatasetRef` of each `DatasetType`, from the first collection in 

1546 which a dataset of that dataset type appears (according to the 

1547 order of ``collections`` passed in). If `True`, ``collections`` 

1548 must not contain regular expressions and may not be ``...``. 

1549 data_id : `dict` or `DataCoordinate`, optional 

1550 A data ID whose key-value pairs are used as equality constraints 

1551 in the query. 

1552 where : `str`, optional 

1553 A string expression similar to a SQL WHERE clause. May involve 

1554 any column of a dimension table or (as a shortcut for the primary 

1555 key column of a dimension table) dimension name. See 

1556 :ref:`daf_butler_dimension_expressions` for more information. 

1557 bind : `~collections.abc.Mapping`, optional 

1558 Mapping containing literal values that should be injected into the 

1559 ``where`` expression, keyed by the identifiers they replace. 

1560 Values of collection type can be expanded in some cases; see 

1561 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1562 information. 

1563 expanded : `bool`, optional 

1564 If `True` (default is `False`) then returned data IDs will have 

1565 dimension records. 

1566 explain : `bool`, optional 

1567 If `True` (default), an `EmptyQueryResultError` exception is 

1568 raised when the resulting list is empty. The exception contains 

1569 a non-empty list of strings explaining possible causes of the 

1570 empty result. 

1571 **kwargs 

1572 Additional keyword arguments are forwarded to 

1573 `DataCoordinate.standardize` when processing the ``data_id`` 

1574 argument (and may be used to provide a constraining data ID even 

1575 when the ``data_id`` argument is `None`). 

1576 

1577 Returns 

1578 ------- 

1579 refs : `list` [ `DatasetRef` ] 

1580 Dataset references matching the given query criteria. Nested data 

1581 IDs are guaranteed to include values for all implied dimensions 

1582 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1583 include dimension records (`DataCoordinate.hasRecords` will be 

1584 `False`) unless ``expanded`` is set to `True`, in which case the 

1585 returned data IDs carry their dimension records. 

1586 

1587 Raises 

1588 ------ 

1589 lsst.daf.butler.registry.DatasetTypeExpressionError 

1590 Raised when ``dataset_type`` expression is invalid. 

1591 lsst.daf.butler.registry.DataIdError 

1592 Raised when ``data_id`` or keyword arguments specify unknown 

1593 dimensions or values, or when they contain inconsistent values. 

1594 lsst.daf.butler.registry.UserExpressionError 

1595 Raised when ``where`` expression is invalid. 

1596 lsst.daf.butler.EmptyQueryResultError 

1597 Raised when query generates empty result and ``explain`` is set to 

1598 `True`. 

1599 TypeError 

1600 Raised when the arguments are incompatible, such as when a 

1601 collection wildcard is passed when ``find_first`` is `True`, or 

1602 when ``collections`` is `None` and default butler collections are 

1603 not defined. 

1604 

1605 Notes 

1606 ----- 

1607 When multiple dataset types are queried in a single call, the 

1608 results of this operation are equivalent to querying for each dataset 

1609 type separately in turn, and no information about the relationships 

1610 between datasets of different types is included. 
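
Examples
--------
A minimal sketch; the repository path, dataset type name, collection
name, and data ID values are illustrative::

    from lsst.daf.butler import Butler

    butler = Butler("/repo/main")
    # Keyword arguments act as data ID constraints; only the first
    # matching dataset per data ID is returned because find_first
    # defaults to True.
    refs = butler._query_datasets(
        "calexp",
        collections="HSC/runs/example",
        instrument="HSC",
        visit=12345,
    )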

1611 """ 

1612 raise NotImplementedError() 

1613 

1614 @abstractmethod 

1615 def _query_dimension_records( 

1616 self, 

1617 element: str, 

1618 *, 

1619 data_id: DataId | None = None, 

1620 where: str = "", 

1621 bind: Mapping[str, Any] | None = None, 

1622 order_by: Iterable[str] | str | None = None, 

1623 limit: int | None = None, 

1624 offset: int | None = None, 

1625 explain: bool = True, 

1626 **kwargs: Any, 

1627 ) -> list[DimensionRecord]: 

1628 """Query for dimension information matching user-provided criteria. 

1629 

1630 Parameters 

1631 ---------- 

1632 element : `str` 

1633 The name of a dimension element to obtain records for. 

1634 data_id : `dict` or `DataCoordinate`, optional 

1635 A data ID whose key-value pairs are used as equality constraints 

1636 in the query. 

1637 where : `str`, optional 

1638 A string expression similar to a SQL WHERE clause. See 

1639 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1640 information. 

1641 bind : `~collections.abc.Mapping`, optional 

1642 Mapping containing literal values that should be injected into the 

1643 ``where`` expression, keyed by the identifiers they replace. 

1644 Values of collection type can be expanded in some cases; see 

1645 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1646 information. 

1647 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1648 Names of the columns/dimensions to use for ordering returned data 

1649 IDs. A column name can be prefixed with a minus sign (``-``) to 

1650 use descending ordering. 

1651 limit : `int`, optional 

1652 Upper limit on the number of returned records. 

1653 offset : `int`, optional 

1654 The number of records to skip before returning at most ``limit`` 

1655 records. If ``offset`` is specified then ``limit`` must be 

1656 specified as well. 

1657 explain : `bool`, optional 

1658 If `True` (default), an `EmptyQueryResultError` exception is 

1659 raised when the resulting list is empty. The exception contains 

1660 a non-empty list of strings explaining possible causes of the 

1661 empty result. 

1662 **kwargs 

1663 Additional keyword arguments are forwarded to 

1664 `DataCoordinate.standardize` when processing the ``data_id`` 

1665 argument (and may be used to provide a constraining data ID even 

1666 when the ``data_id`` argument is `None`). 

1667 

1668 Returns 

1669 ------- 

1670 records : `list`[`DimensionRecord`] 

1671 Dimension records matching the given query parameters. 

1672 

1673 Raises 

1674 ------ 

1675 lsst.daf.butler.registry.DataIdError 

1676 Raised when ``data_id`` or keyword arguments specify unknown 

1677 dimensions or values, or when they contain inconsistent values. 

1678 lsst.daf.butler.registry.UserExpressionError 

1679 Raised when ``where`` expression is invalid. 

1680 lsst.daf.butler.EmptyQueryResultError 

1681 Raised when query generates empty result and ``explain`` is set to 

1682 `True`. 

1683 TypeError 

1684 Raised when the arguments are incompatible, e.g. ``offset`` is 

1685 specified, but ``limit`` is not. 
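
Examples
--------
A minimal sketch; the repository path and instrument name are
illustrative::

    from lsst.daf.butler import Butler

    butler = Butler("/repo/main")
    records = butler._query_dimension_records(
        "detector",
        instrument="LSSTCam",
        order_by="detector",
        limit=10,
    )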

1688 """ 

1689 raise NotImplementedError() 

1690 

1691 @abstractmethod 

1692 def _clone( 

1693 self, 

1694 *, 

1695 collections: Any = None, 

1696 run: str | None = None, 

1697 inferDefaults: bool = True, 

1698 **kwargs: Any, 

1699 ) -> Butler: 

1700 """Return a new Butler instance connected to the same repository 

1701 as this one, but overriding ``collections``, ``run``, 

1702 ``inferDefaults``, and the default data ID. 

1703 """ 

1704 raise NotImplementedError()