Coverage for python/lsst/daf/butler/_butler.py: 51%

182 statements  


1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from types import EllipsisType 

36from typing import TYPE_CHECKING, Any, TextIO 

37 

38from lsst.resources import ResourcePath, ResourcePathExpression 

39from lsst.utils import doImportType 

40from lsst.utils.iteration import ensure_iterable 

41from lsst.utils.logging import getLogger 

42 

43from ._butler_config import ButlerConfig, ButlerType 

44from ._butler_instance_options import ButlerInstanceOptions 

45from ._butler_repo_index import ButlerRepoIndex 

46from ._config import Config, ConfigSubset 

47from ._exceptions import EmptyQueryResultError 

48from ._limited_butler import LimitedButler 

49from .datastore import Datastore 

50from .dimensions import DimensionConfig 

51from .registry import RegistryConfig, _RegistryFactory 

52from .repo_relocation import BUTLER_ROOT_TAG 

53 

54if TYPE_CHECKING: 

55 from ._dataset_existence import DatasetExistence 

56 from ._dataset_ref import DatasetId, DatasetRef 

57 from ._dataset_type import DatasetType 

58 from ._deferredDatasetHandle import DeferredDatasetHandle 

59 from ._file_dataset import FileDataset 

60 from ._storage_class import StorageClass 

61 from ._timespan import Timespan 

62 from .datastore import DatasetRefURIs 

63 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

64 from .queries import Query 

65 from .registry import Registry 

66 from .transfers import RepoExportContext 

67 

68_LOG = getLogger(__name__) 

69 

70 

71class Butler(LimitedButler): # numpydoc ignore=PR02 

72 """Interface for data butler and factory for Butler instances. 

73 

74 Parameters 

75 ---------- 

76 config : `ButlerConfig`, `Config` or `str`, optional 

77 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

78 If a directory path is given the configuration will be read from a 

79 ``butler.yaml`` file in that location. If `None` is given default 

80 values will be used. If ``config`` contains a "cls" key then its value is

81 used as the name of the butler class; it must be a sub-class of this

82 class, otherwise `DirectButler` is instantiated.

83 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

84 An expression specifying the collections to be searched (in order) when 

85 reading datasets. 

86 This may be a `str` collection name or an iterable thereof. 

87 See :ref:`daf_butler_collection_expressions` for more information. 

88 These collections are not registered automatically and must be 

89 registered manually before they are used by any method, but this may

90 be done after the `Butler` is initialized.

91 run : `str`, optional 

92 Name of the `~CollectionType.RUN` collection new datasets should be 

93 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

94 ``collections`` will be set to ``[run]``. If not `None`, this 

95 collection will automatically be registered. If this is not set (and 

96 ``writeable`` is not set either), a read-only butler will be created. 

97 searchPaths : `list` of `str`, optional 

98 Directory paths to search when calculating the full Butler 

99 configuration. Not used if the supplied config is already a 

100 `ButlerConfig`. 

101 writeable : `bool`, optional 

102 Explicitly sets whether the butler supports write operations. If not 

103 provided, a read-write butler is created if any of ``run``, ``tags``, 

104 or ``chains`` is non-empty. 

105 inferDefaults : `bool`, optional 

106 If `True` (default) infer default data ID values from the values 

107 present in the datasets in ``collections``: if all collections have the 

108 same value (or no value) for a governor dimension, that value will be 

109 the default for that dimension. Nonexistent collections are ignored. 

110 If a default value is provided explicitly for a governor dimension via 

111 ``**kwargs``, no default will be inferred for that dimension. 

112 without_datastore : `bool`, optional 

113 If `True` do not attach a datastore to this butler. Any attempts 

114 to use a datastore will fail. 

115 **kwargs : `Any` 

116 Additional keyword arguments passed to the constructor of the actual

117 butler class.

118 

119 Notes 

120 ----- 

121 The preferred way to instantiate Butler is via the `from_config` method. 

122 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

123 but ``mypy`` will complain about the former. 

124 """ 

125 

126 def __new__( 

127 cls, 

128 config: Config | ResourcePathExpression | None = None, 

129 *, 

130 collections: Any = None, 

131 run: str | None = None, 

132 searchPaths: Sequence[ResourcePathExpression] | None = None, 

133 writeable: bool | None = None, 

134 inferDefaults: bool = True, 

135 without_datastore: bool = False, 

136 **kwargs: Any, 

137 ) -> Butler: 

138 if cls is Butler: 

139 return Butler.from_config( 

140 config=config, 

141 collections=collections, 

142 run=run, 

143 searchPaths=searchPaths, 

144 writeable=writeable, 

145 inferDefaults=inferDefaults, 

146 without_datastore=without_datastore, 

147 **kwargs, 

148 ) 

149 

150 # Note: we do not pass any parameters to super().__new__(); Python will

151 # pass them to __init__ after __new__ returns the sub-class instance.

152 return super().__new__(cls) 

153 

154 @classmethod 

155 def from_config( 

156 cls, 

157 config: Config | ResourcePathExpression | None = None, 

158 *, 

159 collections: Any = None, 

160 run: str | None = None, 

161 searchPaths: Sequence[ResourcePathExpression] | None = None, 

162 writeable: bool | None = None, 

163 inferDefaults: bool = True, 

164 without_datastore: bool = False, 

165 **kwargs: Any, 

166 ) -> Butler: 

167 """Create butler instance from configuration. 

168 

169 Parameters 

170 ---------- 

171 config : `ButlerConfig`, `Config` or `str`, optional 

172 Configuration. Anything acceptable to the `ButlerConfig` 

173 constructor. If a directory path is given the configuration will be 

174 read from a ``butler.yaml`` file in that location. If `None` is 

175 given default values will be used. If ``config`` contains a "cls" key

176 then its value is used as the name of the butler class and it must be a

177 sub-class of this class, otherwise `DirectButler` is instantiated. 

178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

179 An expression specifying the collections to be searched (in order) 

180 when reading datasets. 

181 This may be a `str` collection name or an iterable thereof. 

182 See :ref:`daf_butler_collection_expressions` for more information. 

183 These collections are not registered automatically and must be 

184 registered manually before they are used by any method, but this

185 may be done after the `Butler` is initialized.

186 run : `str`, optional 

187 Name of the `~CollectionType.RUN` collection new datasets should be 

188 inserted into. If ``collections`` is `None` and ``run`` is not 

189 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

190 this collection will automatically be registered. If this is not 

191 set (and ``writeable`` is not set either), a read-only butler will 

192 be created. 

193 searchPaths : `list` of `str`, optional 

194 Directory paths to search when calculating the full Butler 

195 configuration. Not used if the supplied config is already a 

196 `ButlerConfig`. 

197 writeable : `bool`, optional 

198 Explicitly sets whether the butler supports write operations. If 

199 not provided, a read-write butler is created if any of ``run``, 

200 ``tags``, or ``chains`` is non-empty. 

201 inferDefaults : `bool`, optional 

202 If `True` (default) infer default data ID values from the values 

203 present in the datasets in ``collections``: if all collections have 

204 the same value (or no value) for a governor dimension, that value 

205 will be the default for that dimension. Nonexistent collections 

206 are ignored. If a default value is provided explicitly for a 

207 governor dimension via ``**kwargs``, no default will be inferred 

208 for that dimension. 

209 without_datastore : `bool`, optional 

210 If `True` do not attach a datastore to this butler. Any attempts 

211 to use a datastore will fail. 

212 **kwargs : `Any` 

213 Default data ID key-value pairs. These may only identify 

214 "governor" dimensions like ``instrument`` and ``skymap``. 

215 

216 Returns 

217 ------- 

218 butler : `Butler` 

219 A `Butler` constructed from the given configuration. 

220 

221 Notes 

222 ----- 

223 Calling this factory method is identical to calling 

224 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

225 complains about a direct ``Butler()`` call.

226 

227 Examples 

228 -------- 

229 While there are many ways to control exactly how a `Butler` interacts 

230 with the collections in its `Registry`, the most common cases are still 

231 simple. 

232 

233 For a read-only `Butler` that searches one collection, do:: 

234 

235 butler = Butler.from_config( 

236 "/path/to/repo", collections=["u/alice/DM-50000"] 

237 ) 

238 

239 For a read-write `Butler` that writes to and reads from a 

240 `~CollectionType.RUN` collection:: 

241 

242 butler = Butler.from_config( 

243 "/path/to/repo", run="u/alice/DM-50000/a" 

244 ) 

245 

246 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

247 because we want to write to one `~CollectionType.RUN` collection but 

248 read from several others (as well):: 

249 

250 butler = Butler.from_config( 

251 "/path/to/repo", 

252 run="u/alice/DM-50000/a", 

253 collections=[ 

254 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

255 ] 

256 ) 

257 

258 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

259 Datasets will be read first from that run (since it appears first in 

260 the chain), and then from ``u/bob/DM-49998`` and finally 

261 ``HSC/defaults``. 

262 

263 Finally, one can always create a `Butler` with no collections:: 

264 

265 butler = Butler.from_config("/path/to/repo", writeable=True) 

266 

267 This can be extremely useful when you just want to use 

268 ``butler.registry``, e.g. for inserting dimension data or managing 

269 collections, or when the collections you want to use with the butler 

270 are not consistent. Passing ``writeable`` explicitly here is only 

271 necessary if you want to be able to make changes to the repo; usually

272 the value for ``writeable`` can be guessed from the collection

273 arguments provided, but it defaults to `False` when there are no

274 collection arguments. 

275 """ 

276 # DirectButler used to have a way to specify a "copy constructor" by 

277 # passing the "butler" parameter to its constructor. This 

278 # functionality has been moved out of the constructor into 

279 # Butler._clone(), but the new interface is not public yet. 

280 butler = kwargs.pop("butler", None) 

281 if butler is not None: 

282 if not isinstance(butler, Butler): 

283 raise TypeError("'butler' parameter must be a Butler instance") 

284 if config is not None or searchPaths is not None or writeable is not None: 

285 raise TypeError( 

286 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

287 ) 

288 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) 

289 

290 options = ButlerInstanceOptions( 

291 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs 

292 ) 

293 

294 # Load the Butler configuration. This may involve searching the 

295 # environment to locate a configuration file. 

296 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

297 butler_type = butler_config.get_butler_type() 

298 

299 # Make DirectButler if class is not specified. 

300 match butler_type: 

301 case ButlerType.DIRECT: 

302 from .direct_butler import DirectButler 

303 

304 return DirectButler.create_from_config( 

305 butler_config, 

306 options=options, 

307 without_datastore=without_datastore, 

308 ) 

309 case ButlerType.REMOTE: 

310 from .remote_butler import RemoteButlerFactory 

311 

312 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

313 return factory.create_butler_with_credentials_from_environment(butler_options=options) 

314 case _: 

315 raise TypeError(f"Unknown Butler type '{butler_type}'") 

316 

317 @staticmethod 

318 def makeRepo( 

319 root: ResourcePathExpression, 

320 config: Config | str | None = None, 

321 dimensionConfig: Config | str | None = None, 

322 standalone: bool = False, 

323 searchPaths: list[str] | None = None, 

324 forceConfigRoot: bool = True, 

325 outfile: ResourcePathExpression | None = None, 

326 overwrite: bool = False, 

327 ) -> Config: 

328 """Create an empty data repository by adding a butler.yaml config 

329 to a repository root directory. 

330 

331 Parameters 

332 ---------- 

333 root : `lsst.resources.ResourcePathExpression` 

334 Path or URI to the root location of the new repository. Will be 

335 created if it does not exist. 

336 config : `Config` or `str`, optional 

337 Configuration to write to the repository, after setting any 

338 root-dependent Registry or Datastore config options. Can not 

339 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

340 configuration will be used. Root-dependent config options 

341 specified in this config are overwritten if ``forceConfigRoot`` 

342 is `True`. 

343 dimensionConfig : `Config` or `str`, optional 

344 Configuration for dimensions, will be used to initialize registry 

345 database. 

346 standalone : `bool` 

347 If `True`, write all expanded defaults, not just customized or

348 repository-specific settings. 

349 This (mostly) decouples the repository from the default 

350 configuration, insulating it from changes to the defaults (which 

351 may be good or bad, depending on the nature of the changes). 

352 Future *additions* to the defaults will still be picked up when 

353 initializing `Butlers` to repos created with ``standalone=True``. 

354 searchPaths : `list` of `str`, optional 

355 Directory paths to search when calculating the full butler 

356 configuration. 

357 forceConfigRoot : `bool`, optional 

358 If `False`, any values present in the supplied ``config`` that 

359 would normally be reset are not overridden and will appear 

360 directly in the output config. This allows non-standard overrides 

361 of the root directory for a datastore or registry to be given. 

362 If this parameter is `True` the values for ``root`` will be 

363 forced into the resulting config if appropriate. 

364 outfile : `lsst.resources.ResourcePathExpression`, optional

365 If not-`None`, the output configuration will be written to this 

366 location rather than into the repository itself. Can be a URI 

367 string. Can refer to a directory that will be used to write 

368 ``butler.yaml``. 

369 overwrite : `bool`, optional 

370 Create a new configuration file even if one already exists 

371 in the specified output location. Default is to raise 

372 an exception. 

373 

374 Returns 

375 ------- 

376 config : `Config` 

377 The updated `Config` instance written to the repo. 

378 

379 Raises 

380 ------ 

381 ValueError 

382 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

383 regular Config (as these subclasses would make it impossible to 

384 support ``standalone=False``). 

385 FileExistsError 

386 Raised if the output config file already exists. 

387 os.error 

388 Raised if the directory does not exist, exists but is not a 

389 directory, or cannot be created. 

390 

391 Notes 

392 ----- 

393 Note that when ``standalone=False`` (the default), the configuration 

394 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

395 construct the repository should also be used to construct any Butlers 

396 to avoid configuration inconsistencies. 
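
Examples
--------
A minimal sketch of creating and then opening a new repository; the
repository path is illustrative and default configuration is assumed::

    config = Butler.makeRepo("/path/to/repo")
    butler = Butler.from_config("/path/to/repo", writeable=True)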

397 """ 

398 if isinstance(config, ButlerConfig | ConfigSubset): 

399 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

400 

401 # Ensure that the root of the repository exists or can be made 

402 root_uri = ResourcePath(root, forceDirectory=True) 

403 root_uri.mkdir() 

404 

405 config = Config(config) 

406 

407 # If we are creating a new repo from scratch with relative roots, 

408 # do not propagate an explicit root from the config file 

409 if "root" in config: 

410 del config["root"] 

411 

412 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

413 imported_class = doImportType(full["datastore", "cls"]) 

414 if not issubclass(imported_class, Datastore): 

415 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

416 datastoreClass: type[Datastore] = imported_class 

417 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

418 

419 # if key exists in given config, parse it, otherwise parse the defaults 

420 # in the expanded config 

421 if config.get(("registry", "db")): 

422 registryConfig = RegistryConfig(config) 

423 else: 

424 registryConfig = RegistryConfig(full) 

425 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

426 if defaultDatabaseUri is not None: 

427 Config.updateParameters( 

428 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

429 ) 

430 else: 

431 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

432 

433 if standalone: 

434 config.merge(full) 

435 else: 

436 # Always expand the registry.managers section into the per-repo 

437 # config, because after the database schema is created, it's not 

438 # allowed to change anymore. Note that in the standalone=True 

439 # branch, _everything_ in the config is expanded, so there's no 

440 # need to special case this. 

441 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

442 configURI: ResourcePathExpression 

443 if outfile is not None: 

444 # When writing to a separate location we must include 

445 # the root of the butler repo in the config else it won't know 

446 # where to look. 

447 config["root"] = root_uri.geturl() 

448 configURI = outfile 

449 else: 

450 configURI = root_uri 

451 # Strip obscore configuration, if it is present, before writing config 

452 # to a file; the obscore config will be stored in the registry.

453 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

454 config_to_write = config.copy() 

455 del config_to_write[obscore_config_key] 

456 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

457 # configFile attribute is updated, need to copy it to original. 

458 config.configFile = config_to_write.configFile 

459 else: 

460 config.dumpToUri(configURI, overwrite=overwrite) 

461 

462 # Create Registry and populate tables 

463 registryConfig = RegistryConfig(config.get("registry")) 

464 dimensionConfig = DimensionConfig(dimensionConfig) 

465 _RegistryFactory(registryConfig).create_from_config( 

466 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

467 ) 

468 

469 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

470 

471 return config 

472 

473 @classmethod 

474 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

475 """Look up the label in a butler repository index. 

476 

477 Parameters 

478 ---------- 

479 label : `str` 

480 Label of the Butler repository to look up. 

481 return_label : `bool`, optional 

482 If ``label`` cannot be found in the repository index (either 

483 because index is not defined or ``label`` is not in the index) and 

484 ``return_label`` is `True` then return ``ResourcePath(label)``. 

485 If ``return_label`` is `False` (default) then an exception will be 

486 raised instead. 

487 

488 Returns 

489 ------- 

490 uri : `lsst.resources.ResourcePath` 

491 URI to the Butler repository associated with the given label or 

492 default value if it is provided. 

493 

494 Raises 

495 ------ 

496 KeyError 

497 Raised if the label is not found in the index, or if an index 

498 is not defined, and ``return_label`` is `False`. 

499 

500 Notes 

501 ----- 

502 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

503 information is discovered. 
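
Examples
--------
For example, assuming a repository index that defines a hypothetical
label ``"main"``::

    uri = Butler.get_repo_uri("main")
    butler = Butler.from_config(uri)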

504 """ 

505 return ButlerRepoIndex.get_repo_uri(label, return_label) 

506 

507 @classmethod 

508 def get_known_repos(cls) -> set[str]: 

509 """Retrieve the list of known repository labels. 

510 

511 Returns 

512 ------- 

513 repos : `set` of `str` 

514 All the known labels. Can be empty if no index can be found. 

515 

516 Notes 

517 ----- 

518 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

519 information is discovered. 

520 """ 

521 return ButlerRepoIndex.get_known_repos() 

522 

523 @abstractmethod 

524 def _caching_context(self) -> AbstractContextManager[None]: 

525 """Context manager that enables caching.""" 

526 raise NotImplementedError() 

527 

528 @abstractmethod 

529 def transaction(self) -> AbstractContextManager[None]: 

530 """Context manager supporting `Butler` transactions. 

531 

532 Transactions can be nested. 

533 """ 

534 raise NotImplementedError() 

535 

536 @abstractmethod 

537 def put( 

538 self, 

539 obj: Any, 

540 datasetRefOrType: DatasetRef | DatasetType | str, 

541 /, 

542 dataId: DataId | None = None, 

543 *, 

544 run: str | None = None, 

545 **kwargs: Any, 

546 ) -> DatasetRef: 

547 """Store and register a dataset. 

548 

549 Parameters 

550 ---------- 

551 obj : `object` 

552 The dataset. 

553 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

554 When `DatasetRef` is provided, ``dataId`` should be `None`. 

555 Otherwise the `DatasetType` or name thereof. If a fully resolved 

556 `DatasetRef` is given the run and ID are used directly. 

557 dataId : `dict` or `DataCoordinate` 

558 A `dict` of `Dimension` link name, value pairs that label the 

559 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

560 should be provided as the second argument. 

561 run : `str`, optional 

562 The name of the run the dataset should be added to, overriding 

563 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

564 **kwargs 

565 Additional keyword arguments used to augment or construct a 

566 `DataCoordinate`. See `DataCoordinate.standardize` 

567 parameters. Not used if a resolved `DatasetRef` is provided.

568 

569 Returns 

570 ------- 

571 ref : `DatasetRef` 

572 A reference to the stored dataset, updated with the correct id if 

573 given. 

574 

575 Raises 

576 ------ 

577 TypeError 

578 Raised if the butler is read-only or if no run has been provided. 
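
Examples
--------
A sketch of a typical call; the ``"bias"`` dataset type, its data ID
keys, and the in-memory object are illustrative assumptions::

    butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
    # bias_image is an in-memory object of the appropriate Python type.
    ref = butler.put(bias_image, "bias", instrument="HSC", detector=0)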

579 """ 

580 raise NotImplementedError() 

581 

582 @abstractmethod 

583 def getDeferred( 

584 self, 

585 datasetRefOrType: DatasetRef | DatasetType | str, 

586 /, 

587 dataId: DataId | None = None, 

588 *, 

589 parameters: dict | None = None, 

590 collections: Any = None, 

591 storageClass: str | StorageClass | None = None, 

592 **kwargs: Any, 

593 ) -> DeferredDatasetHandle: 

594 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

595 after an immediate registry lookup. 

596 

597 Parameters 

598 ---------- 

599 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

600 When a `DatasetRef` is provided, ``dataId`` should be `None`.

601 Otherwise the `DatasetType` or name thereof. 

602 dataId : `dict` or `DataCoordinate`, optional 

603 A `dict` of `Dimension` link name, value pairs that label the 

604 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

605 should be provided as the first argument. 

606 parameters : `dict` 

607 Additional StorageClass-defined options to control reading, 

608 typically used to efficiently read only a subset of the dataset. 

609 collections : Any, optional 

610 Collections to be searched, overriding ``self.collections``. 

611 Can be any of the types supported by the ``collections`` argument 

612 to butler construction. 

613 storageClass : `StorageClass` or `str`, optional 

614 The storage class to be used to override the Python type 

615 returned by this method. By default the returned type matches 

616 the dataset type definition for this dataset. Specifying a 

617 read `StorageClass` can force a different type to be returned. 

618 This type must be compatible with the original type. 

619 **kwargs 

620 Additional keyword arguments used to augment or construct a 

621 `DataId`. See `DataId` parameters. 

622 

623 Returns 

624 ------- 

625 obj : `DeferredDatasetHandle` 

626 A handle which can be used to retrieve a dataset at a later time. 

627 

628 Raises 

629 ------ 

630 LookupError 

631 Raised if no matching dataset exists in the `Registry` or 

632 datastore. 

633 ValueError 

634 Raised if a resolved `DatasetRef` was passed as an input, but it 

635 differs from the one found in the registry. 

636 TypeError 

637 Raised if no collections were provided. 
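
Examples
--------
A sketch of deferred retrieval; the dataset type and data ID are
illustrative assumptions::

    handle = butler.getDeferred("bias", instrument="HSC", detector=0)
    bias = handle.get()  # the artifact is only read at this point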

638 """ 

639 raise NotImplementedError() 

640 

641 @abstractmethod 

642 def get( 

643 self, 

644 datasetRefOrType: DatasetRef | DatasetType | str, 

645 /, 

646 dataId: DataId | None = None, 

647 *, 

648 parameters: dict[str, Any] | None = None, 

649 collections: Any = None, 

650 storageClass: StorageClass | str | None = None, 

651 **kwargs: Any, 

652 ) -> Any: 

653 """Retrieve a stored dataset. 

654 

655 Parameters 

656 ---------- 

657 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

658 When a `DatasetRef` is provided, ``dataId`` should be `None`.

659 Otherwise the `DatasetType` or name thereof. 

660 If a resolved `DatasetRef`, the associated dataset 

661 is returned directly without additional querying. 

662 dataId : `dict` or `DataCoordinate` 

663 A `dict` of `Dimension` link name, value pairs that label the 

664 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

665 should be provided as the first argument. 

666 parameters : `dict` 

667 Additional StorageClass-defined options to control reading, 

668 typically used to efficiently read only a subset of the dataset. 

669 collections : Any, optional 

670 Collections to be searched, overriding ``self.collections``. 

671 Can be any of the types supported by the ``collections`` argument 

672 to butler construction. 

673 storageClass : `StorageClass` or `str`, optional 

674 The storage class to be used to override the Python type 

675 returned by this method. By default the returned type matches 

676 the dataset type definition for this dataset. Specifying a 

677 read `StorageClass` can force a different type to be returned. 

678 This type must be compatible with the original type. 

679 **kwargs 

680 Additional keyword arguments used to augment or construct a 

681 `DataCoordinate`. See `DataCoordinate.standardize` 

682 parameters. 

683 

684 Returns 

685 ------- 

686 obj : `object` 

687 The dataset. 

688 

689 Raises 

690 ------ 

691 LookupError 

692 Raised if no matching dataset exists in the `Registry`. 

693 TypeError 

694 Raised if no collections were provided. 

695 

696 Notes 

697 ----- 

698 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

699 this method requires that the given data ID include temporal dimensions 

700 beyond the dimensions of the dataset type itself, in order to find the 

701 dataset with the appropriate validity range. For example, a "bias" 

702 dataset with native dimensions ``{instrument, detector}`` could be 

703 fetched with a ``{instrument, detector, exposure}`` data ID, because 

704 ``exposure`` is a temporal dimension. 
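
Examples
--------
For example, assuming a ``"bias"`` dataset type with dimensions
``{instrument, detector}`` stored in a hypothetical calibration
collection ``"HSC/calib"``::

    bias = butler.get(
        "bias",
        instrument="HSC", detector=0, exposure=12345,
        collections="HSC/calib",
    )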

705 """ 

706 raise NotImplementedError() 

707 

708 @abstractmethod 

709 def getURIs( 

710 self, 

711 datasetRefOrType: DatasetRef | DatasetType | str, 

712 /, 

713 dataId: DataId | None = None, 

714 *, 

715 predict: bool = False, 

716 collections: Any = None, 

717 run: str | None = None, 

718 **kwargs: Any, 

719 ) -> DatasetRefURIs: 

720 """Return the URIs associated with the dataset. 

721 

722 Parameters 

723 ---------- 

724 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

725 When a `DatasetRef` is provided, ``dataId`` should be `None`.

726 Otherwise the `DatasetType` or name thereof. 

727 dataId : `dict` or `DataCoordinate` 

728 A `dict` of `Dimension` link name, value pairs that label the 

729 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

730 should be provided as the first argument. 

731 predict : `bool` 

732 If `True`, allow URIs to be returned of datasets that have not 

733 been written. 

734 collections : Any, optional 

735 Collections to be searched, overriding ``self.collections``. 

736 Can be any of the types supported by the ``collections`` argument 

737 to butler construction. 

738 run : `str`, optional 

739 Run to use for predictions, overriding ``self.run``. 

740 **kwargs 

741 Additional keyword arguments used to augment or construct a 

742 `DataCoordinate`. See `DataCoordinate.standardize` 

743 parameters. 

744 

745 Returns 

746 ------- 

747 uris : `DatasetRefURIs` 

748 The URI to the primary artifact associated with this dataset (if 

749 the dataset was disassembled within the datastore this may be 

750 `None`), and the URIs to any components associated with the dataset 

751 artifact (this can be empty if there are no components).

752 """ 

753 raise NotImplementedError() 

754 

755 def getURI( 

756 self, 

757 datasetRefOrType: DatasetRef | DatasetType | str, 

758 /, 

759 dataId: DataId | None = None, 

760 *, 

761 predict: bool = False, 

762 collections: Any = None, 

763 run: str | None = None, 

764 **kwargs: Any, 

765 ) -> ResourcePath: 

766 """Return the URI to the Dataset. 

767 

768 Parameters 

769 ---------- 

770 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

771 When a `DatasetRef` is provided, ``dataId`` should be `None`.

772 Otherwise the `DatasetType` or name thereof. 

773 dataId : `dict` or `DataCoordinate` 

774 A `dict` of `Dimension` link name, value pairs that label the 

775 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

776 should be provided as the first argument. 

777 predict : `bool` 

778 If `True`, allow URIs to be returned of datasets that have not 

779 been written. 

780 collections : Any, optional 

781 Collections to be searched, overriding ``self.collections``. 

782 Can be any of the types supported by the ``collections`` argument 

783 to butler construction. 

784 run : `str`, optional 

785 Run to use for predictions, overriding ``self.run``. 

786 **kwargs 

787 Additional keyword arguments used to augment or construct a 

788 `DataCoordinate`. See `DataCoordinate.standardize` 

789 parameters. 

790 

791 Returns 

792 ------- 

793 uri : `lsst.resources.ResourcePath` 

794 URI pointing to the Dataset within the datastore. If the 

795 Dataset does not exist in the datastore, and if ``predict`` is 

796 `True`, the URI will be a prediction and will include a URI 

797 fragment "#predicted". 

798 If the datastore does not have entities that relate well 

799 to the concept of a URI, the returned URI string will be

800 descriptive. The returned URI is not guaranteed to be obtainable. 

801 

802 Raises 

803 ------ 

804 LookupError 

805 Raised if a URI has been requested for a dataset that does not

806 exist and guessing is not allowed.

807 ValueError 

808 Raised if a resolved `DatasetRef` was passed as an input, but it 

809 differs from the one found in the registry. 

810 TypeError 

811 Raised if no collections were provided. 

812 RuntimeError 

813 Raised if a URI is requested for a dataset that consists of 

814 multiple artifacts. 
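
Examples
--------
A sketch for a single-artifact dataset; the dataset type and data ID
are illustrative assumptions::

    uri = butler.getURI("bias", instrument="HSC", detector=0)
    print(uri.geturl())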

815 """ 

816 primary, components = self.getURIs( 

817 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

818 ) 

819 

820 if primary is None or components: 

821 raise RuntimeError( 

822 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

823 "Use Butler.getURIs() instead." 

824 ) 

825 return primary 

826 

827 @abstractmethod 

828 def get_dataset_type(self, name: str) -> DatasetType: 

829 """Get the `DatasetType`. 

830 

831 Parameters 

832 ---------- 

833 name : `str` 

834 Name of the type. 

835 

836 Returns 

837 ------- 

838 type : `DatasetType` 

839 The `DatasetType` associated with the given name. 

840 

841 Raises 

842 ------ 

843 lsst.daf.butler.MissingDatasetTypeError 

844 Raised if the requested dataset type has not been registered. 

845 

846 Notes 

847 ----- 

848 This method handles component dataset types automatically, though most 

849 other operations do not. 

850 """ 

851 raise NotImplementedError() 

852 

853 @abstractmethod 

854 def get_dataset( 

855 self, 

856 id: DatasetId, 

857 *, 

858 storage_class: str | StorageClass | None = None, 

859 dimension_records: bool = False, 

860 datastore_records: bool = False, 

861 ) -> DatasetRef | None: 

862 """Retrieve a Dataset entry. 

863 

864 Parameters 

865 ---------- 

866 id : `DatasetId` 

867 The unique identifier for the dataset. 

868 storage_class : `str` or `StorageClass` or `None` 

869 A storage class to use when creating the returned entry. If given 

870 it must be compatible with the default storage class. 

871 dimension_records : `bool`, optional 

872 If `True` the ref will be expanded and contain dimension records. 

873 datastore_records : `bool`, optional 

874 If `True` the ref will contain associated datastore records. 

875 

876 Returns 

877 ------- 

878 ref : `DatasetRef` or `None` 

879 A ref to the Dataset, or `None` if no matching Dataset 

880 was found. 

881 """ 

882 raise NotImplementedError() 

883 

884 @abstractmethod 

885 def find_dataset( 

886 self, 

887 dataset_type: DatasetType | str, 

888 data_id: DataId | None = None, 

889 *, 

890 collections: str | Sequence[str] | None = None, 

891 timespan: Timespan | None = None, 

892 storage_class: str | StorageClass | None = None, 

893 dimension_records: bool = False, 

894 datastore_records: bool = False, 

895 **kwargs: Any, 

896 ) -> DatasetRef | None: 

897 """Find a dataset given its `DatasetType` and data ID. 

898 

899 This can be used to obtain a `DatasetRef` that permits the dataset to 

900 be read from a `Datastore`. If the dataset is a component and cannot

901 be found using the provided dataset type, a dataset ref for the parent 

902 will be returned instead but with the correct dataset type. 

903 

904 Parameters 

905 ---------- 

906 dataset_type : `DatasetType` or `str` 

907 A `DatasetType` or the name of one. If this is a `DatasetType` 

908 instance, its storage class will be respected and propagated to 

909 the output, even if it differs from the dataset type definition 

910 in the registry, as long as the storage classes are convertible. 

911 data_id : `dict` or `DataCoordinate`, optional 

912 A `dict`-like object containing the `Dimension` links that identify 

913 the dataset within a collection. If it is a `dict` the dataId 

914 can include dimension record values such as ``day_obs`` and 

915 ``seq_num`` or ``full_name`` that can be used to derive the 

916 primary dimension. 

917 collections : `str` or `list` [`str`], optional 

918 An ordered list of collections to search for the dataset.

919 Defaults to ``self.defaults.collections``. 

920 timespan : `Timespan`, optional 

921 A timespan that the validity range of the dataset must overlap. 

922 If not provided, any `~CollectionType.CALIBRATION` collections 

923 matched by the ``collections`` argument will not be searched. 

924 storage_class : `str` or `StorageClass` or `None` 

925 A storage class to use when creating the returned entry. If given 

926 it must be compatible with the default storage class. 

927 dimension_records : `bool`, optional 

928 If `True` the ref will be expanded and contain dimension records. 

929 datastore_records : `bool`, optional 

930 If `True` the ref will contain associated datastore records. 

931 **kwargs 

932 Additional keyword arguments passed to 

933 `DataCoordinate.standardize` to convert ``dataId`` to a true 

934 `DataCoordinate` or augment an existing one. This can also include 

935 dimension record metadata that can be used to derive a primary 

936 dimension value. 

937 

938 Returns 

939 ------- 

940 ref : `DatasetRef` or `None`

941 A reference to the dataset, or `None` if no matching Dataset 

942 was found. 

943 

944 Raises 

945 ------ 

946 lsst.daf.butler.NoDefaultCollectionError 

947 Raised if ``collections`` is `None` and 

948 ``self.collections`` is `None`. 

949 LookupError 

950 Raised if one or more data ID keys are missing. 

951 lsst.daf.butler.MissingDatasetTypeError 

952 Raised if the dataset type does not exist. 

953 lsst.daf.butler.MissingCollectionError 

954 Raised if any of ``collections`` does not exist in the registry. 

955 

956 Notes 

957 ----- 

958 This method simply returns `None` and does not raise an exception even 

959 when the set of collections searched is intrinsically incompatible with 

960 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

961 only `~CollectionType.CALIBRATION` collections are being searched. 

962 This may make it harder to debug some lookup failures, but the behavior 

963 is intentional; we consider it more important that failed searches are 

964 reported consistently, regardless of the reason, and that adding 

965 additional collections that do not contain a match to the search path 

966 never changes the behavior. 

967 

968 This method handles component dataset types automatically, though most 

969 other query operations do not. 
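
Examples
--------
For example (the dataset type, data ID keys, and collection name are
illustrative assumptions)::

    ref = butler.find_dataset(
        "bias", instrument="HSC", detector=0, collections="HSC/defaults"
    )
    if ref is None:
        print("No matching dataset was found.")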

970 """ 

971 raise NotImplementedError() 

972 

973 @abstractmethod 

974 def retrieveArtifacts( 

975 self, 

976 refs: Iterable[DatasetRef], 

977 destination: ResourcePathExpression, 

978 transfer: str = "auto", 

979 preserve_path: bool = True, 

980 overwrite: bool = False, 

981 ) -> list[ResourcePath]: 

982 """Retrieve the artifacts associated with the supplied refs. 

983 

984 Parameters 

985 ---------- 

986 refs : iterable of `DatasetRef` 

987 The datasets for which artifacts are to be retrieved. 

988 A single ref can result in multiple artifacts. The refs must 

989 be resolved. 

990 destination : `lsst.resources.ResourcePath` or `str` 

991 Location to write the artifacts. 

992 transfer : `str`, optional 

993 Method to use to transfer the artifacts. Must be one of the options 

994 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

995 "move" is not allowed. 

996 preserve_path : `bool`, optional 

997 If `True` the full path of the artifact within the datastore 

998 is preserved. If `False` the final file component of the path 

999 is used. 

1000 overwrite : `bool`, optional 

1001 If `True` allow transfers to overwrite existing files at the 

1002 destination. 

1003 

1004 Returns 

1005 ------- 

1006 targets : `list` of `lsst.resources.ResourcePath` 

1007 URIs of file artifacts in the destination location. Order is not

1008 preserved. 

1009 

1010 Notes 

1011 ----- 

1012 For non-file datastores the artifacts written to the destination 

1013 may not match the representation inside the datastore. For example 

1014 a hierarchical data structure in a NoSQL database may well be stored 

1015 as a JSON file. 
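
Examples
--------
A sketch, assuming ``refs`` is an iterable of resolved `DatasetRef`
objects obtained elsewhere and that the destination directory exists
or can be created::

    paths = butler.retrieveArtifacts(
        refs, "/path/to/exports", transfer="copy", preserve_path=False
    )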

1016 """ 

1017 raise NotImplementedError() 

1018 

1019 @abstractmethod 

1020 def exists( 

1021 self, 

1022 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1023 /, 

1024 data_id: DataId | None = None, 

1025 *, 

1026 full_check: bool = True, 

1027 collections: Any = None, 

1028 **kwargs: Any, 

1029 ) -> DatasetExistence: 

1030 """Indicate whether a dataset is known to Butler registry and 

1031 datastore. 

1032 

1033 Parameters 

1034 ---------- 

1035 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1036 When a `DatasetRef` is provided, ``data_id`` should be `None`.

1037 Otherwise the `DatasetType` or name thereof. 

1038 data_id : `dict` or `DataCoordinate` 

1039 A `dict` of `Dimension` link name, value pairs that label the 

1040 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1041 should be provided as the first argument. 

1042 full_check : `bool`, optional 

1043 If `True`, a check will be made for the actual existence of a 

1044 dataset artifact. This will involve additional overhead due to 

1045 the need to query an external system. If `False`, this check will 

1046 be omitted, and the registry and datastore will solely be asked 

1047 if they know about the dataset but no direct check for the 

1048 artifact will be performed. 

1049 collections : Any, optional 

1050 Collections to be searched, overriding ``self.collections``. 

1051 Can be any of the types supported by the ``collections`` argument 

1052 to butler construction. 

1053 **kwargs 

1054 Additional keyword arguments used to augment or construct a 

1055 `DataCoordinate`. See `DataCoordinate.standardize` 

1056 parameters. 

1057 

1058 Returns 

1059 ------- 

1060 existence : `DatasetExistence` 

1061 Object indicating whether the dataset is known to registry and 

1062 datastore. Evaluates to `True` if the dataset is present and known 

1063 to both. 
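
Examples
--------
For example (the dataset type and data ID are illustrative
assumptions)::

    existence = butler.exists("bias", instrument="HSC", detector=0)
    if existence:
        print("Known to registry and present in the datastore.")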

1064 """ 

1065 raise NotImplementedError() 

1066 

1067 @abstractmethod 

1068 def _exists_many( 

1069 self, 

1070 refs: Iterable[DatasetRef], 

1071 /, 

1072 *, 

1073 full_check: bool = True, 

1074 ) -> dict[DatasetRef, DatasetExistence]: 

1075 """Indicate whether multiple datasets are known to Butler registry and 

1076 datastore. 

1077 

1078 This is an experimental API that may change at any moment. 

1079 

1080 Parameters 

1081 ---------- 

1082 refs : iterable of `DatasetRef` 

1083 The datasets to be checked. 

1084 full_check : `bool`, optional 

1085 If `True`, a check will be made for the actual existence of each 

1086 dataset artifact. This will involve additional overhead due to 

1087 the need to query an external system. If `False`, this check will 

1088 be omitted, and the registry and datastore will solely be asked 

1089 if they know about the dataset(s) but no direct check for the 

1090 artifact(s) will be performed. 

1091 

1092 Returns 

1093 ------- 

1094 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1095 Mapping from the given dataset refs to an enum indicating the 

1096 status of the dataset in registry and datastore. 

1097 Each value evaluates to `True` if the dataset is present and known 

1098 to both. 

1099 """ 

1100 raise NotImplementedError() 

1101 

1102 @abstractmethod 

1103 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1104 """Remove one or more `~CollectionType.RUN` collections and the 

1105 datasets within them. 

1106 

1107 Parameters 

1108 ---------- 

1109 names : `~collections.abc.Iterable` [ `str` ] 

1110 The names of the collections to remove. 

1111 unstore : `bool`, optional 

1112 If `True` (default), delete datasets from all datastores in which 

1113 they are present, and attempt to roll back the registry deletions if

1114 datastore deletions fail (which may not always be possible). If 

1115 `False`, datastore records for these datasets are still removed, 

1116 but any artifacts (e.g. files) will not be. 

1117 

1118 Raises 

1119 ------ 

1120 TypeError 

1121 Raised if one or more collections are not of type 

1122 `~CollectionType.RUN`. 

1123 """ 

1124 raise NotImplementedError() 

1125 

1126 @abstractmethod 

1127 def ingest( 

1128 self, 

1129 *datasets: FileDataset, 

1130 transfer: str | None = "auto", 

1131 record_validation_info: bool = True, 

1132 ) -> None: 

1133 """Store and register one or more datasets that already exist on disk. 

1134 

1135 Parameters 

1136 ---------- 

1137 *datasets : `FileDataset` 

1138 Each positional argument is a struct containing information about 

1139 a file to be ingested, including its URI (either absolute or 

1140 relative to the datastore root, if applicable), a resolved 

1141 `DatasetRef`, and optionally a formatter class or its 

1142 fully-qualified string name. If a formatter is not provided, the 

1143 formatter that would be used for `put` is assumed. On successful 

1144 ingest all `FileDataset.formatter` attributes will be set to the 

1145 formatter class used. `FileDataset.path` attributes may be modified 

1146 to put paths in whatever the datastore considers a standardized 

1147 form. 

1148 transfer : `str`, optional 

1149 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1150 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1151 transfer the file. 

1152 record_validation_info : `bool`, optional 

1153 If `True`, the default, the datastore can record validation 

1154 information associated with the file. If `False` the datastore 

1155 will not attempt to track any information such as checksums 

1156 or file sizes. This can be useful if such information is tracked 

1157 in an external system or if the file is to be compressed in place. 

1158 It is up to the datastore whether this parameter is relevant. 

1159 

1160 Raises 

1161 ------ 

1162 TypeError 

1163 Raised if the butler is read-only or if no run was provided. 

1164 NotImplementedError 

1165 Raised if the `Datastore` does not support the given transfer mode. 

1166 DatasetTypeNotSupportedError 

1167 Raised if one or more files to be ingested have a dataset type that 

1168 is not supported by the `Datastore`.

1169 FileNotFoundError 

1170 Raised if one of the given files does not exist. 

1171 FileExistsError 

1172 Raised if transfer is not `None` but the (internal) location the 

1173 file would be moved to is already occupied. 

1174 

1175 Notes 

1176 ----- 

1177 This operation is not fully exception safe: if a database operation 

1178 fails, the given `FileDataset` instances may be only partially updated. 

1179 

1180 It is atomic in terms of database operations (they will either all 

1181 succeed or all fail) provided the database engine implements

1182 transactions correctly. It will attempt to be atomic in terms of 

1183 filesystem operations as well, but this cannot be implemented 

1184 rigorously for most datastores. 
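
Examples
--------
A sketch, assuming ``ref`` is a resolved `DatasetRef` whose dataset
type and data ID describe the file being ingested; the file path is
illustrative::

    dataset = FileDataset(path="/path/to/file.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy")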

1185 """ 

1186 raise NotImplementedError() 

1187 

1188 @abstractmethod 

1189 def export( 

1190 self, 

1191 *, 

1192 directory: str | None = None, 

1193 filename: str | None = None, 

1194 format: str | None = None, 

1195 transfer: str | None = None, 

1196 ) -> AbstractContextManager[RepoExportContext]: 

1197 """Export datasets from the repository represented by this `Butler`. 

1198 

1199 This method is a context manager that returns a helper object 

1200 (`RepoExportContext`) that is used to indicate what information from 

1201 the repository should be exported. 

1202 

1203 Parameters 

1204 ---------- 

1205 directory : `str`, optional 

1206 Directory dataset files should be written to if ``transfer`` is not 

1207 `None`. 

1208 filename : `str`, optional 

1209 Name for the file that will include database information associated 

1210 with the exported datasets. If this is not an absolute path and 

1211 ``directory`` is not `None`, it will be written to ``directory`` 

1212 instead of the current working directory. Defaults to 

1213 "export.{format}". 

1214 format : `str`, optional 

1215 File format for the database information file. If `None`, the 

1216 extension of ``filename`` will be used. 

1217 transfer : `str`, optional 

1218 Transfer mode passed to `Datastore.export`. 

1219 

1220 Raises 

1221 ------ 

1222 TypeError 

1223 Raised if the set of arguments passed is inconsistent. 

1224 

1225 Examples 

1226 -------- 

1227 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1228 methods are used to provide the iterables over data IDs and/or datasets 

1229 to be exported:: 

1230 

1231 with butler.export(filename="exports.yaml") as export:

1232 # Export all flats, but none of the dimension element rows 

1233 # (i.e. data ID information) associated with them. 

1234 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1235 elements=()) 

1236 # Export all datasets that start with "deepCoadd_" and all of 

1237 # their associated data ID information. 

1238 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1239 """ 

1240 raise NotImplementedError() 

1241 

1242 @abstractmethod 

1243 def import_( 

1244 self, 

1245 *, 

1246 directory: ResourcePathExpression | None = None, 

1247 filename: ResourcePathExpression | TextIO | None = None, 

1248 format: str | None = None, 

1249 transfer: str | None = None, 

1250 skip_dimensions: set | None = None, 

1251 ) -> None: 

1252 """Import datasets into this repository that were exported from a 

1253 different butler repository via `~lsst.daf.butler.Butler.export`. 

1254 

1255 Parameters 

1256 ---------- 

1257 directory : `~lsst.resources.ResourcePathExpression`, optional 

1258 Directory containing dataset files to import from. If `None`, 

1259 ``filename`` and all dataset file paths specified therein must 

1260 be absolute. 

1261 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1262 A stream or name of file that contains database information 

1263 associated with the exported datasets, typically generated by 

1264 `~lsst.daf.butler.Butler.export`. If this is a string (name) or

1265 `~lsst.resources.ResourcePath` and is not an absolute path, 

1266 it will first be looked for relative to ``directory`` and if not 

1267 found there it will be looked for in the current working 

1268 directory. Defaults to "export.{format}". 

1269 format : `str`, optional 

1270 File format for ``filename``. If `None`, the extension of 

1271 ``filename`` will be used. 

1272 transfer : `str`, optional 

1273 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1274 skip_dimensions : `set`, optional 

1275 Names of dimensions that should be skipped and not imported. 

1276 

1277 Raises 

1278 ------ 

1279 TypeError 

1280 Raised if the set of arguments passed is inconsistent, or if the 

1281 butler is read-only. 
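
Examples
--------
For example, to load an export previously written by
`~lsst.daf.butler.Butler.export`; the directory and file names are
illustrative assumptions::

    butler.import_(
        directory="/path/to/exports", filename="exports.yaml", transfer="copy"
    )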

1282 """ 

1283 raise NotImplementedError() 

1284 

1285 @abstractmethod 

1286 def transfer_dimension_records_from( 

1287 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1288 ) -> None: 

1289 """Transfer dimension records to this Butler from another Butler. 

1290 

1291 Parameters 

1292 ---------- 

1293 source_butler : `LimitedButler` or `Butler` 

1294 Butler from which the records are to be transferred. If data IDs 

1295 in ``source_refs`` are not expanded then this has to be a full 

1296 `Butler` whose registry will be used to expand data IDs. If the 

1297 source refs contain coordinates that are used to populate other 

1298 records then this will also need to be a full `Butler`. 

1299 source_refs : iterable of `DatasetRef` 

1300 Datasets defined in the source butler whose dimension records 

1301 should be transferred to this butler. In most circumstances,

1302 transfer is faster if the dataset refs are expanded. 

1303 """ 

1304 raise NotImplementedError() 

1305 

1306 @abstractmethod 

1307 def transfer_from( 

1308 self, 

1309 source_butler: LimitedButler, 

1310 source_refs: Iterable[DatasetRef], 

1311 transfer: str = "auto", 

1312 skip_missing: bool = True, 

1313 register_dataset_types: bool = False, 

1314 transfer_dimensions: bool = False, 

1315 dry_run: bool = False, 

1316 ) -> Collection[DatasetRef]: 

1317 """Transfer datasets to this Butler from a run in another Butler. 

1318 

1319 Parameters 

1320 ---------- 

1321 source_butler : `LimitedButler` 

1322 Butler from which the datasets are to be transferred. If data IDs 

1323 in ``source_refs`` are not expanded then this has to be a full 

1324 `Butler` whose registry will be used to expand data IDs. 

1325 source_refs : iterable of `DatasetRef` 

1326 Datasets defined in the source butler that should be transferred to 

1327 this butler. In most circumstances, ``transfer_from`` is faster if 

1328 the dataset refs are expanded. 

1329 transfer : `str`, optional 

1330 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1331 skip_missing : `bool` 

1332 If `True`, datasets with no datastore artifact associated with 

1333 them are not transferred. If `False` a registry entry will be 

1334 created even if no datastore record is created (and so will 

1335 look equivalent to the dataset being unstored). 

1336 register_dataset_types : `bool` 

1337 If `True` any missing dataset types are registered. Otherwise 

1338 an exception is raised. 

1339 transfer_dimensions : `bool`, optional 

1340 If `True`, dimension record data associated with the new datasets 

1341 will be transferred. 

1342 dry_run : `bool`, optional 

1343 If `True` the transfer will be processed without any modifications 

1344 made to the target butler and as if the target butler did not 

1345 have any of the datasets. 

1346 

1347 Returns 

1348 ------- 

1349 refs : `list` of `DatasetRef` 

1350 The refs added to this Butler. 

1351 

1352 Notes 

1353 ----- 

1354 The datastore artifact has to exist for a transfer 

1355 to be made but non-existence is not an error. 

1356 

1357 Datasets that already exist in this run will be skipped. 

1358 

1359 The datasets are imported as part of a transaction, although 

1360 dataset types are registered before the transaction is started. 

1361 This means that it is possible for a dataset type to be registered 

1362 even though transfer has failed. 
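
Examples
--------
A sketch, assuming ``source_butler`` is another butler and ``refs`` is
an iterable of `DatasetRef` obtained from it (typically via a registry
query)::

    transferred = butler.transfer_from(
        source_butler, refs, transfer="copy", register_dataset_types=True
    )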

1363 """ 

1364 raise NotImplementedError() 

1365 

1366 @abstractmethod 

1367 def validateConfiguration( 

1368 self, 

1369 logFailures: bool = False, 

1370 datasetTypeNames: Iterable[str] | None = None, 

1371 ignore: Iterable[str] | None = None, 

1372 ) -> None: 

1373 """Validate butler configuration. 

1374 

1375 Checks that each `DatasetType` can be stored in the `Datastore`. 

1376 

1377 Parameters 

1378 ---------- 

1379 logFailures : `bool`, optional 

1380 If `True`, output a log message for every validation error 

1381 detected. 

1382 datasetTypeNames : iterable of `str`, optional 

1383 The `DatasetType` names that should be checked. This allows 

1384 only a subset to be selected. 

1385 ignore : iterable of `str`, optional 

1386 Names of DatasetTypes to skip over. This can be used to skip 

1387 known problems. If a named `DatasetType` corresponds to a 

1388 composite, all components of that `DatasetType` will also be 

1389 ignored. 

1390 

1391 Raises 

1392 ------ 

1393 ButlerValidationError 

1394 Raised if there is some inconsistency with how this Butler 

1395 is configured. 

1396 """ 

1397 raise NotImplementedError() 

1398 
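# A minimal sketch of validating a repository's configuration, assuming a
# placeholder repository path and a hypothetical "skymap_extras" dataset type
# with a known, accepted problem that we choose to skip.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
butler.validateConfiguration(
    logFailures=True,           # log each validation error individually
    ignore=["skymap_extras"],   # skip dataset types with known issues
)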

1399 @property 

1400 @abstractmethod 

1401 def collections(self) -> Sequence[str]: 

1402 """The collections to search by default, in order 

1403 (`~collections.abc.Sequence` [ `str` ]). 

1404 """ 

1405 raise NotImplementedError() 

1406 

1407 @property 

1408 @abstractmethod 

1409 def run(self) -> str | None: 

1410 """Name of the run this butler writes outputs to by default (`str` or 

1411 `None`). 

1412 """ 

1413 raise NotImplementedError() 

1414 

1415 @property 

1416 @abstractmethod 

1417 def registry(self) -> Registry: 

1418 """The object that manages dataset metadata and relationships 

1419 (`Registry`). 

1420 

1421 Many operations that don't involve reading or writing butler datasets 

1422 are accessible only via `Registry` methods. Eventually these methods 

1423 will be replaced by equivalent `Butler` methods. 

1424 """ 

1425 raise NotImplementedError() 

1426 
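# A minimal sketch of inspecting the defaults exposed by the three properties
# above; the repository path, collection, and run names are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config(
    "/path/to/repo",
    collections=["HSC/runs/example"],
    run="u/someone/scratch",
)
print(butler.collections)                  # default search collections, in order
print(butler.run)                          # default output run, or None
print(butler.registry.queryCollections())  # all collections known to the registry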

1427 @abstractmethod 

1428 def _query(self) -> AbstractContextManager[Query]: 

1429 """Context manager returning a `Query` object used for construction 

1430 and execution of complex queries. 

1431 """ 

1432 raise NotImplementedError() 

1433 

1434 def _query_data_ids( 

1435 self, 

1436 dimensions: DimensionGroup | Iterable[str] | str, 

1437 *, 

1438 data_id: DataId | None = None, 

1439 where: str = "", 

1440 bind: Mapping[str, Any] | None = None, 

1441 with_dimension_records: bool = False, 

1442 order_by: Iterable[str] | str | None = None, 

1443 limit: int | None = None, 

1444 offset: int = 0, 

1445 explain: bool = True, 

1446 **kwargs: Any, 

1447 ) -> list[DataCoordinate]: 

1448 """Query for data IDs matching user-provided criteria. 

1449 

1450 Parameters 

1451 ---------- 

1452 dimensions : `DimensionGroup`, `str`, or \ 

1453 `~collections.abc.Iterable` [`str`] 

1454 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1455 instances or `str`. Will be automatically expanded to a complete 

1456 `DimensionGroup`. 

1457 data_id : `dict` or `DataCoordinate`, optional 

1458 A data ID whose key-value pairs are used as equality constraints 

1459 in the query. 

1460 where : `str`, optional 

1461 A string expression similar to a SQL WHERE clause. May involve 

1462 any column of a dimension table or (as a shortcut for the primary 

1463 key column of a dimension table) dimension name. See 

1464 :ref:`daf_butler_dimension_expressions` for more information. 

1465 bind : `~collections.abc.Mapping`, optional 

1466 Mapping containing literal values that should be injected into the 

1467 ``where`` expression, keyed by the identifiers they replace. 

1468 Values of collection type can be expanded in some cases; see 

1469 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1470 information. 

1471 with_dimension_records : `bool`, optional 

1472 If `True` (default is `False`) then returned data IDs will have 

1473 dimension records. 

1474 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1475 Names of the columns/dimensions to use for ordering returned data 

1476 IDs. Column name can be prefixed with minus (``-``) to use 

1477 descending ordering. 

1478 limit : `int`, optional 

1479 Upper limit on the number of returned records. 

1480 offset : `int`, optional 

1481 The number of records to skip before returning at most ``limit`` 

1482 records. If ``offset`` is specified then ``limit`` must be 

1483 specified as well. 

1484 explain : `bool`, optional 

1485 If `True` (default), an `EmptyQueryResultError` exception is 

1486 raised when the resulting list is empty. The exception contains 

1487 a non-empty list of strings explaining possible causes for the 

1488 empty result. 

1489 **kwargs 

1490 Additional keyword arguments are forwarded to 

1491 `DataCoordinate.standardize` when processing the ``data_id`` 

1492 argument (and may be used to provide a constraining data ID even 

1493 when the ``data_id`` argument is `None`). 

1494 

1495 Returns 

1496 ------- 

1497 dataIds : `list` [`DataCoordinate`] 

1498 Data IDs matching the given query parameters. These are always 

1499 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1500 returns `True`). 

1501 

1502 Raises 

1503 ------ 

1504 lsst.daf.butler.registry.DataIdError 

1505 Raised when ``data_id`` or keyword arguments specify unknown 

1506 dimensions or values, or when they contain inconsistent values. 

1507 lsst.daf.butler.registry.UserExpressionError 

1508 Raised when ``where`` expression is invalid. 

1509 lsst.daf.butler.EmptyQueryResultError 

1510 Raised when the query generates an empty result and ``explain`` 

1511 is set to `True`. 

1512 TypeError 

1513 Raised when the arguments are incompatible, e.g. ``offset`` is 

1514 specified, but ``limit`` is not. 

1515 """ 

1516 if data_id is None: 

1517 data_id = DataCoordinate.make_empty(self.dimensions) 

1518 with self._query() as query: 

1519 result = ( 

1520 query.where(data_id, where, bind=bind, **kwargs) 

1521 .data_ids(dimensions) 

1522 .order_by(*ensure_iterable(order_by)) 

1523 .limit(limit, offset) 

1524 ) 

1525 if with_dimension_records: 

1526 result = result.with_dimension_records() 

1527 data_ids = list(result) 

1528 if explain and not data_ids: 

1529 raise EmptyQueryResultError(list(result.explain_no_results())) 

1530 return data_ids 

1531 
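# A minimal sketch of querying data IDs with this method, assuming the
# standard instrument/visit/detector dimensions and a placeholder repository
# path and instrument name.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
data_ids = butler._query_data_ids(
    ["visit", "detector"],
    instrument="HSC",              # keyword args constrain the data ID
    where="visit > 100",
    order_by=["visit", "-detector"],
    limit=20,
)
for data_id in data_ids:
    print(data_id["visit"], data_id["detector"])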

1532 def _query_datasets( 

1533 self, 

1534 dataset_type: str | Iterable[str] | DatasetType | Iterable[DatasetType] | EllipsisType, 

1535 collections: str | Iterable[str] | None = None, 

1536 *, 

1537 find_first: bool = True, 

1538 data_id: DataId | None = None, 

1539 where: str = "", 

1540 bind: Mapping[str, Any] | None = None, 

1541 with_dimension_records: bool = False, 

1542 explain: bool = True, 

1543 **kwargs: Any, 

1544 ) -> list[DatasetRef]: 

1545 """Query for dataset references matching user-provided criteria. 

1546 

1547 Parameters 

1548 ---------- 

1549 dataset_type : dataset type expression 

1550 An expression that fully or partially identifies the dataset types 

1551 to be queried. Allowed types include `DatasetType`, `str`, and 

1552 iterables thereof. The special value ``...`` can be used to query 

1553 all dataset types. See :ref:`daf_butler_dataset_type_expressions` 

1554 for more information. 

1555 collections : collection expression, optional 

1556 A collection name or iterable of collection names to search. If not 

1557 provided, the default collections are used. See 

1558 :ref:`daf_butler_collection_expressions` for more information. 

1559 find_first : `bool`, optional 

1560 If `True` (default), for each result data ID, only yield one 

1561 `DatasetRef` of each `DatasetType`, from the first collection in 

1562 which a dataset of that dataset type appears (according to the 

1563 order of ``collections`` passed in). If `True`, ``collections`` 

1564 must not contain regular expressions and may not be ``...``. 

1565 data_id : `dict` or `DataCoordinate`, optional 

1566 A data ID whose key-value pairs are used as equality constraints in 

1567 the query. 

1568 where : `str`, optional 

1569 A string expression similar to a SQL WHERE clause. May involve any 

1570 column of a dimension table or (as a shortcut for the primary key 

1571 column of a dimension table) dimension name. See 

1572 :ref:`daf_butler_dimension_expressions` for more information. 

1573 bind : `~collections.abc.Mapping`, optional 

1574 Mapping containing literal values that should be injected into the 

1575 ``where`` expression, keyed by the identifiers they replace. Values 

1576 of collection type can be expanded in some cases; see 

1577 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1578 information. 

1579 with_dimension_records : `bool`, optional 

1580 If `True` (default is `False`) then returned data IDs will have 

1581 dimension records. 

1582 explain : `bool`, optional 

1583 If `True` (default), an `EmptyQueryResultError` exception is 

1584 raised when the resulting list is empty. The exception contains 

1585 a non-empty list of strings explaining possible causes for the 

1586 empty result. 

1587 **kwargs 

1588 Additional keyword arguments are forwarded to 

1589 `DataCoordinate.standardize` when processing the ``data_id`` 

1590 argument (and may be used to provide a constraining data ID even 

1591 when the ``data_id`` argument is `None`). 

1592 

1593 Returns 

1594 ------- 

1595 refs : `list` [`DatasetRef`] 

1596 Dataset references matching the given query criteria. Nested data 

1597 IDs are guaranteed to include values for all implied dimensions 

1598 (i.e. `DataCoordinate.hasFull` will return `True`). 

1599 

1600 Raises 

1601 ------ 

1602 lsst.daf.butler.registry.DatasetTypeExpressionError 

1603 Raised when ``dataset_type`` expression is invalid. 

1604 lsst.daf.butler.registry.DataIdError 

1605 Raised when ``data_id`` or keyword arguments specify unknown 

1606 dimensions or values, or when they contain inconsistent values. 

1607 lsst.daf.butler.registry.UserExpressionError 

1608 Raised when ``where`` expression is invalid. 

1609 lsst.daf.butler.EmptyQueryResultError 

1610 Raised when the query generates an empty result and ``explain`` 

1611 is set to `True`. 

1612 TypeError 

1613 Raised when the arguments are incompatible, such as when a 

1614 collection wildcard is passed when ``find_first`` is `True`, or 

1615 when ``collections`` is `None` and default butler collections are 

1616 not defined. 

1617 

1618 Notes 

1619 ----- 

1620 When multiple dataset types are queried in a single call, the results 

1621 of this operation are equivalent to querying for each dataset type 

1622 separately in turn, and no information about the relationships between 

1623 datasets of different types is included. 

1624 """ 

1625 if data_id is None: 

1626 data_id = DataCoordinate.make_empty(self.dimensions) 

1627 with self._query() as query: 

1628 result = query.where(data_id, where, bind=bind, **kwargs).datasets( 

1629 dataset_type, 

1630 collections=collections, 

1631 find_first=find_first, 

1632 ) 

1633 if with_dimension_records: 

1634 result = result.with_dimension_records() 

1635 refs = list(result) 

1636 if explain and not refs: 

1637 raise EmptyQueryResultError(list(result.explain_no_results())) 

1638 return refs 

1639 
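# A minimal sketch of querying dataset references, assuming a placeholder
# repository path, collection, and the "calexp" dataset type; passing
# ``explain=False`` returns an empty list instead of raising when nothing
# matches.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
refs = butler._query_datasets(
    "calexp",
    collections="HSC/runs/example",
    instrument="HSC",
    where="visit > 100",
    explain=False,
)
for ref in refs:
    print(ref.datasetType.name, ref.dataId, ref.run)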

1640 def _query_dimension_records( 

1641 self, 

1642 element: str, 

1643 *, 

1644 data_id: DataId | None = None, 

1645 where: str = "", 

1646 bind: Mapping[str, Any] | None = None, 

1647 order_by: Iterable[str] | str | None = None, 

1648 limit: int | None = None, 

1649 offset: int = 0, 

1650 explain: bool = True, 

1651 **kwargs: Any, 

1652 ) -> list[DimensionRecord]: 

1653 """Query for dimension information matching user-provided criteria. 

1654 

1655 Parameters 

1656 ---------- 

1657 element : `str` 

1658 The name of a dimension element to obtain records for. 

1659 data_id : `dict` or `DataCoordinate`, optional 

1660 A data ID whose key-value pairs are used as equality constraints 

1661 in the query. 

1662 where : `str`, optional 

1663 A string expression similar to a SQL WHERE clause. See 

1664 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1665 information. 

1666 bind : `~collections.abc.Mapping`, optional 

1667 Mapping containing literal values that should be injected into the 

1668 ``where`` expression, keyed by the identifiers they replace. 

1669 Values of collection type can be expanded in some cases; see 

1670 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1671 information. 

1672 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1673 Names of the columns/dimensions to use for ordering returned data 

1674 IDs. Column name can be prefixed with minus (``-``) to use 

1675 descending ordering. 

1676 limit : `int`, optional 

1677 Upper limit on the number of returned records. 

1678 offset : `int`, optional 

1679 The number of records to skip before returning at most ``limit`` 

1680 records. If ``offset`` is specified then ``limit`` must be 

1681 specified as well. 

1682 explain : `bool`, optional 

1683 If `True` (default), an `EmptyQueryResultError` exception is 

1684 raised when the resulting list is empty. The exception contains 

1685 a non-empty list of strings explaining possible causes for the 

1686 empty result. 

1687 **kwargs 

1688 Additional keyword arguments are forwarded to 

1689 `DataCoordinate.standardize` when processing the ``data_id`` 

1690 argument (and may be used to provide a constraining data ID even 

1691 when the ``data_id`` argument is `None`). 

1692 

1693 Returns 

1694 ------- 

1695 records : `list` [`DimensionRecord`] 

1696 Dimension records matching the given query parameters. 

1697 

1698 Raises 

1699 ------ 

1700 lsst.daf.butler.registry.DataIdError 

1701 Raised when ``data_id`` or keyword arguments specify unknown 

1702 dimensions or values, or when they contain inconsistent values. 

1703 lsst.daf.butler.registry.UserExpressionError 

1704 Raised when ``where`` expression is invalid. 

1705 lsst.daf.butler.EmptyQueryResultError 

1706 Raised when the query generates an empty result and ``explain`` 

1707 is set to `True`. 

1708 TypeError 

1709 Raised when the arguments are incompatible, e.g. ``offset`` is 

1710 specified, but ``limit`` is not. 

1713 """ 

1714 if data_id is None: 

1715 data_id = DataCoordinate.make_empty(self.dimensions) 

1716 with self._query() as query: 

1717 result = ( 

1718 query.where(data_id, where, bind=bind, **kwargs) 

1719 .dimension_records(element) 

1720 .order_by(*ensure_iterable(order_by)) 

1721 .limit(limit, offset) 

1722 ) 

1723 dimension_records = list(result) 

1724 if explain and not dimension_records: 

1725 raise EmptyQueryResultError(list(result.explain_no_results())) 

1726 return dimension_records 

1727 
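# A minimal sketch of retrieving dimension records, assuming the standard
# dimension universe in which "detector" records carry ``full_name`` and
# ``purpose`` fields, plus a placeholder repository path and instrument.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
records = butler._query_dimension_records(
    "detector",
    instrument="HSC",
    where="detector.purpose = 'SCIENCE'",
    order_by="detector",
    limit=10,
)
for record in records:
    print(record.id, record.full_name)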

1728 @abstractmethod 

1729 def _clone( 

1730 self, 

1731 *, 

1732 collections: Any = None, 

1733 run: str | None = None, 

1734 inferDefaults: bool = True, 

1735 **kwargs: Any, 

1736 ) -> Butler: 

1737 """Return a new Butler instance connected to the same repository 

1738 as this one, but overriding ``collections``, ``run``, 

1739 ``inferDefaults``, and default data ID. 

1740 """ 

1741 raise NotImplementedError()
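# A minimal sketch of ``_clone``: obtain a second view of the same repository
# with different defaults (the repository path, collection, and run names are
# placeholders).
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
writer = butler._clone(run="u/someone/new-run", collections=["HSC/runs/example"])
assert writer.run == "u/someone/new-run"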