Coverage for python/lsst/daf/butler/_butler.py: 53%

185 statements  

coverage.py v7.4.4, created at 2024-04-15 02:03 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from typing import TYPE_CHECKING, Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.iteration import ensure_iterable 

40from lsst.utils.logging import getLogger 

41 

42from ._butler_collections import ButlerCollections 

43from ._butler_config import ButlerConfig, ButlerType 

44from ._butler_instance_options import ButlerInstanceOptions 

45from ._butler_repo_index import ButlerRepoIndex 

46from ._config import Config, ConfigSubset 

47from ._exceptions import EmptyQueryResultError 

48from ._limited_butler import LimitedButler 

49from .datastore import Datastore 

50from .dimensions import DimensionConfig 

51from .registry import RegistryConfig, _RegistryFactory 

52from .repo_relocation import BUTLER_ROOT_TAG 

53 

54if TYPE_CHECKING: 

55 from ._dataset_existence import DatasetExistence 

56 from ._dataset_ref import DatasetId, DatasetRef 

57 from ._dataset_type import DatasetType 

58 from ._deferredDatasetHandle import DeferredDatasetHandle 

59 from ._file_dataset import FileDataset 

60 from ._storage_class import StorageClass 

61 from ._timespan import Timespan 

62 from .datastore import DatasetRefURIs 

63 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

64 from .queries import Query 

65 from .registry import Registry 

66 from .transfers import RepoExportContext 

67 

68_LOG = getLogger(__name__) 

69 

70 

71class Butler(LimitedButler): # numpydoc ignore=PR02 

72 """Interface for data butler and factory for Butler instances. 

73 

74 Parameters 

75 ---------- 

76 config : `ButlerConfig`, `Config` or `str`, optional 

77 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

78 If a directory path is given the configuration will be read from a 

79 ``butler.yaml`` file in that location. If `None` is given, default

80 values will be used. If ``config`` contains a "cls" key, its value is

81 used as the name of the butler class, which must be a sub-class of this

82 class; otherwise `DirectButler` is instantiated.

83 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

84 An expression specifying the collections to be searched (in order) when 

85 reading datasets. 

86 This may be a `str` collection name or an iterable thereof. 

87 See :ref:`daf_butler_collection_expressions` for more information. 

88 These collections are not registered automatically and must be

89 registered manually before they are used by any method, though that

90 registration may happen after the `Butler` is initialized.

91 run : `str`, optional 

92 Name of the `~CollectionType.RUN` collection new datasets should be 

93 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

94 ``collections`` will be set to ``[run]``. If not `None`, this 

95 collection will automatically be registered. If this is not set (and 

96 ``writeable`` is not set either), a read-only butler will be created. 

97 searchPaths : `list` of `str`, optional 

98 Directory paths to search when calculating the full Butler 

99 configuration. Not used if the supplied config is already a 

100 `ButlerConfig`. 

101 writeable : `bool`, optional 

102 Explicitly sets whether the butler supports write operations. If not 

103 provided, a read-write butler is created if any of ``run``, ``tags``, 

104 or ``chains`` is non-empty. 

105 inferDefaults : `bool`, optional 

106 If `True` (default) infer default data ID values from the values 

107 present in the datasets in ``collections``: if all collections have the 

108 same value (or no value) for a governor dimension, that value will be 

109 the default for that dimension. Nonexistent collections are ignored. 

110 If a default value is provided explicitly for a governor dimension via 

111 ``**kwargs``, no default will be inferred for that dimension. 

112 without_datastore : `bool`, optional 

113 If `True` do not attach a datastore to this butler. Any attempts 

114 to use a datastore will fail. 

115 **kwargs : `Any` 

116 Additional keyword arguments passed to a constructor of actual butler 

117 class. 

118 

119 Notes 

120 ----- 

121 The preferred way to instantiate Butler is via the `from_config` method. 

122 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

123 but ``mypy`` will complain about the former. 

124 """ 

125 

126 def __new__( 

127 cls, 

128 config: Config | ResourcePathExpression | None = None, 

129 *, 

130 collections: Any = None, 

131 run: str | None = None, 

132 searchPaths: Sequence[ResourcePathExpression] | None = None, 

133 writeable: bool | None = None, 

134 inferDefaults: bool = True, 

135 without_datastore: bool = False, 

136 **kwargs: Any, 

137 ) -> Butler: 

138 if cls is Butler: 

139 return Butler.from_config( 

140 config=config, 

141 collections=collections, 

142 run=run, 

143 searchPaths=searchPaths, 

144 writeable=writeable, 

145 inferDefaults=inferDefaults, 

146 without_datastore=without_datastore, 

147 **kwargs, 

148 ) 

149 

150 # Note: we do not pass any parameters to __new__; Python will pass them

151 # to __init__ after __new__ returns the sub-class instance.

152 return super().__new__(cls) 

153 

154 @classmethod 

155 def from_config( 

156 cls, 

157 config: Config | ResourcePathExpression | None = None, 

158 *, 

159 collections: Any = None, 

160 run: str | None = None, 

161 searchPaths: Sequence[ResourcePathExpression] | None = None, 

162 writeable: bool | None = None, 

163 inferDefaults: bool = True, 

164 without_datastore: bool = False, 

165 **kwargs: Any, 

166 ) -> Butler: 

167 """Create butler instance from configuration. 

168 

169 Parameters 

170 ---------- 

171 config : `ButlerConfig`, `Config` or `str`, optional 

172 Configuration. Anything acceptable to the `ButlerConfig` 

173 constructor. If a directory path is given the configuration will be 

174 read from a ``butler.yaml`` file in that location. If `None` is

175 given, default values will be used. If ``config`` contains a "cls"

176 key, its value is used as the name of the butler class, which must be

177 a sub-class of this class; otherwise `DirectButler` is instantiated.

178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

179 An expression specifying the collections to be searched (in order) 

180 when reading datasets. 

181 This may be a `str` collection name or an iterable thereof. 

182 See :ref:`daf_butler_collection_expressions` for more information. 

183 These collections are not registered automatically and must be

184 registered manually before they are used by any method, though that

185 registration may happen after the `Butler` is initialized.

186 run : `str`, optional 

187 Name of the `~CollectionType.RUN` collection new datasets should be 

188 inserted into. If ``collections`` is `None` and ``run`` is not 

189 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

190 this collection will automatically be registered. If this is not 

191 set (and ``writeable`` is not set either), a read-only butler will 

192 be created. 

193 searchPaths : `list` of `str`, optional 

194 Directory paths to search when calculating the full Butler 

195 configuration. Not used if the supplied config is already a 

196 `ButlerConfig`. 

197 writeable : `bool`, optional 

198 Explicitly sets whether the butler supports write operations. If 

199 not provided, a read-write butler is created if any of ``run``, 

200 ``tags``, or ``chains`` is non-empty. 

201 inferDefaults : `bool`, optional 

202 If `True` (default) infer default data ID values from the values 

203 present in the datasets in ``collections``: if all collections have 

204 the same value (or no value) for a governor dimension, that value 

205 will be the default for that dimension. Nonexistent collections 

206 are ignored. If a default value is provided explicitly for a 

207 governor dimension via ``**kwargs``, no default will be inferred 

208 for that dimension. 

209 without_datastore : `bool`, optional 

210 If `True` do not attach a datastore to this butler. Any attempts 

211 to use a datastore will fail. 

212 **kwargs : `Any` 

213 Default data ID key-value pairs. These may only identify 

214 "governor" dimensions like ``instrument`` and ``skymap``. 

215 

216 Returns 

217 ------- 

218 butler : `Butler` 

219 A `Butler` constructed from the given configuration. 

220 

221 Notes 

222 ----- 

223 Calling this factory method is identical to calling 

224 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

225 complains about a direct ``Butler()`` call.

226 

227 Examples 

228 -------- 

229 While there are many ways to control exactly how a `Butler` interacts 

230 with the collections in its `Registry`, the most common cases are still 

231 simple. 

232 

233 For a read-only `Butler` that searches one collection, do:: 

234 

235 butler = Butler.from_config( 

236 "/path/to/repo", collections=["u/alice/DM-50000"] 

237 ) 

238 

239 For a read-write `Butler` that writes to and reads from a 

240 `~CollectionType.RUN` collection:: 

241 

242 butler = Butler.from_config( 

243 "/path/to/repo", run="u/alice/DM-50000/a" 

244 ) 

245 

246 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

247 because we want to write to one `~CollectionType.RUN` collection but 

248 read from several others (as well):: 

249 

250 butler = Butler.from_config( 

251 "/path/to/repo", 

252 run="u/alice/DM-50000/a", 

253 collections=[ 

254 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

255 ] 

256 ) 

257 

258 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

259 Datasets will be read first from that run (since it appears first in 

260 the chain), and then from ``u/bob/DM-49998`` and finally 

261 ``HSC/defaults``. 

262 

263 Finally, one can always create a `Butler` with no collections:: 

264 

265 butler = Butler.from_config("/path/to/repo", writeable=True) 

266 

267 This can be extremely useful when you just want to use 

268 ``butler.registry``, e.g. for inserting dimension data or managing 

269 collections, or when the collections you want to use with the butler 

270 are not consistent. Passing ``writeable`` explicitly here is only 

271 necessary if you want to be able to make changes to the repo - usually 

272 the value for ``writeable`` can be guessed from the collection 

273 arguments provided, but it defaults to `False` when there are no

274 collection arguments.

275 """ 

276 # DirectButler used to have a way to specify a "copy constructor" by 

277 # passing the "butler" parameter to its constructor. This 

278 # functionality has been moved out of the constructor into 

279 # Butler._clone(), but the new interface is not public yet. 

280 butler = kwargs.pop("butler", None) 

281 if butler is not None: 

282 if not isinstance(butler, Butler): 

283 raise TypeError("'butler' parameter must be a Butler instance") 

284 if config is not None or searchPaths is not None or writeable is not None: 

285 raise TypeError( 

286 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

287 ) 

288 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) 

289 

290 options = ButlerInstanceOptions( 

291 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs 

292 ) 

293 

294 # Load the Butler configuration. This may involve searching the 

295 # environment to locate a configuration file. 

296 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

297 butler_type = butler_config.get_butler_type() 

298 

299 # Make DirectButler if class is not specified. 

300 match butler_type: 

301 case ButlerType.DIRECT: 

302 from .direct_butler import DirectButler 

303 

304 return DirectButler.create_from_config( 

305 butler_config, 

306 options=options, 

307 without_datastore=without_datastore, 

308 ) 

309 case ButlerType.REMOTE: 

310 from .remote_butler import RemoteButlerFactory 

311 

312 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

313 return factory.create_butler_with_credentials_from_environment(butler_options=options) 

314 case _: 

315 raise TypeError(f"Unknown Butler type '{butler_type}'") 

316 

317 @staticmethod 

318 def makeRepo( 

319 root: ResourcePathExpression, 

320 config: Config | str | None = None, 

321 dimensionConfig: Config | str | None = None, 

322 standalone: bool = False, 

323 searchPaths: list[str] | None = None, 

324 forceConfigRoot: bool = True, 

325 outfile: ResourcePathExpression | None = None, 

326 overwrite: bool = False, 

327 ) -> Config: 

328 """Create an empty data repository by adding a butler.yaml config 

329 to a repository root directory. 

330 

331 Parameters 

332 ---------- 

333 root : `lsst.resources.ResourcePathExpression` 

334 Path or URI to the root location of the new repository. Will be 

335 created if it does not exist. 

336 config : `Config` or `str`, optional 

337 Configuration to write to the repository, after setting any 

338 root-dependent Registry or Datastore config options. Can not 

339 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

340 configuration will be used. Root-dependent config options 

341 specified in this config are overwritten if ``forceConfigRoot`` 

342 is `True`. 

343 dimensionConfig : `Config` or `str`, optional 

344 Configuration for dimensions, will be used to initialize registry 

345 database. 

346 standalone : `bool` 

347 If True, write all expanded defaults, not just customized or 

348 repository-specific settings. 

349 This (mostly) decouples the repository from the default 

350 configuration, insulating it from changes to the defaults (which 

351 may be good or bad, depending on the nature of the changes). 

352 Future *additions* to the defaults will still be picked up when 

353 initializing `Butlers` to repos created with ``standalone=True``. 

354 searchPaths : `list` of `str`, optional 

355 Directory paths to search when calculating the full butler 

356 configuration. 

357 forceConfigRoot : `bool`, optional 

358 If `False`, any values present in the supplied ``config`` that 

359 would normally be reset are not overridden and will appear 

360 directly in the output config. This allows non-standard overrides 

361 of the root directory for a datastore or registry to be given. 

362 If this parameter is `True` the values for ``root`` will be 

363 forced into the resulting config if appropriate. 

364 outfile : `lsst.resources.ResourcePathExpression`, optional

365 If not-`None`, the output configuration will be written to this 

366 location rather than into the repository itself. Can be a URI 

367 string. Can refer to a directory that will be used to write 

368 ``butler.yaml``. 

369 overwrite : `bool`, optional 

370 Create a new configuration file even if one already exists 

371 in the specified output location. Default is to raise 

372 an exception. 

373 

374 Returns 

375 ------- 

376 config : `Config` 

377 The updated `Config` instance written to the repo. 

378 

379 Raises 

380 ------ 

381 ValueError 

382 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

383 regular Config (as these subclasses would make it impossible to 

384 support ``standalone=False``). 

385 FileExistsError 

386 Raised if the output config file already exists. 

387 os.error 

388 Raised if the directory does not exist, exists but is not a 

389 directory, or cannot be created. 

390 

391 Notes 

392 ----- 

393 Note that when ``standalone=False`` (the default), the configuration 

394 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

395 construct the repository should also be used to construct any Butlers 

396 to avoid configuration inconsistencies. 

397 """ 

398 if isinstance(config, ButlerConfig | ConfigSubset): 

399 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

400 

401 # Ensure that the root of the repository exists or can be made 

402 root_uri = ResourcePath(root, forceDirectory=True) 

403 root_uri.mkdir() 

404 

405 config = Config(config) 

406 

407 # If we are creating a new repo from scratch with relative roots, 

408 # do not propagate an explicit root from the config file 

409 if "root" in config: 

410 del config["root"] 

411 

412 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

413 imported_class = doImportType(full["datastore", "cls"]) 

414 if not issubclass(imported_class, Datastore): 

415 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

416 datastoreClass: type[Datastore] = imported_class 

417 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

418 

419 # if key exists in given config, parse it, otherwise parse the defaults 

420 # in the expanded config 

421 if config.get(("registry", "db")): 

422 registryConfig = RegistryConfig(config) 

423 else: 

424 registryConfig = RegistryConfig(full) 

425 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

426 if defaultDatabaseUri is not None: 

427 Config.updateParameters( 

428 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

429 ) 

430 else: 

431 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

432 

433 if standalone: 

434 config.merge(full) 

435 else: 

436 # Always expand the registry.managers section into the per-repo 

437 # config, because after the database schema is created, it's not 

438 # allowed to change anymore. Note that in the standalone=True 

439 # branch, _everything_ in the config is expanded, so there's no 

440 # need to special case this. 

441 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

442 configURI: ResourcePathExpression 

443 if outfile is not None: 

444 # When writing to a separate location we must include 

445 # the root of the butler repo in the config else it won't know 

446 # where to look. 

447 config["root"] = root_uri.geturl() 

448 configURI = outfile 

449 else: 

450 configURI = root_uri 

451 # Strip obscore configuration, if it is present, before writing config 

452 # to a file, obscore config will be stored in registry. 

453 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

454 config_to_write = config.copy() 

455 del config_to_write[obscore_config_key] 

456 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

457 # configFile attribute is updated, need to copy it to original. 

458 config.configFile = config_to_write.configFile 

459 else: 

460 config.dumpToUri(configURI, overwrite=overwrite) 

461 

462 # Create Registry and populate tables 

463 registryConfig = RegistryConfig(config.get("registry")) 

464 dimensionConfig = DimensionConfig(dimensionConfig) 

465 _RegistryFactory(registryConfig).create_from_config( 

466 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

467 ) 

468 

469 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

470 

471 return config 
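
# A minimal illustrative sketch of makeRepo: create an empty repository and
# then open it for writing. The repository path below is a hypothetical
# placeholder; a real call might also pass a seed config or dimensionConfig.
from lsst.daf.butler import Butler

Butler.makeRepo("/tmp/example_repo")
butler = Butler.from_config("/tmp/example_repo", writeable=True)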

472 

473 @classmethod 

474 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

475 """Look up the label in a butler repository index. 

476 

477 Parameters 

478 ---------- 

479 label : `str` 

480 Label of the Butler repository to look up. 

481 return_label : `bool`, optional 

482 If ``label`` cannot be found in the repository index (either 

483 because the index is not defined or ``label`` is not in the index) and

484 ``return_label`` is `True` then return ``ResourcePath(label)``. 

485 If ``return_label`` is `False` (default) then an exception will be 

486 raised instead. 

487 

488 Returns 

489 ------- 

490 uri : `lsst.resources.ResourcePath` 

491 URI to the Butler repository associated with the given label, or

492 ``ResourcePath(label)`` if ``return_label`` is `True` and no match is found.

493 

494 Raises 

495 ------ 

496 KeyError 

497 Raised if the label is not found in the index, or if an index 

498 is not defined, and ``return_label`` is `False`. 

499 

500 Notes 

501 ----- 

502 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

503 information is discovered. 

504 """ 

505 return ButlerRepoIndex.get_repo_uri(label, return_label) 

506 

507 @classmethod 

508 def get_known_repos(cls) -> set[str]: 

509 """Retrieve the list of known repository labels. 

510 

511 Returns 

512 ------- 

513 repos : `set` of `str` 

514 All the known labels. Can be empty if no index can be found. 

515 

516 Notes 

517 ----- 

518 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

519 information is discovered. 

520 """ 

521 return ButlerRepoIndex.get_known_repos() 
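
# Illustrative sketch of the repository-index helpers: list the known labels
# and resolve one to a URI. The label "main" is a hypothetical placeholder;
# with return_label=True an unknown label is returned as ResourcePath(label).
from lsst.daf.butler import Butler

print(Butler.get_known_repos())
repo_uri = Butler.get_repo_uri("main", return_label=True)
butler = Butler.from_config(repo_uri)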

522 

523 @abstractmethod 

524 def _caching_context(self) -> AbstractContextManager[None]: 

525 """Context manager that enables caching.""" 

526 raise NotImplementedError() 

527 

528 @abstractmethod 

529 def transaction(self) -> AbstractContextManager[None]: 

530 """Context manager supporting `Butler` transactions. 

531 

532 Transactions can be nested. 

533 """ 

534 raise NotImplementedError() 

535 

536 @abstractmethod 

537 def put( 

538 self, 

539 obj: Any, 

540 datasetRefOrType: DatasetRef | DatasetType | str, 

541 /, 

542 dataId: DataId | None = None, 

543 *, 

544 run: str | None = None, 

545 **kwargs: Any, 

546 ) -> DatasetRef: 

547 """Store and register a dataset. 

548 

549 Parameters 

550 ---------- 

551 obj : `object` 

552 The dataset. 

553 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

554 When `DatasetRef` is provided, ``dataId`` should be `None`. 

555 Otherwise the `DatasetType` or name thereof. If a fully resolved 

556 `DatasetRef` is given the run and ID are used directly. 

557 dataId : `dict` or `DataCoordinate` 

558 A `dict` of `Dimension` link name, value pairs that label the 

559 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

560 should be provided as the second argument. 

561 run : `str`, optional 

562 The name of the run the dataset should be added to, overriding 

563 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

564 **kwargs 

565 Additional keyword arguments used to augment or construct a 

566 `DataCoordinate`. See `DataCoordinate.standardize` 

567 parameters. Not used if a resolved `DatasetRef` is provided.

568 

569 Returns 

570 ------- 

571 ref : `DatasetRef` 

572 A reference to the stored dataset, updated with the correct id if 

573 given. 

574 

575 Raises 

576 ------ 

577 TypeError 

578 Raised if the butler is read-only or if no run has been provided. 

579 """ 

580 raise NotImplementedError() 
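
# Illustrative sketch of put(): store an in-memory object under a dataset type
# and data ID in the default run. The repo path, run name, dataset type name,
# and data ID values below are hypothetical placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", run="u/alice/example-run")
obj = {"example": 1}  # any Python object compatible with the dataset type's storage class
ref = butler.put(obj, "sourceCatalog", instrument="HSC", visit=903334, detector=42)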

581 

582 @abstractmethod 

583 def getDeferred( 

584 self, 

585 datasetRefOrType: DatasetRef | DatasetType | str, 

586 /, 

587 dataId: DataId | None = None, 

588 *, 

589 parameters: dict | None = None, 

590 collections: Any = None, 

591 storageClass: str | StorageClass | None = None, 

592 timespan: Timespan | None = None, 

593 **kwargs: Any, 

594 ) -> DeferredDatasetHandle: 

595 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

596 after an immediate registry lookup. 

597 

598 Parameters 

599 ---------- 

600 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

601 When `DatasetRef` the `dataId` should be `None`. 

602 Otherwise the `DatasetType` or name thereof. 

603 dataId : `dict` or `DataCoordinate`, optional 

604 A `dict` of `Dimension` link name, value pairs that label the 

605 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

606 should be provided as the first argument. 

607 parameters : `dict` 

608 Additional StorageClass-defined options to control reading, 

609 typically used to efficiently read only a subset of the dataset. 

610 collections : Any, optional 

611 Collections to be searched, overriding ``self.collections``. 

612 Can be any of the types supported by the ``collections`` argument 

613 to butler construction. 

614 storageClass : `StorageClass` or `str`, optional 

615 The storage class to be used to override the Python type 

616 returned by this method. By default the returned type matches 

617 the dataset type definition for this dataset. Specifying a 

618 read `StorageClass` can force a different type to be returned. 

619 This type must be compatible with the original type. 

620 timespan : `Timespan` or `None`, optional 

621 A timespan that the validity range of the dataset must overlap. 

622 If not provided and this is a calibration dataset type, an attempt 

623 will be made to find the timespan from any temporal coordinate 

624 in the data ID. 

625 **kwargs 

626 Additional keyword arguments used to augment or construct a 

627 `DataId`. See `DataId` parameters. 

628 

629 Returns 

630 ------- 

631 obj : `DeferredDatasetHandle` 

632 A handle which can be used to retrieve a dataset at a later time. 

633 

634 Raises 

635 ------ 

636 LookupError 

637 Raised if no matching dataset exists in the `Registry` or 

638 datastore. 

639 ValueError 

640 Raised if a resolved `DatasetRef` was passed as an input, but it 

641 differs from the one found in the registry. 

642 TypeError 

643 Raised if no collections were provided. 

644 """ 

645 raise NotImplementedError() 
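
# Illustrative sketch of getDeferred(): the registry lookup happens
# immediately, but the datastore read is deferred until the handle is used.
# The repo path, collection, dataset type, and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
handle = butler.getDeferred("calexp", instrument="HSC", visit=903334, detector=42)
image = handle.get()  # the actual read happens here, not at getDeferred() time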

646 

647 @abstractmethod 

648 def get( 

649 self, 

650 datasetRefOrType: DatasetRef | DatasetType | str, 

651 /, 

652 dataId: DataId | None = None, 

653 *, 

654 parameters: dict[str, Any] | None = None, 

655 collections: Any = None, 

656 storageClass: StorageClass | str | None = None, 

657 timespan: Timespan | None = None, 

658 **kwargs: Any, 

659 ) -> Any: 

660 """Retrieve a stored dataset. 

661 

662 Parameters 

663 ---------- 

664 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

665 When `DatasetRef` the `dataId` should be `None`. 

666 Otherwise the `DatasetType` or name thereof. 

667 If a resolved `DatasetRef`, the associated dataset 

668 is returned directly without additional querying. 

669 dataId : `dict` or `DataCoordinate` 

670 A `dict` of `Dimension` link name, value pairs that label the 

671 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

672 should be provided as the first argument. 

673 parameters : `dict` 

674 Additional StorageClass-defined options to control reading, 

675 typically used to efficiently read only a subset of the dataset. 

676 collections : Any, optional 

677 Collections to be searched, overriding ``self.collections``. 

678 Can be any of the types supported by the ``collections`` argument 

679 to butler construction. 

680 storageClass : `StorageClass` or `str`, optional 

681 The storage class to be used to override the Python type 

682 returned by this method. By default the returned type matches 

683 the dataset type definition for this dataset. Specifying a 

684 read `StorageClass` can force a different type to be returned. 

685 This type must be compatible with the original type. 

686 timespan : `Timespan` or `None`, optional 

687 A timespan that the validity range of the dataset must overlap. 

688 If not provided and this is a calibration dataset type, an attempt 

689 will be made to find the timespan from any temporal coordinate 

690 in the data ID. 

691 **kwargs 

692 Additional keyword arguments used to augment or construct a 

693 `DataCoordinate`. See `DataCoordinate.standardize` 

694 parameters. 

695 

696 Returns 

697 ------- 

698 obj : `object` 

699 The dataset. 

700 

701 Raises 

702 ------ 

703 LookupError 

704 Raised if no matching dataset exists in the `Registry`. 

705 TypeError 

706 Raised if no collections were provided. 

707 

708 Notes 

709 ----- 

710 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

711 this method requires that the given data ID include temporal dimensions 

712 beyond the dimensions of the dataset type itself, in order to find the 

713 dataset with the appropriate validity range. For example, a "bias" 

714 dataset with native dimensions ``{instrument, detector}`` could be 

715 fetched with a ``{instrument, detector, exposure}`` data ID, because 

716 ``exposure`` is a temporal dimension. 

717 """ 

718 raise NotImplementedError() 
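
# Illustrative sketch of get() against a CALIBRATION collection, following the
# Notes above: a "bias" with native dimensions {instrument, detector} is looked
# up with an exposure in the data ID so the matching validity range can be
# found. The repo path, collection, and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/calib"])
bias = butler.get("bias", instrument="HSC", detector=42, exposure=12345)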

719 

720 @abstractmethod 

721 def getURIs( 

722 self, 

723 datasetRefOrType: DatasetRef | DatasetType | str, 

724 /, 

725 dataId: DataId | None = None, 

726 *, 

727 predict: bool = False, 

728 collections: Any = None, 

729 run: str | None = None, 

730 **kwargs: Any, 

731 ) -> DatasetRefURIs: 

732 """Return the URIs associated with the dataset. 

733 

734 Parameters 

735 ---------- 

736 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

737 When `DatasetRef` the `dataId` should be `None`. 

738 Otherwise the `DatasetType` or name thereof. 

739 dataId : `dict` or `DataCoordinate` 

740 A `dict` of `Dimension` link name, value pairs that label the 

741 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

742 should be provided as the first argument. 

743 predict : `bool` 

744 If `True`, allow URIs to be returned of datasets that have not 

745 been written. 

746 collections : Any, optional 

747 Collections to be searched, overriding ``self.collections``. 

748 Can be any of the types supported by the ``collections`` argument 

749 to butler construction. 

750 run : `str`, optional 

751 Run to use for predictions, overriding ``self.run``. 

752 **kwargs 

753 Additional keyword arguments used to augment or construct a 

754 `DataCoordinate`. See `DataCoordinate.standardize` 

755 parameters. 

756 

757 Returns 

758 ------- 

759 uris : `DatasetRefURIs` 

760 The URI to the primary artifact associated with this dataset (if 

761 the dataset was disassembled within the datastore this may be 

762 `None`), and the URIs to any components associated with the dataset 

763 artifact (can be empty if there are no components).

764 """ 

765 raise NotImplementedError() 

766 

767 def getURI( 

768 self, 

769 datasetRefOrType: DatasetRef | DatasetType | str, 

770 /, 

771 dataId: DataId | None = None, 

772 *, 

773 predict: bool = False, 

774 collections: Any = None, 

775 run: str | None = None, 

776 **kwargs: Any, 

777 ) -> ResourcePath: 

778 """Return the URI to the Dataset. 

779 

780 Parameters 

781 ---------- 

782 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

783 When `DatasetRef` the `dataId` should be `None`. 

784 Otherwise the `DatasetType` or name thereof. 

785 dataId : `dict` or `DataCoordinate` 

786 A `dict` of `Dimension` link name, value pairs that label the 

787 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

788 should be provided as the first argument. 

789 predict : `bool` 

790 If `True`, allow URIs to be returned of datasets that have not 

791 been written. 

792 collections : Any, optional 

793 Collections to be searched, overriding ``self.collections``. 

794 Can be any of the types supported by the ``collections`` argument 

795 to butler construction. 

796 run : `str`, optional 

797 Run to use for predictions, overriding ``self.run``. 

798 **kwargs 

799 Additional keyword arguments used to augment or construct a 

800 `DataCoordinate`. See `DataCoordinate.standardize` 

801 parameters. 

802 

803 Returns 

804 ------- 

805 uri : `lsst.resources.ResourcePath` 

806 URI pointing to the Dataset within the datastore. If the 

807 Dataset does not exist in the datastore, and if ``predict`` is 

808 `True`, the URI will be a prediction and will include a URI 

809 fragment "#predicted". 

810 If the datastore does not have entities that relate well 

811 to the concept of a URI the returned URI string will be 

812 descriptive. The returned URI is not guaranteed to be obtainable. 

813 

814 Raises 

815 ------ 

816 LookupError 

817 A URI has been requested for a dataset that does not exist and 

818 guessing is not allowed. 

819 ValueError 

820 Raised if a resolved `DatasetRef` was passed as an input, but it 

821 differs from the one found in the registry. 

822 TypeError 

823 Raised if no collections were provided. 

824 RuntimeError 

825 Raised if a URI is requested for a dataset that consists of 

826 multiple artifacts. 

827 """ 

828 primary, components = self.getURIs( 

829 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

830 ) 

831 

832 if primary is None or components: 

833 raise RuntimeError( 

834 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

835 "Use Butler.getURIs() instead." 

836 ) 

837 return primary 
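
# Illustrative sketch: getURIs() returns the primary and component artifact
# URIs, while getURI() is the single-artifact convenience and raises
# RuntimeError for disassembled datasets. All names and values below are
# hypothetical placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
primary, components = butler.getURIs("calexp", instrument="HSC", visit=903334, detector=42)
raw_uri = butler.getURI("raw", instrument="HSC", exposure=903334, detector=42)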

838 

839 @abstractmethod 

840 def get_dataset_type(self, name: str) -> DatasetType: 

841 """Get the `DatasetType`. 

842 

843 Parameters 

844 ---------- 

845 name : `str` 

846 Name of the type. 

847 

848 Returns 

849 ------- 

850 type : `DatasetType` 

851 The `DatasetType` associated with the given name. 

852 

853 Raises 

854 ------ 

855 lsst.daf.butler.MissingDatasetTypeError 

856 Raised if the requested dataset type has not been registered. 

857 

858 Notes 

859 ----- 

860 This method handles component dataset types automatically, though most 

861 other operations do not. 

862 """ 

863 raise NotImplementedError() 

864 

865 @abstractmethod 

866 def get_dataset( 

867 self, 

868 id: DatasetId, 

869 *, 

870 storage_class: str | StorageClass | None = None, 

871 dimension_records: bool = False, 

872 datastore_records: bool = False, 

873 ) -> DatasetRef | None: 

874 """Retrieve a Dataset entry. 

875 

876 Parameters 

877 ---------- 

878 id : `DatasetId` 

879 The unique identifier for the dataset. 

880 storage_class : `str` or `StorageClass` or `None` 

881 A storage class to use when creating the returned entry. If given 

882 it must be compatible with the default storage class. 

883 dimension_records : `bool`, optional 

884 If `True` the ref will be expanded and contain dimension records. 

885 datastore_records : `bool`, optional 

886 If `True` the ref will contain associated datastore records. 

887 

888 Returns 

889 ------- 

890 ref : `DatasetRef` or `None` 

891 A ref to the Dataset, or `None` if no matching Dataset 

892 was found. 

893 """ 

894 raise NotImplementedError() 

895 

896 @abstractmethod 

897 def find_dataset( 

898 self, 

899 dataset_type: DatasetType | str, 

900 data_id: DataId | None = None, 

901 *, 

902 collections: str | Sequence[str] | None = None, 

903 timespan: Timespan | None = None, 

904 storage_class: str | StorageClass | None = None, 

905 dimension_records: bool = False, 

906 datastore_records: bool = False, 

907 **kwargs: Any, 

908 ) -> DatasetRef | None: 

909 """Find a dataset given its `DatasetType` and data ID. 

910 

911 This can be used to obtain a `DatasetRef` that permits the dataset to 

912 be read from a `Datastore`. If the dataset is a component and can not 

913 be found using the provided dataset type, a dataset ref for the parent 

914 will be returned instead but with the correct dataset type. 

915 

916 Parameters 

917 ---------- 

918 dataset_type : `DatasetType` or `str` 

919 A `DatasetType` or the name of one. If this is a `DatasetType` 

920 instance, its storage class will be respected and propagated to 

921 the output, even if it differs from the dataset type definition 

922 in the registry, as long as the storage classes are convertible. 

923 data_id : `dict` or `DataCoordinate`, optional 

924 A `dict`-like object containing the `Dimension` links that identify 

925 the dataset within a collection. If it is a `dict` the dataId 

926 can include dimension record values such as ``day_obs`` and 

927 ``seq_num`` or ``full_name`` that can be used to derive the 

928 primary dimension. 

929 collections : `str` or `list` [`str`], optional 

930 An ordered list of collections to search for the dataset.

931 Defaults to ``self.defaults.collections``. 

932 timespan : `Timespan`, optional 

933 A timespan that the validity range of the dataset must overlap. 

934 If not provided, any `~CollectionType.CALIBRATION` collections 

935 matched by the ``collections`` argument will not be searched. 

936 storage_class : `str` or `StorageClass` or `None` 

937 A storage class to use when creating the returned entry. If given 

938 it must be compatible with the default storage class. 

939 dimension_records : `bool`, optional 

940 If `True` the ref will be expanded and contain dimension records. 

941 datastore_records : `bool`, optional 

942 If `True` the ref will contain associated datastore records. 

943 **kwargs 

944 Additional keyword arguments passed to 

945 `DataCoordinate.standardize` to convert ``dataId`` to a true 

946 `DataCoordinate` or augment an existing one. This can also include 

947 dimension record metadata that can be used to derive a primary 

948 dimension value. 

949 

950 Returns 

951 ------- 

952 ref : `DatasetRef` 

953 A reference to the dataset, or `None` if no matching Dataset 

954 was found. 

955 

956 Raises 

957 ------ 

958 lsst.daf.butler.NoDefaultCollectionError 

959 Raised if ``collections`` is `None` and 

960 ``self.collections`` is `None`. 

961 LookupError 

962 Raised if one or more data ID keys are missing. 

963 lsst.daf.butler.MissingDatasetTypeError 

964 Raised if the dataset type does not exist. 

965 lsst.daf.butler.MissingCollectionError 

966 Raised if any of ``collections`` does not exist in the registry. 

967 

968 Notes 

969 ----- 

970 This method simply returns `None` and does not raise an exception even 

971 when the set of collections searched is intrinsically incompatible with 

972 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

973 only `~CollectionType.CALIBRATION` collections are being searched. 

974 This may make it harder to debug some lookup failures, but the behavior 

975 is intentional; we consider it more important that failed searches are 

976 reported consistently, regardless of the reason, and that adding 

977 additional collections that do not contain a match to the search path 

978 never changes the behavior. 

979 

980 This method handles component dataset types automatically, though most 

981 other query operations do not. 

982 """ 

983 raise NotImplementedError() 
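
# Illustrative sketch: find_dataset() resolves a dataset type plus data ID to
# a DatasetRef, and get_dataset() looks a dataset up again by its ID. The repo
# path, dataset type, collection, and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
ref = butler.find_dataset(
    "calexp", instrument="HSC", visit=903334, detector=42, collections="HSC/defaults"
)
if ref is not None:
    expanded = butler.get_dataset(ref.id, dimension_records=True)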

984 

985 @abstractmethod 

986 def retrieveArtifacts( 

987 self, 

988 refs: Iterable[DatasetRef], 

989 destination: ResourcePathExpression, 

990 transfer: str = "auto", 

991 preserve_path: bool = True, 

992 overwrite: bool = False, 

993 ) -> list[ResourcePath]: 

994 """Retrieve the artifacts associated with the supplied refs. 

995 

996 Parameters 

997 ---------- 

998 refs : iterable of `DatasetRef` 

999 The datasets for which artifacts are to be retrieved. 

1000 A single ref can result in multiple artifacts. The refs must 

1001 be resolved. 

1002 destination : `lsst.resources.ResourcePath` or `str` 

1003 Location to write the artifacts. 

1004 transfer : `str`, optional 

1005 Method to use to transfer the artifacts. Must be one of the options 

1006 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

1007 "move" is not allowed. 

1008 preserve_path : `bool`, optional 

1009 If `True` the full path of the artifact within the datastore 

1010 is preserved. If `False` the final file component of the path 

1011 is used. 

1012 overwrite : `bool`, optional 

1013 If `True` allow transfers to overwrite existing files at the 

1014 destination. 

1015 

1016 Returns 

1017 ------- 

1018 targets : `list` of `lsst.resources.ResourcePath` 

1019 URIs of file artifacts in the destination location. Order is not

1020 preserved. 

1021 

1022 Notes 

1023 ----- 

1024 For non-file datastores the artifacts written to the destination 

1025 may not match the representation inside the datastore. For example 

1026 a hierarchical data structure in a NoSQL database may well be stored 

1027 as a JSON file. 

1028 """ 

1029 raise NotImplementedError() 
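
# Illustrative sketch of retrieveArtifacts(): copy the file artifacts behind a
# set of resolved refs into a local directory, preserving datastore paths.
# The repo path, dataset type, collection, and destination are hypothetical;
# the refs come from the registry query API also used in the export() examples
# further below.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
refs = butler.registry.queryDatasets("calexp", collections="HSC/defaults")
paths = butler.retrieveArtifacts(refs, destination="/tmp/exported", transfer="copy")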

1030 

1031 @abstractmethod 

1032 def exists( 

1033 self, 

1034 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1035 /, 

1036 data_id: DataId | None = None, 

1037 *, 

1038 full_check: bool = True, 

1039 collections: Any = None, 

1040 **kwargs: Any, 

1041 ) -> DatasetExistence: 

1042 """Indicate whether a dataset is known to Butler registry and 

1043 datastore. 

1044 

1045 Parameters 

1046 ---------- 

1047 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1048 When `DatasetRef` the `dataId` should be `None`. 

1049 Otherwise the `DatasetType` or name thereof. 

1050 data_id : `dict` or `DataCoordinate` 

1051 A `dict` of `Dimension` link name, value pairs that label the 

1052 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1053 should be provided as the first argument. 

1054 full_check : `bool`, optional 

1055 If `True`, a check will be made for the actual existence of a 

1056 dataset artifact. This will involve additional overhead due to 

1057 the need to query an external system. If `False`, this check will 

1058 be omitted, and the registry and datastore will solely be asked 

1059 if they know about the dataset but no direct check for the 

1060 artifact will be performed. 

1061 collections : Any, optional 

1062 Collections to be searched, overriding ``self.collections``. 

1063 Can be any of the types supported by the ``collections`` argument 

1064 to butler construction. 

1065 **kwargs 

1066 Additional keyword arguments used to augment or construct a 

1067 `DataCoordinate`. See `DataCoordinate.standardize` 

1068 parameters. 

1069 

1070 Returns 

1071 ------- 

1072 existence : `DatasetExistence` 

1073 Object indicating whether the dataset is known to registry and 

1074 datastore. Evaluates to `True` if the dataset is present and known 

1075 to both. 

1076 """ 

1077 raise NotImplementedError() 
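
# Illustrative sketch of exists(): the returned DatasetExistence is truthy
# only when the dataset is known to both registry and datastore (and, with
# full_check=True, the artifact itself is present). Names and values below are
# hypothetical placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
existence = butler.exists("calexp", instrument="HSC", visit=903334, detector=42)
if not existence:
    print(f"dataset not fully present: {existence}")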

1078 

1079 @abstractmethod 

1080 def _exists_many( 

1081 self, 

1082 refs: Iterable[DatasetRef], 

1083 /, 

1084 *, 

1085 full_check: bool = True, 

1086 ) -> dict[DatasetRef, DatasetExistence]: 

1087 """Indicate whether multiple datasets are known to Butler registry and 

1088 datastore. 

1089 

1090 This is an experimental API that may change at any moment. 

1091 

1092 Parameters 

1093 ---------- 

1094 refs : iterable of `DatasetRef` 

1095 The datasets to be checked. 

1096 full_check : `bool`, optional 

1097 If `True`, a check will be made for the actual existence of each 

1098 dataset artifact. This will involve additional overhead due to 

1099 the need to query an external system. If `False`, this check will 

1100 be omitted, and the registry and datastore will solely be asked 

1101 if they know about the dataset(s) but no direct check for the 

1102 artifact(s) will be performed. 

1103 

1104 Returns 

1105 ------- 

1106 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1107 Mapping from the given dataset refs to an enum indicating the 

1108 status of the dataset in registry and datastore. 

1109 Each value evaluates to `True` if the dataset is present and known 

1110 to both. 

1111 """ 

1112 raise NotImplementedError() 

1113 

1114 @abstractmethod 

1115 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1116 """Remove one or more `~CollectionType.RUN` collections and the 

1117 datasets within them. 

1118 

1119 Parameters 

1120 ---------- 

1121 names : `~collections.abc.Iterable` [ `str` ] 

1122 The names of the collections to remove. 

1123 unstore : `bool`, optional 

1124 If `True` (default), delete datasets from all datastores in which 

1125 they are present, and attempt to roll back the registry deletions if

1126 datastore deletions fail (which may not always be possible). If 

1127 `False`, datastore records for these datasets are still removed, 

1128 but any artifacts (e.g. files) will not be. 

1129 

1130 Raises 

1131 ------ 

1132 TypeError 

1133 Raised if one or more collections are not of type 

1134 `~CollectionType.RUN`. 

1135 """ 

1136 raise NotImplementedError() 
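
# Illustrative sketch of removeRuns(): delete two RUN collections, their
# registry entries, and (with unstore=True) their datastore artifacts. The
# repo path and run names are hypothetical; the butler must be writeable.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", writeable=True)
butler.removeRuns(["u/alice/scratch-1", "u/alice/scratch-2"], unstore=True)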

1137 

1138 @abstractmethod 

1139 def ingest( 

1140 self, 

1141 *datasets: FileDataset, 

1142 transfer: str | None = "auto", 

1143 record_validation_info: bool = True, 

1144 ) -> None: 

1145 """Store and register one or more datasets that already exist on disk. 

1146 

1147 Parameters 

1148 ---------- 

1149 *datasets : `FileDataset` 

1150 Each positional argument is a struct containing information about 

1151 a file to be ingested, including its URI (either absolute or 

1152 relative to the datastore root, if applicable), a resolved 

1153 `DatasetRef`, and optionally a formatter class or its 

1154 fully-qualified string name. If a formatter is not provided, the 

1155 formatter that would be used for `put` is assumed. On successful 

1156 ingest all `FileDataset.formatter` attributes will be set to the 

1157 formatter class used. `FileDataset.path` attributes may be modified 

1158 to put paths in whatever the datastore considers a standardized 

1159 form. 

1160 transfer : `str`, optional 

1161 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1162 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1163 transfer the file. 

1164 record_validation_info : `bool`, optional 

1165 If `True`, the default, the datastore can record validation 

1166 information associated with the file. If `False` the datastore 

1167 will not attempt to track any information such as checksums 

1168 or file sizes. This can be useful if such information is tracked 

1169 in an external system or if the file is to be compressed in place. 

1170 It is up to the datastore whether this parameter is relevant. 

1171 

1172 Raises 

1173 ------ 

1174 TypeError 

1175 Raised if the butler is read-only or if no run was provided. 

1176 NotImplementedError 

1177 Raised if the `Datastore` does not support the given transfer mode. 

1178 DatasetTypeNotSupportedError 

1179 Raised if one or more files to be ingested have a dataset type that 

1180 is not supported by the `Datastore`.

1181 FileNotFoundError 

1182 Raised if one of the given files does not exist. 

1183 FileExistsError 

1184 Raised if transfer is not `None` but the (internal) location the 

1185 file would be moved to is already occupied. 

1186 

1187 Notes 

1188 ----- 

1189 This operation is not fully exception safe: if a database operation 

1190 fails, the given `FileDataset` instances may be only partially updated. 

1191 

1192 It is atomic in terms of database operations (they will either all 

1193 succeed or all fail) providing the database engine implements 

1194 transactions correctly. It will attempt to be atomic in terms of 

1195 filesystem operations as well, but this cannot be implemented 

1196 rigorously for most datastores. 

1197 """ 

1198 raise NotImplementedError() 
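
# Illustrative sketch of ingest(): register a file that already exists on
# disk. The repo path, run, dataset type, file path, and data ID values are
# hypothetical, and this assumes DatasetRef and DataCoordinate.standardize
# accept the arguments shown; a resolved ref tied to the run is required.
from lsst.daf.butler import Butler, DataCoordinate, DatasetRef, FileDataset

butler = Butler.from_config("/path/to/repo", run="HSC/raw/example")
raw_type = butler.get_dataset_type("raw")
data_id = DataCoordinate.standardize(
    instrument="HSC", exposure=903334, detector=42, universe=butler.dimensions
)
ref = DatasetRef(raw_type, data_id, run="HSC/raw/example")
butler.ingest(FileDataset(path="/data/raw/file.fits", refs=[ref]), transfer="copy")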

1199 

1200 @abstractmethod 

1201 def export( 

1202 self, 

1203 *, 

1204 directory: str | None = None, 

1205 filename: str | None = None, 

1206 format: str | None = None, 

1207 transfer: str | None = None, 

1208 ) -> AbstractContextManager[RepoExportContext]: 

1209 """Export datasets from the repository represented by this `Butler`. 

1210 

1211 This method is a context manager that returns a helper object 

1212 (`RepoExportContext`) that is used to indicate what information from 

1213 the repository should be exported. 

1214 

1215 Parameters 

1216 ---------- 

1217 directory : `str`, optional 

1218 Directory dataset files should be written to if ``transfer`` is not 

1219 `None`. 

1220 filename : `str`, optional 

1221 Name for the file that will include database information associated 

1222 with the exported datasets. If this is not an absolute path and 

1223 ``directory`` is not `None`, it will be written to ``directory`` 

1224 instead of the current working directory. Defaults to 

1225 "export.{format}". 

1226 format : `str`, optional 

1227 File format for the database information file. If `None`, the 

1228 extension of ``filename`` will be used. 

1229 transfer : `str`, optional 

1230 Transfer mode passed to `Datastore.export`. 

1231 

1232 Raises 

1233 ------ 

1234 TypeError 

1235 Raised if the set of arguments passed is inconsistent. 

1236 

1237 Examples 

1238 -------- 

1239 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1240 methods are used to provide the iterables over data IDs and/or datasets 

1241 to be exported:: 

1242 

1243 with butler.export("exports.yaml") as export: 

1244 # Export all flats, but none of the dimension element rows 

1245 # (i.e. data ID information) associated with them. 

1246 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1247 elements=()) 

1248 # Export all datasets that start with "deepCoadd_" and all of 

1249 # their associated data ID information. 

1250 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1251 """ 

1252 raise NotImplementedError() 

1253 

1254 @abstractmethod 

1255 def import_( 

1256 self, 

1257 *, 

1258 directory: ResourcePathExpression | None = None, 

1259 filename: ResourcePathExpression | TextIO | None = None, 

1260 format: str | None = None, 

1261 transfer: str | None = None, 

1262 skip_dimensions: set | None = None, 

1263 ) -> None: 

1264 """Import datasets into this repository that were exported from a 

1265 different butler repository via `~lsst.daf.butler.Butler.export`. 

1266 

1267 Parameters 

1268 ---------- 

1269 directory : `~lsst.resources.ResourcePathExpression`, optional 

1270 Directory containing dataset files to import from. If `None`, 

1271 ``filename`` and all dataset file paths specified therein must 

1272 be absolute. 

1273 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1274 A stream or name of file that contains database information 

1275 associated with the exported datasets, typically generated by 

1276 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1277 `~lsst.resources.ResourcePath` and is not an absolute path, 

1278 it will first be looked for relative to ``directory`` and if not 

1279 found there it will be looked for in the current working 

1280 directory. Defaults to "export.{format}". 

1281 format : `str`, optional 

1282 File format for ``filename``. If `None`, the extension of 

1283 ``filename`` will be used. 

1284 transfer : `str`, optional 

1285 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1286 skip_dimensions : `set`, optional 

1287 Names of dimensions that should be skipped and not imported. 

1288 

1289 Raises 

1290 ------ 

1291 TypeError 

1292 Raised if the set of arguments passed is inconsistent, or if the 

1293 butler is read-only. 

1294 """ 

1295 raise NotImplementedError() 
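
# Illustrative sketch of import_(): load an export file produced by
# Butler.export() from another repository. The repo path, directory, and
# filename are hypothetical placeholders; the butler must be writeable.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", writeable=True)
butler.import_(directory="/path/to/exported", filename="export.yaml", transfer="copy")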

1296 

1297 @abstractmethod 

1298 def transfer_dimension_records_from( 

1299 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1300 ) -> None: 

1301 """Transfer dimension records to this Butler from another Butler. 

1302 

1303 Parameters 

1304 ---------- 

1305 source_butler : `LimitedButler` or `Butler` 

1306 Butler from which the records are to be transferred. If data IDs 

1307 in ``source_refs`` are not expanded then this has to be a full 

1308 `Butler` whose registry will be used to expand data IDs. If the 

1309 source refs contain coordinates that are used to populate other 

1310 records then this will also need to be a full `Butler`. 

1311 source_refs : iterable of `DatasetRef` 

1312 Datasets defined in the source butler whose dimension records 

1313 should be transferred to this butler. In most circumstances,

1314 transfer is faster if the dataset refs are expanded. 

1315 """ 

1316 raise NotImplementedError() 

1317 

1318 @abstractmethod 

1319 def transfer_from( 

1320 self, 

1321 source_butler: LimitedButler, 

1322 source_refs: Iterable[DatasetRef], 

1323 transfer: str = "auto", 

1324 skip_missing: bool = True, 

1325 register_dataset_types: bool = False, 

1326 transfer_dimensions: bool = False, 

1327 dry_run: bool = False, 

1328 ) -> Collection[DatasetRef]: 

1329 """Transfer datasets to this Butler from a run in another Butler. 

1330 

1331 Parameters 

1332 ---------- 

1333 source_butler : `LimitedButler` 

1334 Butler from which the datasets are to be transferred. If data IDs 

1335 in ``source_refs`` are not expanded then this has to be a full 

1336 `Butler` whose registry will be used to expand data IDs. 

1337 source_refs : iterable of `DatasetRef` 

1338 Datasets defined in the source butler that should be transferred to 

1339 this butler. In most circumstances, ``transfer_from`` is faster if 

1340 the dataset refs are expanded. 

1341 transfer : `str`, optional 

1342 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1343 skip_missing : `bool` 

1344 If `True`, datasets with no datastore artifact associated with 

1345 them are not transferred. If `False` a registry entry will be 

1346 created even if no datastore record is created (and so will 

1347 look equivalent to the dataset being unstored). 

1348 register_dataset_types : `bool` 

1349 If `True` any missing dataset types are registered. Otherwise 

1350 an exception is raised. 

1351 transfer_dimensions : `bool`, optional 

1352 If `True`, dimension record data associated with the new datasets 

1353 will be transferred. 

1354 dry_run : `bool`, optional 

1355 If `True` the transfer will be processed without any modifications 

1356 made to the target butler and as if the target butler did not 

1357 have any of the datasets. 

1358 

1359 Returns 

1360 ------- 

1361 refs : `list` of `DatasetRef` 

1362 The refs added to this Butler. 

1363 

1364 Notes 

1365 ----- 

1366 The datastore artifact has to exist for a transfer 

1367 to be made but non-existence is not an error. 

1368 

1369 Datasets that already exist in this run will be skipped. 

1370 

1371 The datasets are imported as part of a transaction, although 

1372 dataset types are registered before the transaction is started. 

1373 This means that it is possible for a dataset type to be registered 

1374 even though transfer has failed. 

1375 """ 

1376 raise NotImplementedError() 
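
# Illustrative sketch of transfer_from(): copy datasets from a source
# repository, registering any missing dataset types and carrying dimension
# records along. The repo paths, dataset type, and collection are hypothetical;
# the source refs come from the source butler's registry.
from lsst.daf.butler import Butler

source = Butler.from_config("/path/to/source_repo")
target = Butler.from_config("/path/to/target_repo", writeable=True)
refs = source.registry.queryDatasets("calexp", collections="HSC/runs/example")
transferred = target.transfer_from(
    source, refs, transfer="copy", register_dataset_types=True, transfer_dimensions=True
)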

1377 

1378 @abstractmethod 

1379 def validateConfiguration( 

1380 self, 

1381 logFailures: bool = False, 

1382 datasetTypeNames: Iterable[str] | None = None, 

1383 ignore: Iterable[str] | None = None, 

1384 ) -> None: 

1385 """Validate butler configuration. 

1386 

1387 Checks that each `DatasetType` can be stored in the `Datastore`. 

1388 

1389 Parameters 

1390 ---------- 

1391 logFailures : `bool`, optional 

1392 If `True`, output a log message for every validation error 

1393 detected. 

1394 datasetTypeNames : iterable of `str`, optional 

1395 The `DatasetType` names that should be checked. This allows 

1396 only a subset to be selected. 

1397 ignore : iterable of `str`, optional 

1398 Names of DatasetTypes to skip over. This can be used to skip 

1399 known problems. If a named `DatasetType` corresponds to a 

1400 composite, all components of that `DatasetType` will also be 

1401 ignored. 

1402 

1403 Raises 

1404 ------ 

1405 ButlerValidationError 

1406 Raised if there is some inconsistency with how this Butler 

1407 is configured. 

1408 """ 

1409 raise NotImplementedError() 

1410 
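# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A minimal example of checking a repository configuration with
# ``validateConfiguration``. The repository path and the ignored dataset type
# name ("raw") are hypothetical; per the docstring above, a
# ButlerValidationError is raised if the configuration is inconsistent.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
# Emit a log message for every validation error detected, and skip a dataset
# type with a known, accepted problem.
butler.validateConfiguration(logFailures=True, ignore=["raw"])
# ----------------------------------------------------------------------------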

1411 @property 

1412 @abstractmethod 

1413 def collection_chains(self) -> ButlerCollections: 

1414 """Object with methods for modifying collection chains.""" 

1415 raise NotImplementedError() 

1416 

1417 @property 

1418 @abstractmethod 

1419 def collections(self) -> Sequence[str]: 

1420 """The collections to search by default, in order 

1421 (`~collections.abc.Sequence` [ `str` ]). 

1422 """ 

1423 raise NotImplementedError() 

1424 

1425 @property 

1426 @abstractmethod 

1427 def run(self) -> str | None: 

1428 """Name of the run this butler writes outputs to by default (`str` or 

1429 `None`). 

1430 """ 

1431 raise NotImplementedError() 

1432 

1433 @property 

1434 @abstractmethod 

1435 def registry(self) -> Registry: 

1436 """The object that manages dataset metadata and relationships 

1437 (`Registry`). 

1438 

1439 Many operations that don't involve reading or writing butler datasets 

1440 are accessible only via `Registry` methods. Eventually these methods 

1441 will be replaced by equivalent `Butler` methods. 

1442 """ 

1443 raise NotImplementedError() 

1444 
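# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A hedged example of the default-collection and run properties and the
# registry escape hatch described above. The repository path, collection
# names, and run name are hypothetical; ``Registry.queryDatasetTypes`` is an
# existing registry method.
from lsst.daf.butler import Butler

butler = Butler.from_config(
    "/path/to/repo",
    collections=["HSC/defaults"],  # default search collections, in order
    run="u/someone/scratch",       # default output run
)
print(butler.collections)  # sequence of collection names searched by default
print(butler.run)          # 'u/someone/scratch'

# Operations not yet available as Butler methods go through the registry,
# e.g. listing all registered dataset types.
for dataset_type in butler.registry.queryDatasetTypes():
    print(dataset_type.name)
# ----------------------------------------------------------------------------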

1445 @abstractmethod 

1446 def _query(self) -> AbstractContextManager[Query]: 

1447 """Context manager returning a `Query` object used for construction 

1448 and execution of complex queries. 

1449 """ 

1450 raise NotImplementedError() 

1451 

1452 def _query_data_ids( 

1453 self, 

1454 dimensions: DimensionGroup | Iterable[str] | str, 

1455 *, 

1456 data_id: DataId | None = None, 

1457 where: str = "", 

1458 bind: Mapping[str, Any] | None = None, 

1459 with_dimension_records: bool = False, 

1460 order_by: Iterable[str] | str | None = None, 

1461 limit: int | None = None, 

1462 explain: bool = True, 

1463 **kwargs: Any, 

1464 ) -> list[DataCoordinate]: 

1465 """Query for data IDs matching user-provided criteria. 

1466 

1467 Parameters 

1468 ---------- 

1469 dimensions : `DimensionGroup`, `str`, or \ 

1470 `~collections.abc.Iterable` [`str`] 

1471 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1472 instances or `str`. Will be automatically expanded to a complete 

1473 `DimensionGroup`. 

1474 data_id : `dict` or `DataCoordinate`, optional 

1475 A data ID whose key-value pairs are used as equality constraints 

1476 in the query. 

1477 where : `str`, optional 

1478 A string expression similar to a SQL WHERE clause. May involve 

1479 any column of a dimension table or (as a shortcut for the primary 

1480 key column of a dimension table) dimension name. See 

1481 :ref:`daf_butler_dimension_expressions` for more information. 

1482 bind : `~collections.abc.Mapping`, optional 

1483 Mapping containing literal values that should be injected into the 

1484 ``where`` expression, keyed by the identifiers they replace. 

1485 Values of collection type can be expanded in some cases; see 

1486 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1487 information. 

1488 with_dimension_records : `bool`, optional 

1489 If `True` (default is `False`) then returned data IDs will have 

1490 dimension records. 

1491 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1492 Names of the columns/dimensions to use for ordering returned data 

1493 IDs. Column name can be prefixed with minus (``-``) to use 

1494 descending ordering. 

1495 limit : `int`, optional 

1496 Upper limit on the number of returned records. 

1497 explain : `bool`, optional 

1498 If `True` (default), an `EmptyQueryResultError` exception is 

1499 raised when the resulting list is empty. The exception contains 

1500 a non-empty list of strings explaining possible causes for the 

1501 empty result. 

1502 **kwargs 

1503 Additional keyword arguments are forwarded to 

1504 `DataCoordinate.standardize` when processing the ``data_id`` 

1505 argument (and may be used to provide a constraining data ID even 

1506 when the ``data_id`` argument is `None`). 

1507 

1508 Returns 

1509 ------- 

1510 dataIds : `list` [`DataCoordinate`] 

1511 Data IDs matching the given query parameters. These are always 

1512 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1513 returns `True`). 

1514 

1515 Raises 

1516 ------ 

1517 lsst.daf.butler.registry.DataIdError 

1518 Raised when ``data_id`` or keyword arguments specify unknown 

1519 dimensions or values, or when they contain inconsistent values. 

1520 lsst.daf.butler.registry.UserExpressionError 

1521 Raised when ``where`` expression is invalid. 

1522 lsst.daf.butler.EmptyQueryResultError 

1523 Raised when the query generates an empty result and ``explain`` is 

1524 set to `True`. 

1525 TypeError 

1526 Raised when the arguments are incompatible. 

1527 """ 

1528 if data_id is None: 

1529 data_id = DataCoordinate.make_empty(self.dimensions) 

1530 with self._query() as query: 

1531 result = ( 

1532 query.where(data_id, where, bind=bind, **kwargs) 

1533 .data_ids(dimensions) 

1534 .order_by(*ensure_iterable(order_by)) 

1535 .limit(limit) 

1536 ) 

1537 if with_dimension_records: 

1538 result = result.with_dimension_records() 

1539 data_ids = list(result) 

1540 if explain and not data_ids: 

1541 raise EmptyQueryResultError(list(result.explain_no_results())) 

1542 return data_ids 

1543 
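# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A hedged example of a call against a concrete Butler subclass that
# implements ``_query_data_ids`` (a private helper on this abstract class).
# The instrument name, dimension names, and bind value are hypothetical;
# EmptyQueryResultError is the public exception documented above.
from lsst.daf.butler import Butler, EmptyQueryResultError

butler = Butler.from_config("/path/to/repo")
try:
    data_ids = butler._query_data_ids(
        ["exposure", "detector"],
        where="instrument = inst AND exposure.observation_type = 'science'",
        bind={"inst": "HSC"},       # literal injected for the 'inst' identifier
        order_by="-exposure",       # leading '-' requests descending order
        limit=10,
        with_dimension_records=True,
    )
except EmptyQueryResultError as err:
    # The exception carries strings explaining why the result was empty.
    print("No matching data IDs:", err)
# ----------------------------------------------------------------------------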

1544 def _query_datasets( 

1545 self, 

1546 dataset_type: str | DatasetType, 

1547 collections: str | Iterable[str] | None = None, 

1548 *, 

1549 find_first: bool = True, 

1550 data_id: DataId | None = None, 

1551 where: str = "", 

1552 bind: Mapping[str, Any] | None = None, 

1553 with_dimension_records: bool = False, 

1554 explain: bool = True, 

1555 **kwargs: Any, 

1556 ) -> list[DatasetRef]: 

1557 """Query for dataset references matching user-provided criteria. 

1558 

1559 Parameters 

1560 ---------- 

1561 dataset_type : `str` or `DatasetType` 

1562 Dataset type object or name to search for. 

1563 collections : collection expression, optional 

1564 A collection name or iterable of collection names to search. If not 

1565 provided, the default collections are used. See 

1566 :ref:`daf_butler_collection_expressions` for more information. 

1567 find_first : `bool`, optional 

1568 If `True` (default), for each result data ID, only yield one 

1569 `DatasetRef` of each `DatasetType`, from the first collection in 

1570 which a dataset of that dataset type appears (according to the 

1571 order of ``collections`` passed in). If `True`, ``collections`` 

1572 must not contain regular expressions and may not be ``...``. 

1573 data_id : `dict` or `DataCoordinate`, optional 

1574 A data ID whose key-value pairs are used as equality constraints in 

1575 the query. 

1576 where : `str`, optional 

1577 A string expression similar to a SQL WHERE clause. May involve any 

1578 column of a dimension table or (as a shortcut for the primary key 

1579 column of a dimension table) dimension name. See 

1580 :ref:`daf_butler_dimension_expressions` for more information. 

1581 bind : `~collections.abc.Mapping`, optional 

1582 Mapping containing literal values that should be injected into the 

1583 ``where`` expression, keyed by the identifiers they replace. Values 

1584 of collection type can be expanded in some cases; see 

1585 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1586 information. 

1587 with_dimension_records : `bool`, optional 

1588 If `True` (default is `False`) then returned data IDs will have 

1589 dimension records. 

1590 explain : `bool`, optional 

1591 If `True` (default), an `EmptyQueryResultError` exception is 

1592 raised when the resulting list is empty. The exception contains 

1593 a non-empty list of strings explaining possible causes for the 

1594 empty result. 

1595 **kwargs 

1596 Additional keyword arguments are forwarded to 

1597 `DataCoordinate.standardize` when processing the ``data_id`` 

1598 argument (and may be used to provide a constraining data ID even 

1599 when the ``data_id`` argument is `None`). 

1600 

1601 Returns 

1602 ------- 

1603 refs : `list` [ `DatasetRef` ] 

1604 Dataset references matching the given query criteria. Nested data 

1605 IDs are guaranteed to include values for all implied dimensions 

1606 (i.e. `DataCoordinate.hasFull` will return `True`). 

1607 

1608 Raises 

1609 ------ 

1610 lsst.daf.butler.registry.DatasetTypeExpressionError 

1611 Raised when ``dataset_type`` expression is invalid. 

1612 lsst.daf.butler.registry.DataIdError 

1613 Raised when ``data_id`` or keyword arguments specify unknown 

1614 dimensions or values, or when they contain inconsistent values. 

1615 lsst.daf.butler.registry.UserExpressionError 

1616 Raised when ``where`` expression is invalid. 

1617 lsst.daf.butler.EmptyQueryResultError 

1618 Raised when the query generates an empty result and ``explain`` is 

1619 set to `True`. 

1620 TypeError 

1621 Raised when the arguments are incompatible, such as when a 

1622 collection wildcard is passed when ``find_first`` is `True`, or 

1623 when ``collections`` is `None` and default butler collections are 

1624 not defined. 

1625 

1626 Notes 

1627 ----- 

1628 When multiple dataset types are queried in a single call, the results 

1629 of this operation are equivalent to querying for each dataset type 

1630 separately in turn, and no information about the relationships between 

1631 datasets of different types is included. 

1632 """ 

1633 if data_id is None: 

1634 data_id = DataCoordinate.make_empty(self.dimensions) 

1635 with self._query() as query: 

1636 result = query.where(data_id, where, bind=bind, **kwargs).datasets( 

1637 dataset_type, 

1638 collections=collections, 

1639 find_first=find_first, 

1640 ) 

1641 if with_dimension_records: 

1642 result = result.with_dimension_records() 

1643 refs = list(result) 

1644 if explain and not refs: 

1645 raise EmptyQueryResultError(list(result.explain_no_results())) 

1646 return refs 

1647 
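# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A hedged example of a dataset query through a concrete implementation of
# ``_query_datasets``. The dataset type, collection name, and visit number
# are hypothetical.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
refs = butler._query_datasets(
    "calexp",
    collections="HSC/runs/example",
    where="visit = my_visit",
    bind={"my_visit": 12345},
    find_first=True,              # one ref per data ID, from the first collection
    with_dimension_records=True,  # returned refs carry expanded data IDs
)
for ref in refs:
    print(ref.datasetType.name, ref.dataId, ref.run)
# ----------------------------------------------------------------------------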

1648 def _query_dimension_records( 

1649 self, 

1650 element: str, 

1651 *, 

1652 data_id: DataId | None = None, 

1653 where: str = "", 

1654 bind: Mapping[str, Any] | None = None, 

1655 order_by: Iterable[str] | str | None = None, 

1656 limit: int | None = None, 

1657 explain: bool = True, 

1658 **kwargs: Any, 

1659 ) -> list[DimensionRecord]: 

1660 """Query for dimension information matching user-provided criteria. 

1661 

1662 Parameters 

1663 ---------- 

1664 element : `str` 

1665 The name of a dimension element to obtain records for. 

1666 data_id : `dict` or `DataCoordinate`, optional 

1667 A data ID whose key-value pairs are used as equality constraints 

1668 in the query. 

1669 where : `str`, optional 

1670 A string expression similar to a SQL WHERE clause. See 

1671 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1672 information. 

1673 bind : `~collections.abc.Mapping`, optional 

1674 Mapping containing literal values that should be injected into the 

1675 ``where`` expression, keyed by the identifiers they replace. 

1676 Values of collection type can be expanded in some cases; see 

1677 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1678 information. 

1679 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1680 Names of the columns/dimensions to use for ordering returned data 

1681 IDs. Column name can be prefixed with minus (``-``) to use 

1682 descending ordering. 

1683 limit : `int`, optional 

1684 Upper limit on the number of returned records. 

1685 explain : `bool`, optional 

1686 If `True` (default), an `EmptyQueryResultError` exception is 

1687 raised when the resulting list is empty. The exception contains 

1688 a non-empty list of strings explaining possible causes for the 

1689 empty result. 

1690 **kwargs 

1691 Additional keyword arguments are forwarded to 

1692 `DataCoordinate.standardize` when processing the ``data_id`` 

1693 argument (and may be used to provide a constraining data ID even 

1694 when the ``data_id`` argument is `None`). 

1695 

1696 Returns 

1697 ------- 

1698 records : `list`[`DimensionRecord`] 

1699 Dimension records matching the given query parameters. 

1700 

1701 Raises 

1702 ------ 

1703 lsst.daf.butler.registry.DataIdError 

1704 Raised when ``data_id`` or keyword arguments specify unknown 

1705 dimensions or values, or when they contain inconsistent values. 

1706 lsst.daf.butler.registry.UserExpressionError 

1707 Raised when ``where`` expression is invalid. 

1708 lsst.daf.butler.EmptyQueryResultError 

1709 Raised when the query generates an empty result and ``explain`` is 

1710 set to `True`. 

1711 TypeError 

1712 Raised when the arguments are incompatible. 

1716 """ 

1717 if data_id is None: 

1718 data_id = DataCoordinate.make_empty(self.dimensions) 

1719 with self._query() as query: 

1720 result = ( 

1721 query.where(data_id, where, bind=bind, **kwargs) 

1722 .dimension_records(element) 

1723 .order_by(*ensure_iterable(order_by)) 

1724 .limit(limit) 

1725 ) 

1726 dimension_records = list(result) 

1727 if explain and not dimension_records: 

1728 raise EmptyQueryResultError(list(result.explain_no_results())) 

1729 return dimension_records 

1730 
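# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A hedged example of fetching dimension records through a concrete
# implementation of ``_query_dimension_records``. The element name
# ("detector") and instrument are hypothetical, as is the record attribute
# printed below.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
records = butler._query_dimension_records(
    "detector",
    where="instrument = 'HSC'",
    order_by="detector",
    limit=5,
)
for record in records:
    # Each DimensionRecord exposes the element's columns as attributes;
    # 'id' is assumed here for illustration.
    print(record.id)
# ----------------------------------------------------------------------------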

1731 @abstractmethod 

1732 def _clone( 

1733 self, 

1734 *, 

1735 collections: Any = None, 

1736 run: str | None = None, 

1737 inferDefaults: bool = True, 

1738 **kwargs: Any, 

1739 ) -> Butler: 

1740 """Return a new Butler instance connected to the same repository 

1741 as this one, but overriding ``collections``, ``run``, 

1742 ``inferDefaults``, and default data ID. 

1743 """ 

1744 raise NotImplementedError()
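# --- Editor's illustrative sketch (not part of _butler.py) -----------------
# A hedged example of ``_clone``: obtaining a second Butler connected to the
# same repository but writing to a different run and searching different
# collections. The run and collection names are hypothetical.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
writer = butler._clone(run="u/someone/reprocessing")
reader = butler._clone(collections=["u/someone/reprocessing", "HSC/defaults"])
# ----------------------------------------------------------------------------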