Coverage for python/lsst/daf/butler/_butler.py: 51%

181 statements  

coverage.py v7.4.4, created at 2024-03-30 09:59 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from typing import TYPE_CHECKING, Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.iteration import ensure_iterable 

40from lsst.utils.logging import getLogger 

41 

42from ._butler_config import ButlerConfig, ButlerType 

43from ._butler_instance_options import ButlerInstanceOptions 

44from ._butler_repo_index import ButlerRepoIndex 

45from ._config import Config, ConfigSubset 

46from ._exceptions import EmptyQueryResultError 

47from ._limited_butler import LimitedButler 

48from .datastore import Datastore 

49from .dimensions import DimensionConfig 

50from .registry import RegistryConfig, _RegistryFactory 

51from .repo_relocation import BUTLER_ROOT_TAG 

52 

53if TYPE_CHECKING: 

54 from ._dataset_existence import DatasetExistence 

55 from ._dataset_ref import DatasetId, DatasetRef 

56 from ._dataset_type import DatasetType 

57 from ._deferredDatasetHandle import DeferredDatasetHandle 

58 from ._file_dataset import FileDataset 

59 from ._storage_class import StorageClass 

60 from ._timespan import Timespan 

61 from .datastore import DatasetRefURIs 

62 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

63 from .queries import Query 

64 from .registry import Registry 

65 from .transfers import RepoExportContext 

66 

67_LOG = getLogger(__name__) 

68 

69 

70class Butler(LimitedButler): # numpydoc ignore=PR02 

71 """Interface for data butler and factory for Butler instances. 

72 

73 Parameters 

74 ---------- 

75 config : `ButlerConfig`, `Config` or `str`, optional 

76 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

77 If a directory path is given the configuration will be read from a 

78 ``butler.yaml`` file in that location. If `None` is given default 

79 values will be used. If ``config`` contains a "cls" key, its value is

80 used as the name of the butler class, which must be a subclass of this

81 class; otherwise `DirectButler` is instantiated.

82 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

83 An expression specifying the collections to be searched (in order) when 

84 reading datasets. 

85 This may be a `str` collection name or an iterable thereof. 

86 See :ref:`daf_butler_collection_expressions` for more information. 

87 These collections are not registered automatically and must be 

88 manually registered before they are used by any method, but they may be 

89 manually registered after the `Butler` is initialized. 

90 run : `str`, optional 

91 Name of the `~CollectionType.RUN` collection new datasets should be 

92 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

93 ``collections`` will be set to ``[run]``. If not `None`, this 

94 collection will automatically be registered. If this is not set (and 

95 ``writeable`` is not set either), a read-only butler will be created. 

96 searchPaths : `list` of `str`, optional 

97 Directory paths to search when calculating the full Butler 

98 configuration. Not used if the supplied config is already a 

99 `ButlerConfig`. 

100 writeable : `bool`, optional 

101 Explicitly sets whether the butler supports write operations. If not 

102 provided, a read-write butler is created if any of ``run``, ``tags``, 

103 or ``chains`` is non-empty. 

104 inferDefaults : `bool`, optional 

105 If `True` (default) infer default data ID values from the values 

106 present in the datasets in ``collections``: if all collections have the 

107 same value (or no value) for a governor dimension, that value will be 

108 the default for that dimension. Nonexistent collections are ignored. 

109 If a default value is provided explicitly for a governor dimension via 

110 ``**kwargs``, no default will be inferred for that dimension. 

111 without_datastore : `bool`, optional 

112 If `True` do not attach a datastore to this butler. Any attempts 

113 to use a datastore will fail. 

114 **kwargs : `Any` 

115 Additional keyword arguments passed to a constructor of actual butler 

116 class. 

117 

118 Notes 

119 ----- 

120 The preferred way to instantiate Butler is via the `from_config` method. 

121 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

122 but ``mypy`` will complain about the former. 

123 """ 

124 

125 def __new__( 

126 cls, 

127 config: Config | ResourcePathExpression | None = None, 

128 *, 

129 collections: Any = None, 

130 run: str | None = None, 

131 searchPaths: Sequence[ResourcePathExpression] | None = None, 

132 writeable: bool | None = None, 

133 inferDefaults: bool = True, 

134 without_datastore: bool = False, 

135 **kwargs: Any, 

136 ) -> Butler: 

137 if cls is Butler: 

138 return Butler.from_config( 

139 config=config, 

140 collections=collections, 

141 run=run, 

142 searchPaths=searchPaths, 

143 writeable=writeable, 

144 inferDefaults=inferDefaults, 

145 without_datastore=without_datastore, 

146 **kwargs, 

147 ) 

148 

149 # Note: we do not pass any parameters to __new__; Python will pass them

150 # to __init__ after __new__ returns the sub-class instance.

151 return super().__new__(cls) 

152 

153 @classmethod 

154 def from_config( 

155 cls, 

156 config: Config | ResourcePathExpression | None = None, 

157 *, 

158 collections: Any = None, 

159 run: str | None = None, 

160 searchPaths: Sequence[ResourcePathExpression] | None = None, 

161 writeable: bool | None = None, 

162 inferDefaults: bool = True, 

163 without_datastore: bool = False, 

164 **kwargs: Any, 

165 ) -> Butler: 

166 """Create butler instance from configuration. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `Config` or `str`, optional 

171 Configuration. Anything acceptable to the `ButlerConfig` 

172 constructor. If a directory path is given the configuration will be 

173 read from a ``butler.yaml`` file in that location. If `None` is 

174 given default values will be used. If ``config`` contains a "cls" key,

175 its value is used as the name of the butler class, which must be a

176 subclass of this class; otherwise `DirectButler` is instantiated.

177 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

178 An expression specifying the collections to be searched (in order) 

179 when reading datasets. 

180 This may be a `str` collection name or an iterable thereof. 

181 See :ref:`daf_butler_collection_expressions` for more information. 

182 These collections are not registered automatically and must be 

183 manually registered before they are used by any method, but they 

184 may be manually registered after the `Butler` is initialized. 

185 run : `str`, optional 

186 Name of the `~CollectionType.RUN` collection new datasets should be 

187 inserted into. If ``collections`` is `None` and ``run`` is not 

188 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

189 this collection will automatically be registered. If this is not 

190 set (and ``writeable`` is not set either), a read-only butler will 

191 be created. 

192 searchPaths : `list` of `str`, optional 

193 Directory paths to search when calculating the full Butler 

194 configuration. Not used if the supplied config is already a 

195 `ButlerConfig`. 

196 writeable : `bool`, optional 

197 Explicitly sets whether the butler supports write operations. If 

198 not provided, a read-write butler is created if any of ``run``, 

199 ``tags``, or ``chains`` is non-empty. 

200 inferDefaults : `bool`, optional 

201 If `True` (default) infer default data ID values from the values 

202 present in the datasets in ``collections``: if all collections have 

203 the same value (or no value) for a governor dimension, that value 

204 will be the default for that dimension. Nonexistent collections 

205 are ignored. If a default value is provided explicitly for a 

206 governor dimension via ``**kwargs``, no default will be inferred 

207 for that dimension. 

208 without_datastore : `bool`, optional 

209 If `True` do not attach a datastore to this butler. Any attempts 

210 to use a datastore will fail. 

211 **kwargs : `Any` 

212 Default data ID key-value pairs. These may only identify 

213 "governor" dimensions like ``instrument`` and ``skymap``. 

214 

215 Returns 

216 ------- 

217 butler : `Butler` 

218 A `Butler` constructed from the given configuration. 

219 

220 Notes 

221 ----- 

222 Calling this factory method is identical to calling 

223 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

224 complains about the ``Butler()`` call.

225 

226 Examples 

227 -------- 

228 While there are many ways to control exactly how a `Butler` interacts 

229 with the collections in its `Registry`, the most common cases are still 

230 simple. 

231 

232 For a read-only `Butler` that searches one collection, do:: 

233 

234 butler = Butler.from_config( 

235 "/path/to/repo", collections=["u/alice/DM-50000"] 

236 ) 

237 

238 For a read-write `Butler` that writes to and reads from a 

239 `~CollectionType.RUN` collection:: 

240 

241 butler = Butler.from_config( 

242 "/path/to/repo", run="u/alice/DM-50000/a" 

243 ) 

244 

245 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

246 because we want to write to one `~CollectionType.RUN` collection but 

247 read from several others (as well):: 

248 

249 butler = Butler.from_config( 

250 "/path/to/repo", 

251 run="u/alice/DM-50000/a", 

252 collections=[ 

253 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

254 ] 

255 ) 

256 

257 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

258 Datasets will be read first from that run (since it appears first in 

259 the chain), and then from ``u/bob/DM-49998`` and finally 

260 ``HSC/defaults``. 

261 

262 Finally, one can always create a `Butler` with no collections:: 

263 

264 butler = Butler.from_config("/path/to/repo", writeable=True) 

265 

266 This can be extremely useful when you just want to use 

267 ``butler.registry``, e.g. for inserting dimension data or managing 

268 collections, or when the collections you want to use with the butler 

269 are not consistent. Passing ``writeable`` explicitly here is only 

270 necessary if you want to be able to make changes to the repo; usually

271 the value for ``writeable`` can be guessed from the collection

272 arguments provided, but it defaults to `False` when there are no

273 collection arguments.

274 """ 

275 # DirectButler used to have a way to specify a "copy constructor" by 

276 # passing the "butler" parameter to its constructor. This 

277 # functionality has been moved out of the constructor into 

278 # Butler._clone(), but the new interface is not public yet. 

279 butler = kwargs.pop("butler", None) 

280 if butler is not None: 

281 if not isinstance(butler, Butler): 

282 raise TypeError("'butler' parameter must be a Butler instance") 

283 if config is not None or searchPaths is not None or writeable is not None: 

284 raise TypeError( 

285 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

286 ) 

287 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) 

288 

289 options = ButlerInstanceOptions( 

290 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs 

291 ) 

292 

293 # Load the Butler configuration. This may involve searching the 

294 # environment to locate a configuration file. 

295 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

296 butler_type = butler_config.get_butler_type() 

297 

298 # Make DirectButler if class is not specified. 

299 match butler_type: 

300 case ButlerType.DIRECT: 

301 from .direct_butler import DirectButler 

302 

303 return DirectButler.create_from_config( 

304 butler_config, 

305 options=options, 

306 without_datastore=without_datastore, 

307 ) 

308 case ButlerType.REMOTE: 

309 from .remote_butler import RemoteButlerFactory 

310 

311 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

312 return factory.create_butler_with_credentials_from_environment(butler_options=options) 

313 case _: 

314 raise TypeError(f"Unknown Butler type '{butler_type}'") 

315 

316 @staticmethod 

317 def makeRepo( 

318 root: ResourcePathExpression, 

319 config: Config | str | None = None, 

320 dimensionConfig: Config | str | None = None, 

321 standalone: bool = False, 

322 searchPaths: list[str] | None = None, 

323 forceConfigRoot: bool = True, 

324 outfile: ResourcePathExpression | None = None, 

325 overwrite: bool = False, 

326 ) -> Config: 

327 """Create an empty data repository by adding a butler.yaml config 

328 to a repository root directory. 

329 

330 Parameters 

331 ---------- 

332 root : `lsst.resources.ResourcePathExpression` 

333 Path or URI to the root location of the new repository. Will be 

334 created if it does not exist. 

335 config : `Config` or `str`, optional 

336 Configuration to write to the repository, after setting any 

337 root-dependent Registry or Datastore config options. Can not 

338 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

339 configuration will be used. Root-dependent config options 

340 specified in this config are overwritten if ``forceConfigRoot`` 

341 is `True`. 

342 dimensionConfig : `Config` or `str`, optional 

343 Configuration for dimensions, will be used to initialize registry 

344 database. 

345 standalone : `bool` 

346 If `True`, write all expanded defaults, not just customized or

347 repository-specific settings. 

348 This (mostly) decouples the repository from the default 

349 configuration, insulating it from changes to the defaults (which 

350 may be good or bad, depending on the nature of the changes). 

351 Future *additions* to the defaults will still be picked up when 

352 initializing `Butlers` to repos created with ``standalone=True``. 

353 searchPaths : `list` of `str`, optional 

354 Directory paths to search when calculating the full butler 

355 configuration. 

356 forceConfigRoot : `bool`, optional 

357 If `False`, any values present in the supplied ``config`` that 

358 would normally be reset are not overridden and will appear 

359 directly in the output config. This allows non-standard overrides 

360 of the root directory for a datastore or registry to be given. 

361 If this parameter is `True` the values for ``root`` will be 

362 forced into the resulting config if appropriate. 

363 outfile : `lsst.resources.ResourcePathExpression`, optional

364 If not-`None`, the output configuration will be written to this 

365 location rather than into the repository itself. Can be a URI 

366 string. Can refer to a directory that will be used to write 

367 ``butler.yaml``. 

368 overwrite : `bool`, optional 

369 Create a new configuration file even if one already exists 

370 in the specified output location. Default is to raise 

371 an exception. 

372 

373 Returns 

374 ------- 

375 config : `Config` 

376 The updated `Config` instance written to the repo. 

377 

378 Raises 

379 ------ 

380 ValueError 

381 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

382 regular Config (as these subclasses would make it impossible to 

383 support ``standalone=False``). 

384 FileExistsError 

385 Raised if the output config file already exists. 

386 os.error 

387 Raised if the directory does not exist, exists but is not a 

388 directory, or cannot be created. 

389 

390 Notes 

391 ----- 

392 Note that when ``standalone=False`` (the default), the configuration 

393 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

394 construct the repository should also be used to construct any Butlers 

395 to avoid configuration inconsistencies. 
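
Examples
--------
A minimal sketch of creating a new repository and then constructing a
butler against it; the path is illustrative::

    config = Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)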

396 """ 

397 if isinstance(config, ButlerConfig | ConfigSubset): 

398 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

399 

400 # Ensure that the root of the repository exists or can be made 

401 root_uri = ResourcePath(root, forceDirectory=True) 

402 root_uri.mkdir() 

403 

404 config = Config(config) 

405 

406 # If we are creating a new repo from scratch with relative roots, 

407 # do not propagate an explicit root from the config file 

408 if "root" in config: 

409 del config["root"] 

410 

411 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

412 imported_class = doImportType(full["datastore", "cls"]) 

413 if not issubclass(imported_class, Datastore): 

414 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

415 datastoreClass: type[Datastore] = imported_class 

416 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

417 

418 # if key exists in given config, parse it, otherwise parse the defaults 

419 # in the expanded config 

420 if config.get(("registry", "db")): 

421 registryConfig = RegistryConfig(config) 

422 else: 

423 registryConfig = RegistryConfig(full) 

424 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

425 if defaultDatabaseUri is not None: 

426 Config.updateParameters( 

427 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

428 ) 

429 else: 

430 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

431 

432 if standalone: 

433 config.merge(full) 

434 else: 

435 # Always expand the registry.managers section into the per-repo 

436 # config, because after the database schema is created, it's not 

437 # allowed to change anymore. Note that in the standalone=True 

438 # branch, _everything_ in the config is expanded, so there's no 

439 # need to special case this. 

440 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

441 configURI: ResourcePathExpression 

442 if outfile is not None: 

443 # When writing to a separate location we must include 

444 # the root of the butler repo in the config else it won't know 

445 # where to look. 

446 config["root"] = root_uri.geturl() 

447 configURI = outfile 

448 else: 

449 configURI = root_uri 

450 # Strip obscore configuration, if it is present, before writing config 

451 # to a file; the obscore config will be stored in the registry.

452 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

453 config_to_write = config.copy() 

454 del config_to_write[obscore_config_key] 

455 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

456 # The configFile attribute is updated; copy it back to the original.

457 config.configFile = config_to_write.configFile 

458 else: 

459 config.dumpToUri(configURI, overwrite=overwrite) 

460 

461 # Create Registry and populate tables 

462 registryConfig = RegistryConfig(config.get("registry")) 

463 dimensionConfig = DimensionConfig(dimensionConfig) 

464 _RegistryFactory(registryConfig).create_from_config( 

465 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

466 ) 

467 

468 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

469 

470 return config 

471 

472 @classmethod 

473 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

474 """Look up the label in a butler repository index. 

475 

476 Parameters 

477 ---------- 

478 label : `str` 

479 Label of the Butler repository to look up. 

480 return_label : `bool`, optional 

481 If ``label`` cannot be found in the repository index (either 

482 because the index is not defined or ``label`` is not in the index) and

483 ``return_label`` is `True` then return ``ResourcePath(label)``. 

484 If ``return_label`` is `False` (default) then an exception will be 

485 raised instead. 

486 

487 Returns 

488 ------- 

489 uri : `lsst.resources.ResourcePath` 

490 URI to the Butler repository associated with the given label, or

491 ``ResourcePath(label)`` if ``return_label`` is `True` and no match is found.

492 

493 Raises 

494 ------ 

495 KeyError 

496 Raised if the label is not found in the index, or if an index 

497 is not defined, and ``return_label`` is `False`. 

498 

499 Notes 

500 ----- 

501 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

502 information is discovered. 
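
Examples
--------
A sketch, assuming a repository index that defines a label ``"main"``::

    uri = Butler.get_repo_uri("main")
    butler = Butler.from_config(uri)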

503 """ 

504 return ButlerRepoIndex.get_repo_uri(label, return_label) 

505 

506 @classmethod 

507 def get_known_repos(cls) -> set[str]: 

508 """Retrieve the list of known repository labels. 

509 

510 Returns 

511 ------- 

512 repos : `set` of `str` 

513 All the known labels. Can be empty if no index can be found. 

514 

515 Notes 

516 ----- 

517 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

518 information is discovered. 

519 """ 

520 return ButlerRepoIndex.get_known_repos() 

521 

522 @abstractmethod 

523 def _caching_context(self) -> AbstractContextManager[None]: 

524 """Context manager that enables caching.""" 

525 raise NotImplementedError() 

526 

527 @abstractmethod 

528 def transaction(self) -> AbstractContextManager[None]: 

529 """Context manager supporting `Butler` transactions. 

530 

531 Transactions can be nested. 
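
Examples
--------
A sketch; registry changes made inside the block are rolled back if an
exception escapes (the dataset type and data ID are illustrative)::

    with butler.transaction():
        butler.put(catalog, "sourceTable", instrument="HSC", visit=903334)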

532 """ 

533 raise NotImplementedError() 

534 

535 @abstractmethod 

536 def put( 

537 self, 

538 obj: Any, 

539 datasetRefOrType: DatasetRef | DatasetType | str, 

540 /, 

541 dataId: DataId | None = None, 

542 *, 

543 run: str | None = None, 

544 **kwargs: Any, 

545 ) -> DatasetRef: 

546 """Store and register a dataset. 

547 

548 Parameters 

549 ---------- 

550 obj : `object` 

551 The dataset. 

552 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

553 When `DatasetRef` is provided, ``dataId`` should be `None`. 

554 Otherwise the `DatasetType` or name thereof. If a fully resolved 

555 `DatasetRef` is given the run and ID are used directly. 

556 dataId : `dict` or `DataCoordinate` 

557 A `dict` of `Dimension` link name, value pairs that label the 

558 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

559 should be provided as the second argument. 

560 run : `str`, optional 

561 The name of the run the dataset should be added to, overriding 

562 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

563 **kwargs 

564 Additional keyword arguments used to augment or construct a 

565 `DataCoordinate`. See `DataCoordinate.standardize` 

566 parameters. Not used if a resolved `DatasetRef` is provided.

567 

568 Returns 

569 ------- 

570 ref : `DatasetRef` 

571 A reference to the stored dataset, updated with the correct id if 

572 given. 

573 

574 Raises 

575 ------ 

576 TypeError 

577 Raised if the butler is read-only or if no run has been provided. 
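
Examples
--------
A minimal sketch; the run, dataset type, and data ID values are
illustrative, not part of this API::

    butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
    ref = butler.put(exposure, "calexp",
                     instrument="HSC", visit=903334, detector=42)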

578 """ 

579 raise NotImplementedError() 

580 

581 @abstractmethod 

582 def getDeferred( 

583 self, 

584 datasetRefOrType: DatasetRef | DatasetType | str, 

585 /, 

586 dataId: DataId | None = None, 

587 *, 

588 parameters: dict | None = None, 

589 collections: Any = None, 

590 storageClass: str | StorageClass | None = None, 

591 timespan: Timespan | None = None, 

592 **kwargs: Any, 

593 ) -> DeferredDatasetHandle: 

594 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

595 after an immediate registry lookup. 

596 

597 Parameters 

598 ---------- 

599 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

600 When `DatasetRef` is provided, `dataId` should be `None`.

601 Otherwise the `DatasetType` or name thereof. 

602 dataId : `dict` or `DataCoordinate`, optional 

603 A `dict` of `Dimension` link name, value pairs that label the 

604 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

605 should be provided as the first argument. 

606 parameters : `dict` 

607 Additional StorageClass-defined options to control reading, 

608 typically used to efficiently read only a subset of the dataset. 

609 collections : Any, optional 

610 Collections to be searched, overriding ``self.collections``. 

611 Can be any of the types supported by the ``collections`` argument 

612 to butler construction. 

613 storageClass : `StorageClass` or `str`, optional 

614 The storage class to be used to override the Python type 

615 returned by this method. By default the returned type matches 

616 the dataset type definition for this dataset. Specifying a 

617 read `StorageClass` can force a different type to be returned. 

618 This type must be compatible with the original type. 

619 timespan : `Timespan` or `None`, optional 

620 A timespan that the validity range of the dataset must overlap. 

621 If not provided and this is a calibration dataset type, an attempt 

622 will be made to find the timespan from any temporal coordinate 

623 in the data ID. 

624 **kwargs 

625 Additional keyword arguments used to augment or construct a 

626 `DataId`. See `DataId` parameters. 

627 

628 Returns 

629 ------- 

630 obj : `DeferredDatasetHandle` 

631 A handle which can be used to retrieve a dataset at a later time. 

632 

633 Raises 

634 ------ 

635 LookupError 

636 Raised if no matching dataset exists in the `Registry` or 

637 datastore. 

638 ValueError 

639 Raised if a resolved `DatasetRef` was passed as an input, but it 

640 differs from the one found in the registry. 

641 TypeError 

642 Raised if no collections were provided. 
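
Examples
--------
A sketch of deferring the read and applying a read parameter later; the
dataset type, data ID, and ``bbox`` parameter are illustrative and depend
on the storage class::

    handle = butler.getDeferred("calexp",
                                instrument="HSC", visit=903334, detector=42)
    cutout = handle.get(parameters={"bbox": bbox})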

643 """ 

644 raise NotImplementedError() 

645 

646 @abstractmethod 

647 def get( 

648 self, 

649 datasetRefOrType: DatasetRef | DatasetType | str, 

650 /, 

651 dataId: DataId | None = None, 

652 *, 

653 parameters: dict[str, Any] | None = None, 

654 collections: Any = None, 

655 storageClass: StorageClass | str | None = None, 

656 timespan: Timespan | None = None, 

657 **kwargs: Any, 

658 ) -> Any: 

659 """Retrieve a stored dataset. 

660 

661 Parameters 

662 ---------- 

663 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

664 When `DatasetRef` is provided, `dataId` should be `None`.

665 Otherwise the `DatasetType` or name thereof. 

666 If a resolved `DatasetRef`, the associated dataset 

667 is returned directly without additional querying. 

668 dataId : `dict` or `DataCoordinate` 

669 A `dict` of `Dimension` link name, value pairs that label the 

670 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

671 should be provided as the first argument. 

672 parameters : `dict` 

673 Additional StorageClass-defined options to control reading, 

674 typically used to efficiently read only a subset of the dataset. 

675 collections : Any, optional 

676 Collections to be searched, overriding ``self.collections``. 

677 Can be any of the types supported by the ``collections`` argument 

678 to butler construction. 

679 storageClass : `StorageClass` or `str`, optional 

680 The storage class to be used to override the Python type 

681 returned by this method. By default the returned type matches 

682 the dataset type definition for this dataset. Specifying a 

683 read `StorageClass` can force a different type to be returned. 

684 This type must be compatible with the original type. 

685 timespan : `Timespan` or `None`, optional 

686 A timespan that the validity range of the dataset must overlap. 

687 If not provided and this is a calibration dataset type, an attempt 

688 will be made to find the timespan from any temporal coordinate 

689 in the data ID. 

690 **kwargs 

691 Additional keyword arguments used to augment or construct a 

692 `DataCoordinate`. See `DataCoordinate.standardize` 

693 parameters. 

694 

695 Returns 

696 ------- 

697 obj : `object` 

698 The dataset. 

699 

700 Raises 

701 ------ 

702 LookupError 

703 Raised if no matching dataset exists in the `Registry`. 

704 TypeError 

705 Raised if no collections were provided. 

706 

707 Notes 

708 ----- 

709 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

710 this method requires that the given data ID include temporal dimensions 

711 beyond the dimensions of the dataset type itself, in order to find the 

712 dataset with the appropriate validity range. For example, a "bias" 

713 dataset with native dimensions ``{instrument, detector}`` could be 

714 fetched with a ``{instrument, detector, exposure}`` data ID, because 

715 ``exposure`` is a temporal dimension. 
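
Examples
--------
A minimal sketch; the collections, dataset type, and data ID values are
illustrative::

    butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
    calexp = butler.get("calexp", instrument="HSC", visit=903334, detector=42)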

716 """ 

717 raise NotImplementedError() 

718 

719 @abstractmethod 

720 def getURIs( 

721 self, 

722 datasetRefOrType: DatasetRef | DatasetType | str, 

723 /, 

724 dataId: DataId | None = None, 

725 *, 

726 predict: bool = False, 

727 collections: Any = None, 

728 run: str | None = None, 

729 **kwargs: Any, 

730 ) -> DatasetRefURIs: 

731 """Return the URIs associated with the dataset. 

732 

733 Parameters 

734 ---------- 

735 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

736 When `DatasetRef` is provided, `dataId` should be `None`.

737 Otherwise the `DatasetType` or name thereof. 

738 dataId : `dict` or `DataCoordinate` 

739 A `dict` of `Dimension` link name, value pairs that label the 

740 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

741 should be provided as the first argument. 

742 predict : `bool` 

743 If `True`, allow URIs to be returned of datasets that have not 

744 been written. 

745 collections : Any, optional 

746 Collections to be searched, overriding ``self.collections``. 

747 Can be any of the types supported by the ``collections`` argument 

748 to butler construction. 

749 run : `str`, optional 

750 Run to use for predictions, overriding ``self.run``. 

751 **kwargs 

752 Additional keyword arguments used to augment or construct a 

753 `DataCoordinate`. See `DataCoordinate.standardize` 

754 parameters. 

755 

756 Returns 

757 ------- 

758 uris : `DatasetRefURIs` 

759 The URI to the primary artifact associated with this dataset (if 

760 the dataset was disassembled within the datastore this may be 

761 `None`), and the URIs to any components associated with the dataset 

762 artifact (can be empty if there are no components).

763 """ 

764 raise NotImplementedError() 

765 

766 def getURI( 

767 self, 

768 datasetRefOrType: DatasetRef | DatasetType | str, 

769 /, 

770 dataId: DataId | None = None, 

771 *, 

772 predict: bool = False, 

773 collections: Any = None, 

774 run: str | None = None, 

775 **kwargs: Any, 

776 ) -> ResourcePath: 

777 """Return the URI to the Dataset. 

778 

779 Parameters 

780 ---------- 

781 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

782 When `DatasetRef` is provided, `dataId` should be `None`.

783 Otherwise the `DatasetType` or name thereof. 

784 dataId : `dict` or `DataCoordinate` 

785 A `dict` of `Dimension` link name, value pairs that label the 

786 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

787 should be provided as the first argument. 

788 predict : `bool` 

789 If `True`, allow URIs to be returned of datasets that have not 

790 been written. 

791 collections : Any, optional 

792 Collections to be searched, overriding ``self.collections``. 

793 Can be any of the types supported by the ``collections`` argument 

794 to butler construction. 

795 run : `str`, optional 

796 Run to use for predictions, overriding ``self.run``. 

797 **kwargs 

798 Additional keyword arguments used to augment or construct a 

799 `DataCoordinate`. See `DataCoordinate.standardize` 

800 parameters. 

801 

802 Returns 

803 ------- 

804 uri : `lsst.resources.ResourcePath` 

805 URI pointing to the Dataset within the datastore. If the 

806 Dataset does not exist in the datastore, and if ``predict`` is 

807 `True`, the URI will be a prediction and will include a URI 

808 fragment "#predicted". 

809 If the datastore does not have entities that relate well 

810 to the concept of a URI the returned URI string will be 

811 descriptive. The returned URI is not guaranteed to be obtainable. 

812 

813 Raises 

814 ------ 

815 LookupError 

816 Raised if a URI has been requested for a dataset that does not exist and

817 guessing is not allowed. 

818 ValueError 

819 Raised if a resolved `DatasetRef` was passed as an input, but it 

820 differs from the one found in the registry. 

821 TypeError 

822 Raised if no collections were provided. 

823 RuntimeError 

824 Raised if a URI is requested for a dataset that consists of 

825 multiple artifacts. 
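
Examples
--------
A sketch; with ``predict=True`` a URI can be returned for a dataset that
has not been written yet (names are illustrative)::

    uri = butler.getURI("calexp", instrument="HSC", visit=903334,
                        detector=42, predict=True, run="u/alice/DM-50000/a")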

826 """ 

827 primary, components = self.getURIs( 

828 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

829 ) 

830 

831 if primary is None or components: 

832 raise RuntimeError( 

833 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

834 "Use Butler.getURIs() instead." 

835 ) 

836 return primary 

837 

838 @abstractmethod 

839 def get_dataset_type(self, name: str) -> DatasetType: 

840 """Get the `DatasetType`. 

841 

842 Parameters 

843 ---------- 

844 name : `str` 

845 Name of the type. 

846 

847 Returns 

848 ------- 

849 type : `DatasetType` 

850 The `DatasetType` associated with the given name. 

851 

852 Raises 

853 ------ 

854 lsst.daf.butler.MissingDatasetTypeError 

855 Raised if the requested dataset type has not been registered. 

856 

857 Notes 

858 ----- 

859 This method handles component dataset types automatically, though most 

860 other operations do not. 

861 """ 

862 raise NotImplementedError() 

863 

864 @abstractmethod 

865 def get_dataset( 

866 self, 

867 id: DatasetId, 

868 *, 

869 storage_class: str | StorageClass | None = None, 

870 dimension_records: bool = False, 

871 datastore_records: bool = False, 

872 ) -> DatasetRef | None: 

873 """Retrieve a Dataset entry. 

874 

875 Parameters 

876 ---------- 

877 id : `DatasetId` 

878 The unique identifier for the dataset. 

879 storage_class : `str` or `StorageClass` or `None` 

880 A storage class to use when creating the returned entry. If given 

881 it must be compatible with the default storage class. 

882 dimension_records : `bool`, optional 

883 If `True` the ref will be expanded and contain dimension records. 

884 datastore_records : `bool`, optional 

885 If `True` the ref will contain associated datastore records. 

886 

887 Returns 

888 ------- 

889 ref : `DatasetRef` or `None` 

890 A ref to the Dataset, or `None` if no matching Dataset 

891 was found. 

892 """ 

893 raise NotImplementedError() 

894 

895 @abstractmethod 

896 def find_dataset( 

897 self, 

898 dataset_type: DatasetType | str, 

899 data_id: DataId | None = None, 

900 *, 

901 collections: str | Sequence[str] | None = None, 

902 timespan: Timespan | None = None, 

903 storage_class: str | StorageClass | None = None, 

904 dimension_records: bool = False, 

905 datastore_records: bool = False, 

906 **kwargs: Any, 

907 ) -> DatasetRef | None: 

908 """Find a dataset given its `DatasetType` and data ID. 

909 

910 This can be used to obtain a `DatasetRef` that permits the dataset to 

911 be read from a `Datastore`. If the dataset is a component and can not 

912 be found using the provided dataset type, a dataset ref for the parent 

913 will be returned instead but with the correct dataset type. 

914 

915 Parameters 

916 ---------- 

917 dataset_type : `DatasetType` or `str` 

918 A `DatasetType` or the name of one. If this is a `DatasetType` 

919 instance, its storage class will be respected and propagated to 

920 the output, even if it differs from the dataset type definition 

921 in the registry, as long as the storage classes are convertible. 

922 data_id : `dict` or `DataCoordinate`, optional 

923 A `dict`-like object containing the `Dimension` links that identify 

924 the dataset within a collection. If it is a `dict` the dataId 

925 can include dimension record values such as ``day_obs`` and 

926 ``seq_num`` or ``full_name`` that can be used to derive the 

927 primary dimension. 

928 collections : `str` or `list` [`str`], optional 

929 An ordered list of collections to search for the dataset.

930 Defaults to ``self.defaults.collections``. 

931 timespan : `Timespan`, optional 

932 A timespan that the validity range of the dataset must overlap. 

933 If not provided, any `~CollectionType.CALIBRATION` collections 

934 matched by the ``collections`` argument will not be searched. 

935 storage_class : `str` or `StorageClass` or `None` 

936 A storage class to use when creating the returned entry. If given 

937 it must be compatible with the default storage class. 

938 dimension_records : `bool`, optional 

939 If `True` the ref will be expanded and contain dimension records. 

940 datastore_records : `bool`, optional 

941 If `True` the ref will contain associated datastore records. 

942 **kwargs 

943 Additional keyword arguments passed to 

944 `DataCoordinate.standardize` to convert ``dataId`` to a true 

945 `DataCoordinate` or augment an existing one. This can also include 

946 dimension record metadata that can be used to derive a primary 

947 dimension value. 

948 

949 Returns 

950 ------- 

951 ref : `DatasetRef` or `None`

952 A reference to the dataset, or `None` if no matching Dataset 

953 was found. 

954 

955 Raises 

956 ------ 

957 lsst.daf.butler.NoDefaultCollectionError 

958 Raised if ``collections`` is `None` and 

959 ``self.collections`` is `None`. 

960 LookupError 

961 Raised if one or more data ID keys are missing. 

962 lsst.daf.butler.MissingDatasetTypeError 

963 Raised if the dataset type does not exist. 

964 lsst.daf.butler.MissingCollectionError 

965 Raised if any of ``collections`` does not exist in the registry. 

966 

967 Notes 

968 ----- 

969 This method simply returns `None` and does not raise an exception even 

970 when the set of collections searched is intrinsically incompatible with 

971 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

972 only `~CollectionType.CALIBRATION` collections are being searched. 

973 This may make it harder to debug some lookup failures, but the behavior 

974 is intentional; we consider it more important that failed searches are 

975 reported consistently, regardless of the reason, and that adding 

976 additional collections that do not contain a match to the search path 

977 never changes the behavior. 

978 

979 This method handles component dataset types automatically, though most 

980 other query operations do not. 
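
Examples
--------
A sketch; the dataset type, data ID, and collection are illustrative::

    ref = butler.find_dataset(
        "raw",
        instrument="LATISS", exposure=2023032100123,
        collections="LATISS/raw/all",
    )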

981 """ 

982 raise NotImplementedError() 

983 

984 @abstractmethod 

985 def retrieveArtifacts( 

986 self, 

987 refs: Iterable[DatasetRef], 

988 destination: ResourcePathExpression, 

989 transfer: str = "auto", 

990 preserve_path: bool = True, 

991 overwrite: bool = False, 

992 ) -> list[ResourcePath]: 

993 """Retrieve the artifacts associated with the supplied refs. 

994 

995 Parameters 

996 ---------- 

997 refs : iterable of `DatasetRef` 

998 The datasets for which artifacts are to be retrieved. 

999 A single ref can result in multiple artifacts. The refs must 

1000 be resolved. 

1001 destination : `lsst.resources.ResourcePath` or `str` 

1002 Location to write the artifacts. 

1003 transfer : `str`, optional 

1004 Method to use to transfer the artifacts. Must be one of the options 

1005 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

1006 "move" is not allowed. 

1007 preserve_path : `bool`, optional 

1008 If `True` the full path of the artifact within the datastore 

1009 is preserved. If `False` the final file component of the path 

1010 is used. 

1011 overwrite : `bool`, optional 

1012 If `True` allow transfers to overwrite existing files at the 

1013 destination. 

1014 

1015 Returns 

1016 ------- 

1017 targets : `list` of `lsst.resources.ResourcePath` 

1018 URIs of file artifacts in the destination location. Order is not

1019 preserved. 

1020 

1021 Notes 

1022 ----- 

1023 For non-file datastores the artifacts written to the destination 

1024 may not match the representation inside the datastore. For example 

1025 a hierarchical data structure in a NoSQL database may well be stored 

1026 as a JSON file. 
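
Examples
--------
A sketch of copying the artifacts behind a query result to a local
directory; the query and destination are illustrative::

    refs = butler.registry.queryDatasets("calexp", collections="HSC/defaults")
    paths = butler.retrieveArtifacts(refs, "/tmp/calexp-export",
                                     transfer="copy")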

1027 """ 

1028 raise NotImplementedError() 

1029 

1030 @abstractmethod 

1031 def exists( 

1032 self, 

1033 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1034 /, 

1035 data_id: DataId | None = None, 

1036 *, 

1037 full_check: bool = True, 

1038 collections: Any = None, 

1039 **kwargs: Any, 

1040 ) -> DatasetExistence: 

1041 """Indicate whether a dataset is known to Butler registry and 

1042 datastore. 

1043 

1044 Parameters 

1045 ---------- 

1046 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1047 When `DatasetRef` is provided, `data_id` should be `None`.

1048 Otherwise the `DatasetType` or name thereof. 

1049 data_id : `dict` or `DataCoordinate` 

1050 A `dict` of `Dimension` link name, value pairs that label the 

1051 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1052 should be provided as the first argument. 

1053 full_check : `bool`, optional 

1054 If `True`, a check will be made for the actual existence of a 

1055 dataset artifact. This will involve additional overhead due to 

1056 the need to query an external system. If `False`, this check will 

1057 be omitted, and the registry and datastore will solely be asked 

1058 if they know about the dataset but no direct check for the 

1059 artifact will be performed. 

1060 collections : Any, optional 

1061 Collections to be searched, overriding ``self.collections``. 

1062 Can be any of the types supported by the ``collections`` argument 

1063 to butler construction. 

1064 **kwargs 

1065 Additional keyword arguments used to augment or construct a 

1066 `DataCoordinate`. See `DataCoordinate.standardize` 

1067 parameters. 

1068 

1069 Returns 

1070 ------- 

1071 existence : `DatasetExistence` 

1072 Object indicating whether the dataset is known to registry and 

1073 datastore. Evaluates to `True` if the dataset is present and known 

1074 to both. 
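
Examples
--------
A sketch; the returned `DatasetExistence` evaluates to `True` only when
the dataset is known to both registry and datastore (names are
illustrative)::

    existence = butler.exists("calexp", instrument="HSC", visit=903334,
                              detector=42, collections="HSC/defaults")
    if not existence:
        print(f"calexp not fully present: {existence!r}")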

1075 """ 

1076 raise NotImplementedError() 

1077 

1078 @abstractmethod 

1079 def _exists_many( 

1080 self, 

1081 refs: Iterable[DatasetRef], 

1082 /, 

1083 *, 

1084 full_check: bool = True, 

1085 ) -> dict[DatasetRef, DatasetExistence]: 

1086 """Indicate whether multiple datasets are known to Butler registry and 

1087 datastore. 

1088 

1089 This is an experimental API that may change at any moment. 

1090 

1091 Parameters 

1092 ---------- 

1093 refs : iterable of `DatasetRef` 

1094 The datasets to be checked. 

1095 full_check : `bool`, optional 

1096 If `True`, a check will be made for the actual existence of each 

1097 dataset artifact. This will involve additional overhead due to 

1098 the need to query an external system. If `False`, this check will 

1099 be omitted, and the registry and datastore will solely be asked 

1100 if they know about the dataset(s) but no direct check for the 

1101 artifact(s) will be performed. 

1102 

1103 Returns 

1104 ------- 

1105 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1106 Mapping from the given dataset refs to an enum indicating the 

1107 status of the dataset in registry and datastore. 

1108 Each value evaluates to `True` if the dataset is present and known 

1109 to both. 

1110 """ 

1111 raise NotImplementedError() 

1112 

1113 @abstractmethod 

1114 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1115 """Remove one or more `~CollectionType.RUN` collections and the 

1116 datasets within them. 

1117 

1118 Parameters 

1119 ---------- 

1120 names : `~collections.abc.Iterable` [ `str` ] 

1121 The names of the collections to remove. 

1122 unstore : `bool`, optional 

1123 If `True` (default), delete datasets from all datastores in which 

1124 they are present, and attempt to rollback the registry deletions if 

1125 datastore deletions fail (which may not always be possible). If 

1126 `False`, datastore records for these datasets are still removed, 

1127 but any artifacts (e.g. files) will not be. 

1128 

1129 Raises 

1130 ------ 

1131 TypeError 

1132 Raised if one or more collections are not of type 

1133 `~CollectionType.RUN`. 

1134 """ 

1135 raise NotImplementedError() 

1136 

1137 @abstractmethod 

1138 def ingest( 

1139 self, 

1140 *datasets: FileDataset, 

1141 transfer: str | None = "auto", 

1142 record_validation_info: bool = True, 

1143 ) -> None: 

1144 """Store and register one or more datasets that already exist on disk. 

1145 

1146 Parameters 

1147 ---------- 

1148 *datasets : `FileDataset` 

1149 Each positional argument is a struct containing information about 

1150 a file to be ingested, including its URI (either absolute or 

1151 relative to the datastore root, if applicable), a resolved 

1152 `DatasetRef`, and optionally a formatter class or its 

1153 fully-qualified string name. If a formatter is not provided, the 

1154 formatter that would be used for `put` is assumed. On successful 

1155 ingest all `FileDataset.formatter` attributes will be set to the 

1156 formatter class used. `FileDataset.path` attributes may be modified 

1157 to put paths in whatever the datastore considers a standardized 

1158 form. 

1159 transfer : `str`, optional 

1160 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1161 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1162 transfer the file. 

1163 record_validation_info : `bool`, optional 

1164 If `True`, the default, the datastore can record validation 

1165 information associated with the file. If `False` the datastore 

1166 will not attempt to track any information such as checksums 

1167 or file sizes. This can be useful if such information is tracked 

1168 in an external system or if the file is to be compressed in place. 

1169 It is up to the datastore whether this parameter is relevant. 

1170 

1171 Raises 

1172 ------ 

1173 TypeError 

1174 Raised if the butler is read-only or if no run was provided. 

1175 NotImplementedError 

1176 Raised if the `Datastore` does not support the given transfer mode. 

1177 DatasetTypeNotSupportedError 

1178 Raised if one or more files to be ingested have a dataset type that 

1179 is not supported by the `Datastore`.

1180 FileNotFoundError 

1181 Raised if one of the given files does not exist. 

1182 FileExistsError 

1183 Raised if transfer is not `None` but the (internal) location the 

1184 file would be moved to is already occupied. 

1185 

1186 Notes 

1187 ----- 

1188 This operation is not fully exception safe: if a database operation 

1189 fails, the given `FileDataset` instances may be only partially updated. 

1190 

1191 It is atomic in terms of database operations (they will either all 

1192 succeed or all fail) providing the database engine implements 

1193 transactions correctly. It will attempt to be atomic in terms of 

1194 filesystem operations as well, but this cannot be implemented 

1195 rigorously for most datastores. 
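
Examples
--------
A sketch of ingesting one existing file; ``ref`` is assumed to be a
resolved `DatasetRef` obtained elsewhere and the path is illustrative::

    from lsst.daf.butler import FileDataset

    dataset = FileDataset(path="/data/raw/exposure.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy")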

1196 """ 

1197 raise NotImplementedError() 

1198 

1199 @abstractmethod 

1200 def export( 

1201 self, 

1202 *, 

1203 directory: str | None = None, 

1204 filename: str | None = None, 

1205 format: str | None = None, 

1206 transfer: str | None = None, 

1207 ) -> AbstractContextManager[RepoExportContext]: 

1208 """Export datasets from the repository represented by this `Butler`. 

1209 

1210 This method is a context manager that returns a helper object 

1211 (`RepoExportContext`) that is used to indicate what information from 

1212 the repository should be exported. 

1213 

1214 Parameters 

1215 ---------- 

1216 directory : `str`, optional 

1217 Directory dataset files should be written to if ``transfer`` is not 

1218 `None`. 

1219 filename : `str`, optional 

1220 Name for the file that will include database information associated 

1221 with the exported datasets. If this is not an absolute path and 

1222 ``directory`` is not `None`, it will be written to ``directory`` 

1223 instead of the current working directory. Defaults to 

1224 "export.{format}". 

1225 format : `str`, optional 

1226 File format for the database information file. If `None`, the 

1227 extension of ``filename`` will be used. 

1228 transfer : `str`, optional 

1229 Transfer mode passed to `Datastore.export`. 

1230 

1231 Raises 

1232 ------ 

1233 TypeError 

1234 Raised if the set of arguments passed is inconsistent. 

1235 

1236 Examples 

1237 -------- 

1238 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1239 methods are used to provide the iterables over data IDs and/or datasets 

1240 to be exported:: 

1241 

1242 with butler.export("exports.yaml") as export: 

1243 # Export all flats, but none of the dimension element rows 

1244 # (i.e. data ID information) associated with them. 

1245 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1246 elements=()) 

1247 # Export all datasets that start with "deepCoadd_" and all of 

1248 # their associated data ID information. 

1249 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1250 """ 

1251 raise NotImplementedError() 

1252 

1253 @abstractmethod 

1254 def import_( 

1255 self, 

1256 *, 

1257 directory: ResourcePathExpression | None = None, 

1258 filename: ResourcePathExpression | TextIO | None = None, 

1259 format: str | None = None, 

1260 transfer: str | None = None, 

1261 skip_dimensions: set | None = None, 

1262 ) -> None: 

1263 """Import datasets into this repository that were exported from a 

1264 different butler repository via `~lsst.daf.butler.Butler.export`. 

1265 

1266 Parameters 

1267 ---------- 

1268 directory : `~lsst.resources.ResourcePathExpression`, optional 

1269 Directory containing dataset files to import from. If `None`, 

1270 ``filename`` and all dataset file paths specified therein must 

1271 be absolute. 

1272 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1273 A stream or name of file that contains database information 

1274 associated with the exported datasets, typically generated by 

1275 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1276 `~lsst.resources.ResourcePath` and is not an absolute path, 

1277 it will first be looked for relative to ``directory`` and if not 

1278 found there it will be looked for in the current working 

1279 directory. Defaults to "export.{format}". 

1280 format : `str`, optional 

1281 File format for ``filename``. If `None`, the extension of 

1282 ``filename`` will be used. 

1283 transfer : `str`, optional 

1284 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1285 skip_dimensions : `set`, optional 

1286 Names of dimensions that should be skipped and not imported. 

1287 

1288 Raises 

1289 ------ 

1290 TypeError 

1291 Raised if the set of arguments passed is inconsistent, or if the 

1292 butler is read-only. 
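
Examples
--------
A sketch of importing a previously exported subset; the paths are
illustrative::

    butler.import_(directory="/path/to/exports", filename="export.yaml",
                   transfer="copy")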

1293 """ 

1294 raise NotImplementedError() 

1295 

1296 @abstractmethod 

1297 def transfer_dimension_records_from( 

1298 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1299 ) -> None: 

1300 """Transfer dimension records to this Butler from another Butler. 

1301 

1302 Parameters 

1303 ---------- 

1304 source_butler : `LimitedButler` or `Butler` 

1305 Butler from which the records are to be transferred. If data IDs 

1306 in ``source_refs`` are not expanded then this has to be a full 

1307 `Butler` whose registry will be used to expand data IDs. If the 

1308 source refs contain coordinates that are used to populate other 

1309 records then this will also need to be a full `Butler`. 

1310 source_refs : iterable of `DatasetRef` 

1311 Datasets defined in the source butler whose dimension records 

1312 should be transferred to this butler. In most circumstances,

1313 transfer is faster if the dataset refs are expanded. 

1314 """ 

1315 raise NotImplementedError() 

1316 

1317 @abstractmethod 

1318 def transfer_from( 

1319 self, 

1320 source_butler: LimitedButler, 

1321 source_refs: Iterable[DatasetRef], 

1322 transfer: str = "auto", 

1323 skip_missing: bool = True, 

1324 register_dataset_types: bool = False, 

1325 transfer_dimensions: bool = False, 

1326 dry_run: bool = False, 

1327 ) -> Collection[DatasetRef]: 

1328 """Transfer datasets to this Butler from a run in another Butler. 

1329 

1330 Parameters 

1331 ---------- 

1332 source_butler : `LimitedButler` 

1333 Butler from which the datasets are to be transferred. If data IDs 

1334 in ``source_refs`` are not expanded then this has to be a full 

1335 `Butler` whose registry will be used to expand data IDs. 

1336 source_refs : iterable of `DatasetRef` 

1337 Datasets defined in the source butler that should be transferred to 

1338 this butler. In most circumstances, ``transfer_from`` is faster if 

1339 the dataset refs are expanded. 

1340 transfer : `str`, optional 

1341 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1342 skip_missing : `bool` 

1343 If `True`, datasets with no datastore artifact associated with 

1344 them are not transferred. If `False` a registry entry will be 

1345 created even if no datastore record is created (and so will 

1346 look equivalent to the dataset being unstored). 

1347 register_dataset_types : `bool` 

1348 If `True` any missing dataset types are registered. Otherwise 

1349 an exception is raised. 

1350 transfer_dimensions : `bool`, optional 

1351 If `True`, dimension record data associated with the new datasets 

1352 will be transferred. 

1353 dry_run : `bool`, optional 

1354 If `True` the transfer will be processed without any modifications 

1355 made to the target butler and as if the target butler did not 

1356 have any of the datasets. 

1357 

1358 Returns 

1359 ------- 

1360 refs : `list` of `DatasetRef` 

1361 The refs added to this Butler. 

1362 

1363 Notes 

1364 ----- 

1365 The datastore artifact has to exist for a transfer 

1366 to be made but non-existence is not an error. 

1367 

1368 Datasets that already exist in this run will be skipped. 

1369 

1370 The datasets are imported as part of a transaction, although 

1371 dataset types are registered before the transaction is started. 

1372 This means that it is possible for a dataset type to be registered 

1373 even though transfer has failed. 
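
Examples
--------
A sketch of copying datasets between two repositories; the paths,
collection, and dataset type are illustrative::

    source = Butler.from_config("/path/to/source")
    target = Butler.from_config("/path/to/target", writeable=True)
    refs = source.registry.queryDatasets("calexp", collections="HSC/defaults")
    transferred = target.transfer_from(source, refs, transfer="copy",
                                       register_dataset_types=True)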

1374 """ 

1375 raise NotImplementedError() 

1376 

1377 @abstractmethod 

1378 def validateConfiguration( 

1379 self, 

1380 logFailures: bool = False, 

1381 datasetTypeNames: Iterable[str] | None = None, 

1382 ignore: Iterable[str] | None = None, 

1383 ) -> None: 

1384 """Validate butler configuration. 

1385 

1386 Checks that each `DatasetType` can be stored in the `Datastore`. 

1387 

1388 Parameters 

1389 ---------- 

1390 logFailures : `bool`, optional 

1391 If `True`, output a log message for every validation error 

1392 detected. 

1393 datasetTypeNames : iterable of `str`, optional 

1394 The `DatasetType` names that should be checked. This allows 

1395 only a subset to be selected. 

1396 ignore : iterable of `str`, optional 

1397 Names of DatasetTypes to skip over. This can be used to skip 

1398 known problems. If a named `DatasetType` corresponds to a 

1399 composite, all components of that `DatasetType` will also be 

1400 ignored. 

1401 

1402 Raises 

1403 ------ 

1404 ButlerValidationError 

1405 Raised if there is some inconsistency with how this Butler 

1406 is configured. 
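
        Examples
        --------
        A minimal sketch, not taken from the source; ``butler`` and the
        dataset type names are assumed for illustration::

            # Log every validation problem and restrict the check to a
            # couple of (assumed) dataset types.
            butler.validateConfiguration(
                logFailures=True,
                datasetTypeNames=["raw", "calexp"],
            )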

1407 """ 

1408 raise NotImplementedError() 

1409 

1410 @property 

1411 @abstractmethod 

1412 def collections(self) -> Sequence[str]: 

1413 """The collections to search by default, in order 

1414 (`~collections.abc.Sequence` [ `str` ]). 

1415 """ 

1416 raise NotImplementedError() 

1417 

1418 @property 

1419 @abstractmethod 

1420 def run(self) -> str | None: 

1421 """Name of the run this butler writes outputs to by default (`str` or 

1422 `None`). 

1423 """ 

1424 raise NotImplementedError() 

1425 

1426 @property 

1427 @abstractmethod 

1428 def registry(self) -> Registry: 

1429 """The object that manages dataset metadata and relationships 

1430 (`Registry`). 

1431 

1432 Many operations that don't involve reading or writing butler datasets 

1433 are accessible only via `Registry` methods. Eventually these methods 

1434 will be replaced by equivalent `Butler` methods. 

1435 """ 

1436 raise NotImplementedError() 

1437 

1438 @abstractmethod 

1439 def _query(self) -> AbstractContextManager[Query]: 

1440 """Context manager returning a `Query` object used for construction 

1441 and execution of complex queries. 
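
        Examples
        --------
        A minimal sketch of the context-manager usage pattern, not taken
        from the source; ``butler`` and the dimension and instrument names
        are assumed for illustration::

            with butler._query() as query:
                data_ids = list(
                    query.where("instrument = 'HSC'").data_ids(["visit", "detector"])
                )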

1442 """ 

1443 raise NotImplementedError() 

1444 

1445 def _query_data_ids( 

1446 self, 

1447 dimensions: DimensionGroup | Iterable[str] | str, 

1448 *, 

1449 data_id: DataId | None = None, 

1450 where: str = "", 

1451 bind: Mapping[str, Any] | None = None, 

1452 with_dimension_records: bool = False, 

1453 order_by: Iterable[str] | str | None = None, 

1454 limit: int | None = None, 

1455 explain: bool = True, 

1456 **kwargs: Any, 

1457 ) -> list[DataCoordinate]: 

1458 """Query for data IDs matching user-provided criteria. 

1459 

1460 Parameters 

1461 ---------- 

1462 dimensions : `DimensionGroup`, `str`, or \ 

1463 `~collections.abc.Iterable` [`str`] 

1464 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1465 instances or `str`. Will be automatically expanded to a complete 

1466 `DimensionGroup`. 

1467 data_id : `dict` or `DataCoordinate`, optional 

1468 A data ID whose key-value pairs are used as equality constraints 

1469 in the query. 

1470 where : `str`, optional 

1471 A string expression similar to a SQL WHERE clause. May involve 

1472 any column of a dimension table or (as a shortcut for the primary 

1473 key column of a dimension table) dimension name. See 

1474 :ref:`daf_butler_dimension_expressions` for more information. 

1475 bind : `~collections.abc.Mapping`, optional 

1476 Mapping containing literal values that should be injected into the 

1477 ``where`` expression, keyed by the identifiers they replace. 

1478 Values of collection type can be expanded in some cases; see 

1479 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1480 information. 

1481 with_dimension_records : `bool`, optional 

1482 If `True` (default is `False`) then returned data IDs will have 

1483 dimension records. 

1484 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1485 Names of the columns/dimensions to use for ordering returned data 

1486 IDs. Column name can be prefixed with minus (``-``) to use 

1487 descending ordering. 

1488 limit : `int`, optional 

1489 Upper limit on the number of returned records. 

1490 explain : `bool`, optional 

1491 If `True` (default), an `EmptyQueryResultError` exception is 

1492 raised when the resulting list is empty. The exception contains 

1493 a non-empty list of strings explaining possible causes for the 

1494 empty result. 

1495 **kwargs 

1496 Additional keyword arguments are forwarded to 

1497 `DataCoordinate.standardize` when processing the ``data_id`` 

1498 argument (and may be used to provide a constraining data ID even 

1499 when the ``data_id`` argument is `None`). 

1500 

1501 Returns 

1502 ------- 

1503 dataIds : `list` [`DataCoordinate`] 

1504 Data IDs matching the given query parameters. These are always 

1505 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1506 returns `True`). 

1507 

1508 Raises 

1509 ------ 

1510 lsst.daf.butler.registry.DataIdError 

1511 Raised when ``data_id`` or keyword arguments specify unknown 

1512 dimensions or values, or when they contain inconsistent values. 

1513 lsst.daf.butler.registry.UserExpressionError 

1514 Raised when ``where`` expression is invalid. 

1515 lsst.daf.butler.EmptyQueryResultError 

1516 Raised when the query generates an empty result and ``explain`` is 

1517 set to `True`. 

1518 TypeError 

1519 Raised when the arguments are incompatible. 
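
        Examples
        --------
        A minimal sketch, not taken from the source; ``butler`` and the
        dimension names and constraint values are assumed for
        illustration::

            data_ids = butler._query_data_ids(
                ["visit", "detector"],
                where="instrument = 'HSC' AND visit = 903334",
                limit=10,
            )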

1520 """ 

1521 if data_id is None: 

1522 data_id = DataCoordinate.make_empty(self.dimensions) 

1523 with self._query() as query: 

1524 result = ( 

1525 query.where(data_id, where, bind=bind, **kwargs) 

1526 .data_ids(dimensions) 

1527 .order_by(*ensure_iterable(order_by)) 

1528 .limit(limit) 

1529 ) 

1530 if with_dimension_records: 

1531 result = result.with_dimension_records() 

1532 data_ids = list(result) 

1533 if explain and not data_ids: 

1534 raise EmptyQueryResultError(list(result.explain_no_results())) 

1535 return data_ids 

1536 

1537 def _query_datasets( 

1538 self, 

1539 dataset_type: str | DatasetType, 

1540 collections: str | Iterable[str] | None = None, 

1541 *, 

1542 find_first: bool = True, 

1543 data_id: DataId | None = None, 

1544 where: str = "", 

1545 bind: Mapping[str, Any] | None = None, 

1546 with_dimension_records: bool = False, 

1547 explain: bool = True, 

1548 **kwargs: Any, 

1549 ) -> list[DatasetRef]: 

1550 """Query for dataset references matching user-provided criteria. 

1551 

1552 Parameters 

1553 ---------- 

1554 dataset_type : `str` or `DatasetType` 

1555 Dataset type object or name to search for. 

1556 collections : collection expression, optional 

1557 A collection name or iterable of collection names to search. If not 

1558 provided, the default collections are used. See 

1559 :ref:`daf_butler_collection_expressions` for more information. 

1560 find_first : `bool`, optional 

1561 If `True` (default), for each result data ID, only yield one 

1562 `DatasetRef` of each `DatasetType`, from the first collection in 

1563 which a dataset of that dataset type appears (according to the 

1564 order of ``collections`` passed in). If `True`, ``collections`` 

1565 must not contain regular expressions and may not be ``...``. 

1566 data_id : `dict` or `DataCoordinate`, optional 

1567 A data ID whose key-value pairs are used as equality constraints in 

1568 the query. 

1569 where : `str`, optional 

1570 A string expression similar to a SQL WHERE clause. May involve any 

1571 column of a dimension table or (as a shortcut for the primary key 

1572 column of a dimension table) dimension name. See 

1573 :ref:`daf_butler_dimension_expressions` for more information. 

1574 bind : `~collections.abc.Mapping`, optional 

1575 Mapping containing literal values that should be injected into the 

1576 ``where`` expression, keyed by the identifiers they replace. Values 

1577 of collection type can be expanded in some cases; see 

1578 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1579 information. 

1580 with_dimension_records : `bool`, optional 

1581 If `True` (default is `False`) then returned data IDs will have 

1582 dimension records. 

1583 explain : `bool`, optional 

1584 If `True` (default), an `EmptyQueryResultError` exception is 

1585 raised when the resulting list is empty. The exception contains 

1586 a non-empty list of strings explaining possible causes for the 

1587 empty result. 

1588 **kwargs 

1589 Additional keyword arguments are forwarded to 

1590 `DataCoordinate.standardize` when processing the ``data_id`` 

1591 argument (and may be used to provide a constraining data ID even 

1592 when the ``data_id`` argument is `None`). 

1593 

1594 Returns 

1595 ------- 

1596 refs : `list` [`DatasetRef`] 

1597 Dataset references matching the given query criteria. Nested data 

1598 IDs are guaranteed to include values for all implied dimensions 

1599 (i.e. `DataCoordinate.hasFull` will return `True`). 

1600 

1601 Raises 

1602 ------ 

1603 lsst.daf.butler.registry.DatasetTypeExpressionError 

1604 Raised when ``dataset_type`` expression is invalid. 

1605 lsst.daf.butler.registry.DataIdError 

1606 Raised when ``data_id`` or keyword arguments specify unknown 

1607 dimensions or values, or when they contain inconsistent values. 

1608 lsst.daf.butler.registry.UserExpressionError 

1609 Raised when ``where`` expression is invalid. 

1610 lsst.daf.butler.EmptyQueryResultError 

1611 Raised when the query generates an empty result and ``explain`` is 

1612 set to `True`. 

1613 TypeError 

1614 Raised when the arguments are incompatible, such as when a 

1615 collection wildcard is passed when ``find_first`` is `True`, or 

1616 when ``collections`` is `None` and default butler collections are 

1617 not defined. 

1618 

1619 Notes 

1620 ----- 

1621 When multiple dataset types are queried in a single call, the results 

1622 of this operation are equivalent to querying for each dataset type 

1623 separately in turn, and no information about the relationships between 

1624 datasets of different types is included. 
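
        Examples
        --------
        A minimal sketch, not taken from the source; ``butler``, the
        dataset type, collection name, and data ID values are assumed for
        illustration::

            refs = butler._query_datasets(
                "calexp",
                collections="HSC/runs/RC2",
                where="visit = 903334 AND detector = 42",
                instrument="HSC",
            )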

1625 """ 

1626 if data_id is None: 

1627 data_id = DataCoordinate.make_empty(self.dimensions) 

1628 with self._query() as query: 

1629 result = query.where(data_id, where, bind=bind, **kwargs).datasets( 

1630 dataset_type, 

1631 collections=collections, 

1632 find_first=find_first, 

1633 ) 

1634 if with_dimension_records: 

1635 result = result.with_dimension_records() 

1636 refs = list(result) 

1637 if explain and not refs: 

1638 raise EmptyQueryResultError(list(result.explain_no_results())) 

1639 return refs 

1640 

1641 def _query_dimension_records( 

1642 self, 

1643 element: str, 

1644 *, 

1645 data_id: DataId | None = None, 

1646 where: str = "", 

1647 bind: Mapping[str, Any] | None = None, 

1648 order_by: Iterable[str] | str | None = None, 

1649 limit: int | None = None, 

1650 explain: bool = True, 

1651 **kwargs: Any, 

1652 ) -> list[DimensionRecord]: 

1653 """Query for dimension information matching user-provided criteria. 

1654 

1655 Parameters 

1656 ---------- 

1657 element : `str` 

1658 The name of a dimension element to obtain records for. 

1659 data_id : `dict` or `DataCoordinate`, optional 

1660 A data ID whose key-value pairs are used as equality constraints 

1661 in the query. 

1662 where : `str`, optional 

1663 A string expression similar to a SQL WHERE clause. See 

1664 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1665 information. 

1666 bind : `~collections.abc.Mapping`, optional 

1667 Mapping containing literal values that should be injected into the 

1668 ``where`` expression, keyed by the identifiers they replace. 

1669 Values of collection type can be expanded in some cases; see 

1670 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1671 information. 

1672 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1673 Names of the columns/dimensions to use for ordering returned data 

1674 IDs. Column name can be prefixed with minus (``-``) to use 

1675 descending ordering. 

1676 limit : `int`, optional 

1677 Upper limit on the number of returned records. 

1678 explain : `bool`, optional 

1679 If `True` (default), an `EmptyQueryResultError` exception is 

1680 raised when the resulting list is empty. The exception contains 

1681 a non-empty list of strings explaining possible causes for the 

1682 empty result. 

1683 **kwargs 

1684 Additional keyword arguments are forwarded to 

1685 `DataCoordinate.standardize` when processing the ``data_id`` 

1686 argument (and may be used to provide a constraining data ID even 

1687 when the ``data_id`` argument is `None`). 

1688 

1689 Returns 

1690 ------- 

1691 records : `list`[`DimensionRecord`] 

1692 Dimension records matching the given query parameters. 

1693 

1694 Raises 

1695 ------ 

1696 lsst.daf.butler.registry.DataIdError 

1697 Raised when ``data_id`` or keyword arguments specify unknown 

1698 dimensions or values, or when they contain inconsistent values. 

1699 lsst.daf.butler.registry.UserExpressionError 

1700 Raised when ``where`` expression is invalid. 

1701 lsst.daf.butler.EmptyQueryResultError 

1702 Raised when the query generates an empty result and ``explain`` is 

1703 set to `True`. 

1704 TypeError 

1705 Raised when the arguments are incompatible. 
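
        Examples
        --------
        A minimal sketch, not taken from the source; ``butler`` and the
        element and instrument names are assumed for illustration::

            records = butler._query_dimension_records(
                "detector",
                where="instrument = 'LATISS'",
                order_by="detector",
            )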

1709 """ 

1710 if data_id is None: 

1711 data_id = DataCoordinate.make_empty(self.dimensions) 

1712 with self._query() as query: 

1713 result = ( 

1714 query.where(data_id, where, bind=bind, **kwargs) 

1715 .dimension_records(element) 

1716 .order_by(*ensure_iterable(order_by)) 

1717 .limit(limit) 

1718 ) 

1719 dimension_records = list(result) 

1720 if explain and not dimension_records: 

1721 raise EmptyQueryResultError(list(result.explain_no_results())) 

1722 return dimension_records 

1723 

1724 @abstractmethod 

1725 def _clone( 

1726 self, 

1727 *, 

1728 collections: Any = None, 

1729 run: str | None = None, 

1730 inferDefaults: bool = True, 

1731 **kwargs: Any, 

1732 ) -> Butler: 

1733 """Return a new Butler instance connected to the same repository 

1734 as this one, but overriding ``collections``, ``run``, 

1735 ``inferDefaults``, and default data ID. 
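
        Examples
        --------
        A minimal sketch, not taken from the source; the run name is
        assumed for illustration::

            writer = butler._clone(run="u/someone/output-run")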

1736 """ 

1737 raise NotImplementedError()