Coverage for python/lsst/daf/butler/_butler.py: 51%

181 statements  

coverage.py v7.4.3, created at 2024-03-07 11:04 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from typing import TYPE_CHECKING, Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.iteration import ensure_iterable 

40from lsst.utils.logging import getLogger 

41 

42from ._butler_config import ButlerConfig, ButlerType 

43from ._butler_instance_options import ButlerInstanceOptions 

44from ._butler_repo_index import ButlerRepoIndex 

45from ._config import Config, ConfigSubset 

46from ._exceptions import EmptyQueryResultError 

47from ._limited_butler import LimitedButler 

48from .datastore import Datastore 

49from .dimensions import DimensionConfig 

50from .registry import RegistryConfig, _RegistryFactory 

51from .repo_relocation import BUTLER_ROOT_TAG 

52 

53if TYPE_CHECKING: 

54 from ._dataset_existence import DatasetExistence 

55 from ._dataset_ref import DatasetId, DatasetRef 

56 from ._dataset_type import DatasetType 

57 from ._deferredDatasetHandle import DeferredDatasetHandle 

58 from ._file_dataset import FileDataset 

59 from ._storage_class import StorageClass 

60 from ._timespan import Timespan 

61 from .datastore import DatasetRefURIs 

62 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

63 from .queries import Query 

64 from .registry import Registry 

65 from .transfers import RepoExportContext 

66 

67_LOG = getLogger(__name__) 

68 

69 

70class Butler(LimitedButler): # numpydoc ignore=PR02 

71 """Interface for data butler and factory for Butler instances. 

72 

73 Parameters 

74 ---------- 

75 config : `ButlerConfig`, `Config` or `str`, optional 

76 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

77 If a directory path is given the configuration will be read from a 

78 ``butler.yaml`` file in that location. If `None` is given default 

79 values will be used. If ``config`` contains a "cls" key then its value is

80 used as the name of the butler class, which must be a sub-class of this

81 class; otherwise `DirectButler` is instantiated.

82 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

83 An expression specifying the collections to be searched (in order) when 

84 reading datasets. 

85 This may be a `str` collection name or an iterable thereof. 

86 See :ref:`daf_butler_collection_expressions` for more information. 

87 These collections are not registered automatically and must be 

88 manually registered before they are used by any method, but they may be 

89 manually registered after the `Butler` is initialized. 

90 run : `str`, optional 

91 Name of the `~CollectionType.RUN` collection new datasets should be 

92 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

93 ``collections`` will be set to ``[run]``. If not `None`, this 

94 collection will automatically be registered. If this is not set (and 

95 ``writeable`` is not set either), a read-only butler will be created. 

96 searchPaths : `list` of `str`, optional 

97 Directory paths to search when calculating the full Butler 

98 configuration. Not used if the supplied config is already a 

99 `ButlerConfig`. 

100 writeable : `bool`, optional 

101 Explicitly sets whether the butler supports write operations. If not 

102 provided, a read-write butler is created if any of ``run``, ``tags``, 

103 or ``chains`` is non-empty. 

104 inferDefaults : `bool`, optional 

105 If `True` (default) infer default data ID values from the values 

106 present in the datasets in ``collections``: if all collections have the 

107 same value (or no value) for a governor dimension, that value will be 

108 the default for that dimension. Nonexistent collections are ignored. 

109 If a default value is provided explicitly for a governor dimension via 

110 ``**kwargs``, no default will be inferred for that dimension. 

111 without_datastore : `bool`, optional 

112 If `True` do not attach a datastore to this butler. Any attempts 

113 to use a datastore will fail. 

114 **kwargs : `Any` 

115 Additional keyword arguments passed to a constructor of actual butler 

116 class. 

117 

118 Notes 

119 ----- 

120 The preferred way to instantiate Butler is via the `from_config` method. 

121 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

122 but ``mypy`` will complain about the former. 

123 """ 

124 

125 def __new__( 

126 cls, 

127 config: Config | ResourcePathExpression | None = None, 

128 *, 

129 collections: Any = None, 

130 run: str | None = None, 

131 searchPaths: Sequence[ResourcePathExpression] | None = None, 

132 writeable: bool | None = None, 

133 inferDefaults: bool = True, 

134 without_datastore: bool = False, 

135 **kwargs: Any, 

136 ) -> Butler: 

137 if cls is Butler: 

138 return Butler.from_config( 

139 config=config, 

140 collections=collections, 

141 run=run, 

142 searchPaths=searchPaths, 

143 writeable=writeable, 

144 inferDefaults=inferDefaults, 

145 without_datastore=without_datastore, 

146 **kwargs, 

147 ) 

148 

149 # Note: we do not pass any parameters to __new__; Python will pass them

150 # to __init__ after __new__ returns the sub-class instance.

151 return super().__new__(cls) 

152 

153 @classmethod 

154 def from_config( 

155 cls, 

156 config: Config | ResourcePathExpression | None = None, 

157 *, 

158 collections: Any = None, 

159 run: str | None = None, 

160 searchPaths: Sequence[ResourcePathExpression] | None = None, 

161 writeable: bool | None = None, 

162 inferDefaults: bool = True, 

163 without_datastore: bool = False, 

164 **kwargs: Any, 

165 ) -> Butler: 

166 """Create butler instance from configuration. 

167 

168 Parameters 

169 ---------- 

170 config : `ButlerConfig`, `Config` or `str`, optional 

171 Configuration. Anything acceptable to the `ButlerConfig` 

172 constructor. If a directory path is given the configuration will be 

173 read from a ``butler.yaml`` file in that location. If `None` is 

174 given default values will be used. If ``config`` contains a "cls" key

175 then its value is used as the name of the butler class, which must be a

176 sub-class of this class; otherwise `DirectButler` is instantiated.

177 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

178 An expression specifying the collections to be searched (in order) 

179 when reading datasets. 

180 This may be a `str` collection name or an iterable thereof. 

181 See :ref:`daf_butler_collection_expressions` for more information. 

182 These collections are not registered automatically and must be 

183 manually registered before they are used by any method, but they 

184 may be manually registered after the `Butler` is initialized. 

185 run : `str`, optional 

186 Name of the `~CollectionType.RUN` collection new datasets should be 

187 inserted into. If ``collections`` is `None` and ``run`` is not 

188 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

189 this collection will automatically be registered. If this is not 

190 set (and ``writeable`` is not set either), a read-only butler will 

191 be created. 

192 searchPaths : `list` of `str`, optional 

193 Directory paths to search when calculating the full Butler 

194 configuration. Not used if the supplied config is already a 

195 `ButlerConfig`. 

196 writeable : `bool`, optional 

197 Explicitly sets whether the butler supports write operations. If 

198 not provided, a read-write butler is created if any of ``run``, 

199 ``tags``, or ``chains`` is non-empty. 

200 inferDefaults : `bool`, optional 

201 If `True` (default) infer default data ID values from the values 

202 present in the datasets in ``collections``: if all collections have 

203 the same value (or no value) for a governor dimension, that value 

204 will be the default for that dimension. Nonexistent collections 

205 are ignored. If a default value is provided explicitly for a 

206 governor dimension via ``**kwargs``, no default will be inferred 

207 for that dimension. 

208 without_datastore : `bool`, optional 

209 If `True` do not attach a datastore to this butler. Any attempts 

210 to use a datastore will fail. 

211 **kwargs : `Any` 

212 Default data ID key-value pairs. These may only identify 

213 "governor" dimensions like ``instrument`` and ``skymap``. 

214 

215 Returns 

216 ------- 

217 butler : `Butler` 

218 A `Butler` constructed from the given configuration. 

219 

220 Notes 

221 ----- 

222 Calling this factory method is identical to calling 

223 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

224 complains about a direct ``Butler()`` call.

225 

226 Examples 

227 -------- 

228 While there are many ways to control exactly how a `Butler` interacts 

229 with the collections in its `Registry`, the most common cases are still 

230 simple. 

231 

232 For a read-only `Butler` that searches one collection, do:: 

233 

234 butler = Butler.from_config( 

235 "/path/to/repo", collections=["u/alice/DM-50000"] 

236 ) 

237 

238 For a read-write `Butler` that writes to and reads from a 

239 `~CollectionType.RUN` collection:: 

240 

241 butler = Butler.from_config( 

242 "/path/to/repo", run="u/alice/DM-50000/a" 

243 ) 

244 

245 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

246 because we want to write to one `~CollectionType.RUN` collection but 

247 read from several others (as well):: 

248 

249 butler = Butler.from_config( 

250 "/path/to/repo", 

251 run="u/alice/DM-50000/a", 

252 collections=[ 

253 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

254 ] 

255 ) 

256 

257 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

258 Datasets will be read first from that run (since it appears first in 

259 the chain), and then from ``u/bob/DM-49998`` and finally 

260 ``HSC/defaults``. 

261 

262 Finally, one can always create a `Butler` with no collections:: 

263 

264 butler = Butler.from_config("/path/to/repo", writeable=True) 

265 

266 This can be extremely useful when you just want to use 

267 ``butler.registry``, e.g. for inserting dimension data or managing 

268 collections, or when the collections you want to use with the butler 

269 are not consistent. Passing ``writeable`` explicitly here is only 

270 necessary if you want to be able to make changes to the repo - usually 

271 the value for ``writeable`` can be guessed from the collection 

272 arguments provided, but it defaults to `False` when there are no

273 collection arguments. 

274 """ 

275 # DirectButler used to have a way to specify a "copy constructor" by 

276 # passing the "butler" parameter to its constructor. This 

277 # functionality has been moved out of the constructor into 

278 # Butler._clone(), but the new interface is not public yet. 

279 butler = kwargs.pop("butler", None) 

280 if butler is not None: 

281 if not isinstance(butler, Butler): 

282 raise TypeError("'butler' parameter must be a Butler instance") 

283 if config is not None or searchPaths is not None or writeable is not None: 

284 raise TypeError( 

285 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

286 ) 

287 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) 

288 

289 options = ButlerInstanceOptions( 

290 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs 

291 ) 

292 

293 # Load the Butler configuration. This may involve searching the 

294 # environment to locate a configuration file. 

295 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

296 butler_type = butler_config.get_butler_type() 

297 

298 # Make DirectButler if class is not specified. 

299 match butler_type: 

300 case ButlerType.DIRECT: 

301 from .direct_butler import DirectButler 

302 

303 return DirectButler.create_from_config( 

304 butler_config, 

305 options=options, 

306 without_datastore=without_datastore, 

307 ) 

308 case ButlerType.REMOTE: 

309 from .remote_butler import RemoteButlerFactory 

310 

311 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

312 return factory.create_butler_with_credentials_from_environment(butler_options=options) 

313 case _: 

314 raise TypeError(f"Unknown Butler type '{butler_type}'") 

315 

316 @staticmethod 

317 def makeRepo( 

318 root: ResourcePathExpression, 

319 config: Config | str | None = None, 

320 dimensionConfig: Config | str | None = None, 

321 standalone: bool = False, 

322 searchPaths: list[str] | None = None, 

323 forceConfigRoot: bool = True, 

324 outfile: ResourcePathExpression | None = None, 

325 overwrite: bool = False, 

326 ) -> Config: 

327 """Create an empty data repository by adding a butler.yaml config 

328 to a repository root directory. 

329 

330 Parameters 

331 ---------- 

332 root : `lsst.resources.ResourcePathExpression` 

333 Path or URI to the root location of the new repository. Will be 

334 created if it does not exist. 

335 config : `Config` or `str`, optional 

336 Configuration to write to the repository, after setting any 

337 root-dependent Registry or Datastore config options. Can not 

338 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

339 configuration will be used. Root-dependent config options 

340 specified in this config are overwritten if ``forceConfigRoot`` 

341 is `True`. 

342 dimensionConfig : `Config` or `str`, optional 

343 Configuration for dimensions, will be used to initialize registry 

344 database. 

345 standalone : `bool` 

346 If `True`, write all expanded defaults, not just customized or

347 repository-specific settings. 

348 This (mostly) decouples the repository from the default 

349 configuration, insulating it from changes to the defaults (which 

350 may be good or bad, depending on the nature of the changes). 

351 Future *additions* to the defaults will still be picked up when 

352 initializing `Butlers` to repos created with ``standalone=True``. 

353 searchPaths : `list` of `str`, optional 

354 Directory paths to search when calculating the full butler 

355 configuration. 

356 forceConfigRoot : `bool`, optional 

357 If `False`, any values present in the supplied ``config`` that 

358 would normally be reset are not overridden and will appear 

359 directly in the output config. This allows non-standard overrides 

360 of the root directory for a datastore or registry to be given. 

361 If this parameter is `True` the values for ``root`` will be 

362 forced into the resulting config if appropriate. 

363 outfile : `lsst.resources.ResourcePathExpression`, optional

364 If not-`None`, the output configuration will be written to this 

365 location rather than into the repository itself. Can be a URI 

366 string. Can refer to a directory that will be used to write 

367 ``butler.yaml``. 

368 overwrite : `bool`, optional 

369 Create a new configuration file even if one already exists 

370 in the specified output location. Default is to raise 

371 an exception. 

372 

373 Returns 

374 ------- 

375 config : `Config` 

376 The updated `Config` instance written to the repo. 

377 

378 Raises 

379 ------ 

380 ValueError 

381 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

382 regular Config (as these subclasses would make it impossible to 

383 support ``standalone=False``). 

384 FileExistsError 

385 Raised if the output config file already exists. 

386 os.error 

387 Raised if the directory does not exist, exists but is not a 

388 directory, or cannot be created. 

389 

390 Notes 

391 ----- 

392 Note that when ``standalone=False`` (the default), the configuration 

393 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

394 construct the repository should also be used to construct any Butlers 

395 to avoid configuration inconsistencies. 
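
Examples
--------
A minimal sketch of creating a repository and then constructing a butler
for it; the path is an illustrative placeholder::

    Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)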

396 """ 

397 if isinstance(config, ButlerConfig | ConfigSubset): 

398 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

399 

400 # Ensure that the root of the repository exists or can be made 

401 root_uri = ResourcePath(root, forceDirectory=True) 

402 root_uri.mkdir() 

403 

404 config = Config(config) 

405 

406 # If we are creating a new repo from scratch with relative roots, 

407 # do not propagate an explicit root from the config file 

408 if "root" in config: 

409 del config["root"] 

410 

411 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

412 imported_class = doImportType(full["datastore", "cls"]) 

413 if not issubclass(imported_class, Datastore): 

414 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

415 datastoreClass: type[Datastore] = imported_class 

416 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

417 

418 # if key exists in given config, parse it, otherwise parse the defaults 

419 # in the expanded config 

420 if config.get(("registry", "db")): 

421 registryConfig = RegistryConfig(config) 

422 else: 

423 registryConfig = RegistryConfig(full) 

424 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

425 if defaultDatabaseUri is not None: 

426 Config.updateParameters( 

427 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

428 ) 

429 else: 

430 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

431 

432 if standalone: 

433 config.merge(full) 

434 else: 

435 # Always expand the registry.managers section into the per-repo 

436 # config, because after the database schema is created, it's not 

437 # allowed to change anymore. Note that in the standalone=True 

438 # branch, _everything_ in the config is expanded, so there's no 

439 # need to special case this. 

440 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

441 configURI: ResourcePathExpression 

442 if outfile is not None: 

443 # When writing to a separate location we must include 

444 # the root of the butler repo in the config else it won't know 

445 # where to look. 

446 config["root"] = root_uri.geturl() 

447 configURI = outfile 

448 else: 

449 configURI = root_uri 

450 # Strip obscore configuration, if it is present, before writing config 

451 # to a file; the obscore config will be stored in the registry.

452 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

453 config_to_write = config.copy() 

454 del config_to_write[obscore_config_key] 

455 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

456 # The configFile attribute is updated; copy it back to the original.

457 config.configFile = config_to_write.configFile 

458 else: 

459 config.dumpToUri(configURI, overwrite=overwrite) 

460 

461 # Create Registry and populate tables 

462 registryConfig = RegistryConfig(config.get("registry")) 

463 dimensionConfig = DimensionConfig(dimensionConfig) 

464 _RegistryFactory(registryConfig).create_from_config( 

465 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

466 ) 

467 

468 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

469 

470 return config 

471 

472 @classmethod 

473 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

474 """Look up the label in a butler repository index. 

475 

476 Parameters 

477 ---------- 

478 label : `str` 

479 Label of the Butler repository to look up. 

480 return_label : `bool`, optional 

481 If ``label`` cannot be found in the repository index (either 

482 because the index is not defined or ``label`` is not in the index) and

483 ``return_label`` is `True` then return ``ResourcePath(label)``. 

484 If ``return_label`` is `False` (default) then an exception will be 

485 raised instead. 

486 

487 Returns 

488 ------- 

489 uri : `lsst.resources.ResourcePath` 

490 URI to the Butler repository associated with the given label or 

491 default value if it is provided. 

492 

493 Raises 

494 ------ 

495 KeyError 

496 Raised if the label is not found in the index, or if an index 

497 is not defined, and ``return_label`` is `False`. 

498 

499 Notes 

500 ----- 

501 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

502 information is discovered. 

503 """ 

504 return ButlerRepoIndex.get_repo_uri(label, return_label) 

505 

506 @classmethod 

507 def get_known_repos(cls) -> set[str]: 

508 """Retrieve the list of known repository labels. 

509 

510 Returns 

511 ------- 

512 repos : `set` of `str` 

513 All the known labels. Can be empty if no index can be found. 

514 

515 Notes 

516 ----- 

517 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

518 information is discovered. 

519 """ 

520 return ButlerRepoIndex.get_known_repos() 

521 

522 @abstractmethod 

523 def _caching_context(self) -> AbstractContextManager[None]: 

524 """Context manager that enables caching.""" 

525 raise NotImplementedError() 

526 

527 @abstractmethod 

528 def transaction(self) -> AbstractContextManager[None]: 

529 """Context manager supporting `Butler` transactions. 

530 

531 Transactions can be nested. 

532 """ 

533 raise NotImplementedError() 

534 

535 @abstractmethod 

536 def put( 

537 self, 

538 obj: Any, 

539 datasetRefOrType: DatasetRef | DatasetType | str, 

540 /, 

541 dataId: DataId | None = None, 

542 *, 

543 run: str | None = None, 

544 **kwargs: Any, 

545 ) -> DatasetRef: 

546 """Store and register a dataset. 

547 

548 Parameters 

549 ---------- 

550 obj : `object` 

551 The dataset. 

552 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

553 When `DatasetRef` is provided, ``dataId`` should be `None`. 

554 Otherwise the `DatasetType` or name thereof. If a fully resolved 

555 `DatasetRef` is given the run and ID are used directly. 

556 dataId : `dict` or `DataCoordinate` 

557 A `dict` of `Dimension` link name, value pairs that label the 

558 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

559 should be provided as the second argument. 

560 run : `str`, optional 

561 The name of the run the dataset should be added to, overriding 

562 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

563 **kwargs 

564 Additional keyword arguments used to augment or construct a 

565 `DataCoordinate`. See `DataCoordinate.standardize` 

566 parameters. Not used if a resolved `DatasetRef` is provided.

567 

568 Returns 

569 ------- 

570 ref : `DatasetRef` 

571 A reference to the stored dataset, updated with the correct id if 

572 given. 

573 

574 Raises 

575 ------ 

576 TypeError 

577 Raised if the butler is read-only or if no run has been provided. 
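
Examples
--------
A minimal sketch; ``bias_image`` is assumed to be an in-memory dataset,
and the dataset type, data ID values, and run name are illustrative
placeholders::

    ref = butler.put(
        bias_image, "bias", instrument="HSC", detector=42,
        run="u/alice/DM-50000/a"
    )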

578 """ 

579 raise NotImplementedError() 

580 

581 @abstractmethod 

582 def getDeferred( 

583 self, 

584 datasetRefOrType: DatasetRef | DatasetType | str, 

585 /, 

586 dataId: DataId | None = None, 

587 *, 

588 parameters: dict | None = None, 

589 collections: Any = None, 

590 storageClass: str | StorageClass | None = None, 

591 **kwargs: Any, 

592 ) -> DeferredDatasetHandle: 

593 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

594 after an immediate registry lookup. 

595 

596 Parameters 

597 ---------- 

598 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

599 When `DatasetRef` the `dataId` should be `None`. 

600 Otherwise the `DatasetType` or name thereof. 

601 dataId : `dict` or `DataCoordinate`, optional 

602 A `dict` of `Dimension` link name, value pairs that label the 

603 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

604 should be provided as the first argument. 

605 parameters : `dict` 

606 Additional StorageClass-defined options to control reading, 

607 typically used to efficiently read only a subset of the dataset. 

608 collections : Any, optional 

609 Collections to be searched, overriding ``self.collections``. 

610 Can be any of the types supported by the ``collections`` argument 

611 to butler construction. 

612 storageClass : `StorageClass` or `str`, optional 

613 The storage class to be used to override the Python type 

614 returned by this method. By default the returned type matches 

615 the dataset type definition for this dataset. Specifying a 

616 read `StorageClass` can force a different type to be returned. 

617 This type must be compatible with the original type. 

618 **kwargs 

619 Additional keyword arguments used to augment or construct a 

620 `DataId`. See `DataId` parameters. 

621 

622 Returns 

623 ------- 

624 obj : `DeferredDatasetHandle` 

625 A handle which can be used to retrieve a dataset at a later time. 

626 

627 Raises 

628 ------ 

629 LookupError 

630 Raised if no matching dataset exists in the `Registry` or 

631 datastore. 

632 ValueError 

633 Raised if a resolved `DatasetRef` was passed as an input, but it 

634 differs from the one found in the registry. 

635 TypeError 

636 Raised if no collections were provided. 
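
Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative,
and the handle is assumed to be read later via its ``get`` method::

    handle = butler.getDeferred("bias", instrument="HSC", detector=42)
    # ... later, when the dataset is actually needed:
    bias = handle.get()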

637 """ 

638 raise NotImplementedError() 

639 

640 @abstractmethod 

641 def get( 

642 self, 

643 datasetRefOrType: DatasetRef | DatasetType | str, 

644 /, 

645 dataId: DataId | None = None, 

646 *, 

647 parameters: dict[str, Any] | None = None, 

648 collections: Any = None, 

649 storageClass: StorageClass | str | None = None, 

650 **kwargs: Any, 

651 ) -> Any: 

652 """Retrieve a stored dataset. 

653 

654 Parameters 

655 ---------- 

656 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

657 When `DatasetRef` the `dataId` should be `None`. 

658 Otherwise the `DatasetType` or name thereof. 

659 If a resolved `DatasetRef`, the associated dataset 

660 is returned directly without additional querying. 

661 dataId : `dict` or `DataCoordinate` 

662 A `dict` of `Dimension` link name, value pairs that label the 

663 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

664 should be provided as the first argument. 

665 parameters : `dict` 

666 Additional StorageClass-defined options to control reading, 

667 typically used to efficiently read only a subset of the dataset. 

668 collections : Any, optional 

669 Collections to be searched, overriding ``self.collections``. 

670 Can be any of the types supported by the ``collections`` argument 

671 to butler construction. 

672 storageClass : `StorageClass` or `str`, optional 

673 The storage class to be used to override the Python type 

674 returned by this method. By default the returned type matches 

675 the dataset type definition for this dataset. Specifying a 

676 read `StorageClass` can force a different type to be returned. 

677 This type must be compatible with the original type. 

678 **kwargs 

679 Additional keyword arguments used to augment or construct a 

680 `DataCoordinate`. See `DataCoordinate.standardize` 

681 parameters. 

682 

683 Returns 

684 ------- 

685 obj : `object` 

686 The dataset. 

687 

688 Raises 

689 ------ 

690 LookupError 

691 Raised if no matching dataset exists in the `Registry`. 

692 TypeError 

693 Raised if no collections were provided. 

694 

695 Notes 

696 ----- 

697 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

698 this method requires that the given data ID include temporal dimensions 

699 beyond the dimensions of the dataset type itself, in order to find the 

700 dataset with the appropriate validity range. For example, a "bias" 

701 dataset with native dimensions ``{instrument, detector}`` could be 

702 fetched with a ``{instrument, detector, exposure}`` data ID, because 

703 ``exposure`` is a temporal dimension. 
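
Examples
--------
Following the calibration example above, a minimal sketch with
illustrative data ID and collection values::

    bias = butler.get(
        "bias", instrument="HSC", detector=42, exposure=903334,
        collections="HSC/defaults"
    )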

704 """ 

705 raise NotImplementedError() 

706 

707 @abstractmethod 

708 def getURIs( 

709 self, 

710 datasetRefOrType: DatasetRef | DatasetType | str, 

711 /, 

712 dataId: DataId | None = None, 

713 *, 

714 predict: bool = False, 

715 collections: Any = None, 

716 run: str | None = None, 

717 **kwargs: Any, 

718 ) -> DatasetRefURIs: 

719 """Return the URIs associated with the dataset. 

720 

721 Parameters 

722 ---------- 

723 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

724 When `DatasetRef` the `dataId` should be `None`. 

725 Otherwise the `DatasetType` or name thereof. 

726 dataId : `dict` or `DataCoordinate` 

727 A `dict` of `Dimension` link name, value pairs that label the 

728 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

729 should be provided as the first argument. 

730 predict : `bool` 

731 If `True`, allow URIs to be returned for datasets that have not

732 been written. 

733 collections : Any, optional 

734 Collections to be searched, overriding ``self.collections``. 

735 Can be any of the types supported by the ``collections`` argument 

736 to butler construction. 

737 run : `str`, optional 

738 Run to use for predictions, overriding ``self.run``. 

739 **kwargs 

740 Additional keyword arguments used to augment or construct a 

741 `DataCoordinate`. See `DataCoordinate.standardize` 

742 parameters. 

743 

744 Returns 

745 ------- 

746 uris : `DatasetRefURIs` 

747 The URI to the primary artifact associated with this dataset (if 

748 the dataset was disassembled within the datastore this may be 

749 `None`), and the URIs to any components associated with the dataset 

750 artifact (can be empty if there are no components).

751 """ 

752 raise NotImplementedError() 

753 

754 def getURI( 

755 self, 

756 datasetRefOrType: DatasetRef | DatasetType | str, 

757 /, 

758 dataId: DataId | None = None, 

759 *, 

760 predict: bool = False, 

761 collections: Any = None, 

762 run: str | None = None, 

763 **kwargs: Any, 

764 ) -> ResourcePath: 

765 """Return the URI to the Dataset. 

766 

767 Parameters 

768 ---------- 

769 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

770 When `DatasetRef` the `dataId` should be `None`. 

771 Otherwise the `DatasetType` or name thereof. 

772 dataId : `dict` or `DataCoordinate` 

773 A `dict` of `Dimension` link name, value pairs that label the 

774 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

775 should be provided as the first argument. 

776 predict : `bool` 

777 If `True`, allow URIs to be returned for datasets that have not

778 been written. 

779 collections : Any, optional 

780 Collections to be searched, overriding ``self.collections``. 

781 Can be any of the types supported by the ``collections`` argument 

782 to butler construction. 

783 run : `str`, optional 

784 Run to use for predictions, overriding ``self.run``. 

785 **kwargs 

786 Additional keyword arguments used to augment or construct a 

787 `DataCoordinate`. See `DataCoordinate.standardize` 

788 parameters. 

789 

790 Returns 

791 ------- 

792 uri : `lsst.resources.ResourcePath` 

793 URI pointing to the Dataset within the datastore. If the 

794 Dataset does not exist in the datastore, and if ``predict`` is 

795 `True`, the URI will be a prediction and will include a URI 

796 fragment "#predicted". 

797 If the datastore does not have entities that relate well 

798 to the concept of a URI the returned URI string will be 

799 descriptive. The returned URI is not guaranteed to be obtainable. 

800 

801 Raises 

802 ------ 

803 LookupError 

804 Raised if a URI has been requested for a dataset that does not exist and

805 guessing is not allowed. 

806 ValueError 

807 Raised if a resolved `DatasetRef` was passed as an input, but it 

808 differs from the one found in the registry. 

809 TypeError 

810 Raised if no collections were provided. 

811 RuntimeError 

812 Raised if a URI is requested for a dataset that consists of 

813 multiple artifacts. 
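
Examples
--------
A minimal sketch with illustrative data ID and collection values::

    uri = butler.getURI("bias", instrument="HSC", detector=42,
                        collections="HSC/defaults")
    print(uri.geturl())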

814 """ 

815 primary, components = self.getURIs( 

816 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

817 ) 

818 

819 if primary is None or components: 

820 raise RuntimeError( 

821 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

822 "Use Butler.getURIs() instead." 

823 ) 

824 return primary 

825 

826 @abstractmethod 

827 def get_dataset_type(self, name: str) -> DatasetType: 

828 """Get the `DatasetType`. 

829 

830 Parameters 

831 ---------- 

832 name : `str` 

833 Name of the type. 

834 

835 Returns 

836 ------- 

837 type : `DatasetType` 

838 The `DatasetType` associated with the given name. 

839 

840 Raises 

841 ------ 

842 lsst.daf.butler.MissingDatasetTypeError 

843 Raised if the requested dataset type has not been registered. 

844 

845 Notes 

846 ----- 

847 This method handles component dataset types automatically, though most 

848 other operations do not. 

849 """ 

850 raise NotImplementedError() 

851 

852 @abstractmethod 

853 def get_dataset( 

854 self, 

855 id: DatasetId, 

856 *, 

857 storage_class: str | StorageClass | None = None, 

858 dimension_records: bool = False, 

859 datastore_records: bool = False, 

860 ) -> DatasetRef | None: 

861 """Retrieve a Dataset entry. 

862 

863 Parameters 

864 ---------- 

865 id : `DatasetId` 

866 The unique identifier for the dataset. 

867 storage_class : `str` or `StorageClass` or `None` 

868 A storage class to use when creating the returned entry. If given 

869 it must be compatible with the default storage class. 

870 dimension_records : `bool`, optional 

871 If `True` the ref will be expanded and contain dimension records. 

872 datastore_records : `bool`, optional 

873 If `True` the ref will contain associated datastore records. 

874 

875 Returns 

876 ------- 

877 ref : `DatasetRef` or `None` 

878 A ref to the Dataset, or `None` if no matching Dataset 

879 was found. 

880 """ 

881 raise NotImplementedError() 

882 

883 @abstractmethod 

884 def find_dataset( 

885 self, 

886 dataset_type: DatasetType | str, 

887 data_id: DataId | None = None, 

888 *, 

889 collections: str | Sequence[str] | None = None, 

890 timespan: Timespan | None = None, 

891 storage_class: str | StorageClass | None = None, 

892 dimension_records: bool = False, 

893 datastore_records: bool = False, 

894 **kwargs: Any, 

895 ) -> DatasetRef | None: 

896 """Find a dataset given its `DatasetType` and data ID. 

897 

898 This can be used to obtain a `DatasetRef` that permits the dataset to 

899 be read from a `Datastore`. If the dataset is a component and can not 

900 be found using the provided dataset type, a dataset ref for the parent 

901 will be returned instead but with the correct dataset type. 

902 

903 Parameters 

904 ---------- 

905 dataset_type : `DatasetType` or `str` 

906 A `DatasetType` or the name of one. If this is a `DatasetType` 

907 instance, its storage class will be respected and propagated to 

908 the output, even if it differs from the dataset type definition 

909 in the registry, as long as the storage classes are convertible. 

910 data_id : `dict` or `DataCoordinate`, optional 

911 A `dict`-like object containing the `Dimension` links that identify 

912 the dataset within a collection. If it is a `dict` the dataId 

913 can include dimension record values such as ``day_obs`` and 

914 ``seq_num`` or ``full_name`` that can be used to derive the 

915 primary dimension. 

916 collections : `str` or `list` [`str`], optional 

917 An ordered list of collections to search for the dataset.

918 Defaults to ``self.defaults.collections``. 

919 timespan : `Timespan`, optional 

920 A timespan that the validity range of the dataset must overlap. 

921 If not provided, any `~CollectionType.CALIBRATION` collections 

922 matched by the ``collections`` argument will not be searched. 

923 storage_class : `str` or `StorageClass` or `None` 

924 A storage class to use when creating the returned entry. If given 

925 it must be compatible with the default storage class. 

926 dimension_records : `bool`, optional 

927 If `True` the ref will be expanded and contain dimension records. 

928 datastore_records : `bool`, optional 

929 If `True` the ref will contain associated datastore records. 

930 **kwargs 

931 Additional keyword arguments passed to 

932 `DataCoordinate.standardize` to convert ``dataId`` to a true 

933 `DataCoordinate` or augment an existing one. This can also include 

934 dimension record metadata that can be used to derive a primary 

935 dimension value. 

936 

937 Returns 

938 ------- 

939 ref : `DatasetRef` 

940 A reference to the dataset, or `None` if no matching Dataset 

941 was found. 

942 

943 Raises 

944 ------ 

945 lsst.daf.butler.NoDefaultCollectionError 

946 Raised if ``collections`` is `None` and 

947 ``self.collections`` is `None`. 

948 LookupError 

949 Raised if one or more data ID keys are missing. 

950 lsst.daf.butler.MissingDatasetTypeError 

951 Raised if the dataset type does not exist. 

952 lsst.daf.butler.MissingCollectionError 

953 Raised if any of ``collections`` does not exist in the registry. 

954 

955 Notes 

956 ----- 

957 This method simply returns `None` and does not raise an exception even 

958 when the set of collections searched is intrinsically incompatible with 

959 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

960 only `~CollectionType.CALIBRATION` collections are being searched. 

961 This may make it harder to debug some lookup failures, but the behavior 

962 is intentional; we consider it more important that failed searches are 

963 reported consistently, regardless of the reason, and that adding 

964 additional collections that do not contain a match to the search path 

965 never changes the behavior. 

966 

967 This method handles component dataset types automatically, though most 

968 other query operations do not. 
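
Examples
--------
A minimal sketch; the dataset type, data ID values, and collection name
are illustrative placeholders::

    ref = butler.find_dataset(
        "bias", instrument="HSC", detector=42, collections="HSC/defaults"
    )
    if ref is None:
        print("no matching dataset found")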

969 """ 

970 raise NotImplementedError() 

971 

972 @abstractmethod 

973 def retrieveArtifacts( 

974 self, 

975 refs: Iterable[DatasetRef], 

976 destination: ResourcePathExpression, 

977 transfer: str = "auto", 

978 preserve_path: bool = True, 

979 overwrite: bool = False, 

980 ) -> list[ResourcePath]: 

981 """Retrieve the artifacts associated with the supplied refs. 

982 

983 Parameters 

984 ---------- 

985 refs : iterable of `DatasetRef` 

986 The datasets for which artifacts are to be retrieved. 

987 A single ref can result in multiple artifacts. The refs must 

988 be resolved. 

989 destination : `lsst.resources.ResourcePath` or `str` 

990 Location to write the artifacts. 

991 transfer : `str`, optional 

992 Method to use to transfer the artifacts. Must be one of the options 

993 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

994 "move" is not allowed. 

995 preserve_path : `bool`, optional 

996 If `True` the full path of the artifact within the datastore 

997 is preserved. If `False` the final file component of the path 

998 is used. 

999 overwrite : `bool`, optional 

1000 If `True` allow transfers to overwrite existing files at the 

1001 destination. 

1002 

1003 Returns 

1004 ------- 

1005 targets : `list` of `lsst.resources.ResourcePath` 

1006 URIs of file artifacts in destination location. Order is not 

1007 preserved. 

1008 

1009 Notes 

1010 ----- 

1011 For non-file datastores the artifacts written to the destination 

1012 may not match the representation inside the datastore. For example 

1013 a hierarchical data structure in a NoSQL database may well be stored 

1014 as a JSON file. 
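
Examples
--------
A minimal sketch; the destination directory is an illustrative
placeholder and ``refs`` is assumed to be an iterable of resolved
`DatasetRef` objects::

    paths = butler.retrieveArtifacts(
        refs, destination="/tmp/artifact-export", transfer="copy",
        preserve_path=False
    )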

1015 """ 

1016 raise NotImplementedError() 

1017 

1018 @abstractmethod 

1019 def exists( 

1020 self, 

1021 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1022 /, 

1023 data_id: DataId | None = None, 

1024 *, 

1025 full_check: bool = True, 

1026 collections: Any = None, 

1027 **kwargs: Any, 

1028 ) -> DatasetExistence: 

1029 """Indicate whether a dataset is known to Butler registry and 

1030 datastore. 

1031 

1032 Parameters 

1033 ---------- 

1034 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1035 When `DatasetRef` the `dataId` should be `None`. 

1036 Otherwise the `DatasetType` or name thereof. 

1037 data_id : `dict` or `DataCoordinate` 

1038 A `dict` of `Dimension` link name, value pairs that label the 

1039 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1040 should be provided as the first argument. 

1041 full_check : `bool`, optional 

1042 If `True`, a check will be made for the actual existence of a 

1043 dataset artifact. This will involve additional overhead due to 

1044 the need to query an external system. If `False`, this check will 

1045 be omitted, and the registry and datastore will solely be asked 

1046 if they know about the dataset but no direct check for the 

1047 artifact will be performed. 

1048 collections : Any, optional 

1049 Collections to be searched, overriding ``self.collections``. 

1050 Can be any of the types supported by the ``collections`` argument 

1051 to butler construction. 

1052 **kwargs 

1053 Additional keyword arguments used to augment or construct a 

1054 `DataCoordinate`. See `DataCoordinate.standardize` 

1055 parameters. 

1056 

1057 Returns 

1058 ------- 

1059 existence : `DatasetExistence` 

1060 Object indicating whether the dataset is known to registry and 

1061 datastore. Evaluates to `True` if the dataset is present and known 

1062 to both. 
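
Examples
--------
A minimal sketch with illustrative data ID and collection values; the
returned `DatasetExistence` evaluates to `True` only if the dataset is
known to both registry and datastore::

    if butler.exists("bias", instrument="HSC", detector=42,
                     collections="HSC/defaults"):
        print("bias dataset is present")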

1063 """ 

1064 raise NotImplementedError() 

1065 

1066 @abstractmethod 

1067 def _exists_many( 

1068 self, 

1069 refs: Iterable[DatasetRef], 

1070 /, 

1071 *, 

1072 full_check: bool = True, 

1073 ) -> dict[DatasetRef, DatasetExistence]: 

1074 """Indicate whether multiple datasets are known to Butler registry and 

1075 datastore. 

1076 

1077 This is an experimental API that may change at any moment. 

1078 

1079 Parameters 

1080 ---------- 

1081 refs : iterable of `DatasetRef` 

1082 The datasets to be checked. 

1083 full_check : `bool`, optional 

1084 If `True`, a check will be made for the actual existence of each 

1085 dataset artifact. This will involve additional overhead due to 

1086 the need to query an external system. If `False`, this check will 

1087 be omitted, and the registry and datastore will solely be asked 

1088 if they know about the dataset(s) but no direct check for the 

1089 artifact(s) will be performed. 

1090 

1091 Returns 

1092 ------- 

1093 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1094 Mapping from the given dataset refs to an enum indicating the 

1095 status of the dataset in registry and datastore. 

1096 Each value evaluates to `True` if the dataset is present and known 

1097 to both. 

1098 """ 

1099 raise NotImplementedError() 

1100 

1101 @abstractmethod 

1102 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1103 """Remove one or more `~CollectionType.RUN` collections and the 

1104 datasets within them. 

1105 

1106 Parameters 

1107 ---------- 

1108 names : `~collections.abc.Iterable` [ `str` ] 

1109 The names of the collections to remove. 

1110 unstore : `bool`, optional 

1111 If `True` (default), delete datasets from all datastores in which 

1112 they are present, and attempt to rollback the registry deletions if 

1113 datastore deletions fail (which may not always be possible). If 

1114 `False`, datastore records for these datasets are still removed, 

1115 but any artifacts (e.g. files) will not be. 

1116 

1117 Raises 

1118 ------ 

1119 TypeError 

1120 Raised if one or more collections are not of type 

1121 `~CollectionType.RUN`. 
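
Examples
--------
A minimal sketch; the run name is an illustrative placeholder::

    butler.removeRuns(["u/alice/DM-50000/a"], unstore=True)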

1122 """ 

1123 raise NotImplementedError() 

1124 

1125 @abstractmethod 

1126 def ingest( 

1127 self, 

1128 *datasets: FileDataset, 

1129 transfer: str | None = "auto", 

1130 record_validation_info: bool = True, 

1131 ) -> None: 

1132 """Store and register one or more datasets that already exist on disk. 

1133 

1134 Parameters 

1135 ---------- 

1136 *datasets : `FileDataset` 

1137 Each positional argument is a struct containing information about 

1138 a file to be ingested, including its URI (either absolute or 

1139 relative to the datastore root, if applicable), a resolved 

1140 `DatasetRef`, and optionally a formatter class or its 

1141 fully-qualified string name. If a formatter is not provided, the 

1142 formatter that would be used for `put` is assumed. On successful 

1143 ingest all `FileDataset.formatter` attributes will be set to the 

1144 formatter class used. `FileDataset.path` attributes may be modified 

1145 to put paths in whatever the datastore considers a standardized 

1146 form. 

1147 transfer : `str`, optional 

1148 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1149 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1150 transfer the file. 

1151 record_validation_info : `bool`, optional 

1152 If `True`, the default, the datastore can record validation 

1153 information associated with the file. If `False` the datastore 

1154 will not attempt to track any information such as checksums 

1155 or file sizes. This can be useful if such information is tracked 

1156 in an external system or if the file is to be compressed in place. 

1157 It is up to the datastore whether this parameter is relevant. 

1158 

1159 Raises 

1160 ------ 

1161 TypeError 

1162 Raised if the butler is read-only or if no run was provided. 

1163 NotImplementedError 

1164 Raised if the `Datastore` does not support the given transfer mode. 

1165 DatasetTypeNotSupportedError 

1166 Raised if one or more files to be ingested have a dataset type that 

1167 is not supported by the `Datastore`.

1168 FileNotFoundError 

1169 Raised if one of the given files does not exist. 

1170 FileExistsError 

1171 Raised if transfer is not `None` but the (internal) location the 

1172 file would be moved to is already occupied. 

1173 

1174 Notes 

1175 ----- 

1176 This operation is not fully exception safe: if a database operation 

1177 fails, the given `FileDataset` instances may be only partially updated. 

1178 

1179 It is atomic in terms of database operations (they will either all 

1180 succeed or all fail) providing the database engine implements 

1181 transactions correctly. It will attempt to be atomic in terms of 

1182 filesystem operations as well, but this cannot be implemented 

1183 rigorously for most datastores. 
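
Examples
--------
A minimal sketch; the file path is an illustrative placeholder, ``ref``
is assumed to be a resolved `DatasetRef`, and the `FileDataset`
constructor arguments shown are assumptions::

    dataset = FileDataset(path="/path/to/file.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy")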

1184 """ 

1185 raise NotImplementedError() 

1186 

1187 @abstractmethod 

1188 def export( 

1189 self, 

1190 *, 

1191 directory: str | None = None, 

1192 filename: str | None = None, 

1193 format: str | None = None, 

1194 transfer: str | None = None, 

1195 ) -> AbstractContextManager[RepoExportContext]: 

1196 """Export datasets from the repository represented by this `Butler`. 

1197 

1198 This method is a context manager that returns a helper object 

1199 (`RepoExportContext`) that is used to indicate what information from 

1200 the repository should be exported. 

1201 

1202 Parameters 

1203 ---------- 

1204 directory : `str`, optional 

1205 Directory dataset files should be written to if ``transfer`` is not 

1206 `None`. 

1207 filename : `str`, optional 

1208 Name for the file that will include database information associated 

1209 with the exported datasets. If this is not an absolute path and 

1210 ``directory`` is not `None`, it will be written to ``directory`` 

1211 instead of the current working directory. Defaults to 

1212 "export.{format}". 

1213 format : `str`, optional 

1214 File format for the database information file. If `None`, the 

1215 extension of ``filename`` will be used. 

1216 transfer : `str`, optional 

1217 Transfer mode passed to `Datastore.export`. 

1218 

1219 Raises 

1220 ------ 

1221 TypeError 

1222 Raised if the set of arguments passed is inconsistent. 

1223 

1224 Examples 

1225 -------- 

1226 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1227 methods are used to provide the iterables over data IDs and/or datasets 

1228 to be exported:: 

1229 

1230 with butler.export("exports.yaml") as export: 

1231 # Export all flats, but none of the dimension element rows 

1232 # (i.e. data ID information) associated with them. 

1233 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1234 elements=()) 

1235 # Export all datasets that start with "deepCoadd_" and all of 

1236 # their associated data ID information. 

1237 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1238 """ 

1239 raise NotImplementedError() 

1240 

1241 @abstractmethod 

1242 def import_( 

1243 self, 

1244 *, 

1245 directory: ResourcePathExpression | None = None, 

1246 filename: ResourcePathExpression | TextIO | None = None, 

1247 format: str | None = None, 

1248 transfer: str | None = None, 

1249 skip_dimensions: set | None = None, 

1250 ) -> None: 

1251 """Import datasets into this repository that were exported from a 

1252 different butler repository via `~lsst.daf.butler.Butler.export`. 

1253 

1254 Parameters 

1255 ---------- 

1256 directory : `~lsst.resources.ResourcePathExpression`, optional 

1257 Directory containing dataset files to import from. If `None`, 

1258 ``filename`` and all dataset file paths specified therein must 

1259 be absolute. 

1260 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1261 A stream or name of file that contains database information 

1262 associated with the exported datasets, typically generated by 

1263 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1264 `~lsst.resources.ResourcePath` and is not an absolute path, 

1265 it will first be looked for relative to ``directory`` and if not 

1266 found there it will be looked for in the current working 

1267 directory. Defaults to "export.{format}". 

1268 format : `str`, optional 

1269 File format for ``filename``. If `None`, the extension of 

1270 ``filename`` will be used. 

1271 transfer : `str`, optional 

1272 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1273 skip_dimensions : `set`, optional 

1274 Names of dimensions that should be skipped and not imported. 

1275 

1276 Raises 

1277 ------ 

1278 TypeError 

1279 Raised if the set of arguments passed is inconsistent, or if the 

1280 butler is read-only. 

1281 """ 

1282 raise NotImplementedError() 

1283 

1284 @abstractmethod 

1285 def transfer_dimension_records_from( 

1286 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1287 ) -> None: 

1288 """Transfer dimension records to this Butler from another Butler. 

1289 

1290 Parameters 

1291 ---------- 

1292 source_butler : `LimitedButler` or `Butler` 

1293 Butler from which the records are to be transferred. If data IDs 

1294 in ``source_refs`` are not expanded then this has to be a full 

1295 `Butler` whose registry will be used to expand data IDs. If the 

1296 source refs contain coordinates that are used to populate other 

1297 records then this will also need to be a full `Butler`. 

1298 source_refs : iterable of `DatasetRef` 

1299 Datasets defined in the source butler whose dimension records 

1300 should be transferred to this butler. In most circumstances,

1301 transfer is faster if the dataset refs are expanded. 

1302 """ 

1303 raise NotImplementedError() 

1304 

1305 @abstractmethod 

1306 def transfer_from( 

1307 self, 

1308 source_butler: LimitedButler, 

1309 source_refs: Iterable[DatasetRef], 

1310 transfer: str = "auto", 

1311 skip_missing: bool = True, 

1312 register_dataset_types: bool = False, 

1313 transfer_dimensions: bool = False, 

1314 dry_run: bool = False, 

1315 ) -> Collection[DatasetRef]: 

1316 """Transfer datasets to this Butler from a run in another Butler. 

1317 

1318 Parameters 

1319 ---------- 

1320 source_butler : `LimitedButler` 

1321 Butler from which the datasets are to be transferred. If data IDs 

1322 in ``source_refs`` are not expanded then this has to be a full 

1323 `Butler` whose registry will be used to expand data IDs. 

1324 source_refs : iterable of `DatasetRef` 

1325 Datasets defined in the source butler that should be transferred to 

1326 this butler. In most circumstances, ``transfer_from`` is faster if 

1327 the dataset refs are expanded. 

1328 transfer : `str`, optional 

1329 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1330 skip_missing : `bool` 

1331 If `True`, datasets with no datastore artifact associated with 

1332 them are not transferred. If `False` a registry entry will be 

1333 created even if no datastore record is created (and so will 

1334 look equivalent to the dataset being unstored). 

1335 register_dataset_types : `bool` 

1336 If `True` any missing dataset types are registered. Otherwise 

1337 an exception is raised. 

1338 transfer_dimensions : `bool`, optional 

1339 If `True`, dimension record data associated with the new datasets 

1340 will be transferred. 

1341 dry_run : `bool`, optional 

1342 If `True` the transfer will be processed without any modifications 

1343 made to the target butler and as if the target butler did not 

1344 have any of the datasets. 

1345 

1346 Returns 

1347 ------- 

1348 refs : `~collections.abc.Collection` of `DatasetRef`

1349 The refs added to this Butler. 

1350 

1351 Notes 

1352 ----- 

1353 The datastore artifact has to exist for a transfer 

1354 to be made but non-existence is not an error. 

1355 

1356 Datasets that already exist in this run will be skipped. 

1357 

1358 The datasets are imported as part of a transaction, although 

1359 dataset types are registered before the transaction is started. 

1360 This means that it is possible for a dataset type to be registered 

1361 even though transfer has failed. 
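
Examples
--------
A minimal sketch; ``source_butler`` and ``refs`` are assumed to be an
existing butler and an iterable of `DatasetRef` objects defined in it::

    transferred = butler.transfer_from(
        source_butler, refs, transfer="copy",
        register_dataset_types=True, transfer_dimensions=True
    )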

1362 """ 

1363 raise NotImplementedError() 

1364 

1365 @abstractmethod 

1366 def validateConfiguration( 

1367 self, 

1368 logFailures: bool = False, 

1369 datasetTypeNames: Iterable[str] | None = None, 

1370 ignore: Iterable[str] | None = None, 

1371 ) -> None: 

1372 """Validate butler configuration. 

1373 

1374 Checks that each `DatasetType` can be stored in the `Datastore`. 

1375 

1376 Parameters 

1377 ---------- 

1378 logFailures : `bool`, optional 

1379 If `True`, output a log message for every validation error 

1380 detected. 

1381 datasetTypeNames : iterable of `str`, optional 

1382 The `DatasetType` names that should be checked. This allows 

1383 only a subset to be selected. 

1384 ignore : iterable of `str`, optional 

1385 Names of DatasetTypes to skip over. This can be used to skip 

1386 known problems. If a named `DatasetType` corresponds to a 

1387 composite, all components of that `DatasetType` will also be 

1388 ignored. 

1389 

1390 Raises 

1391 ------ 

1392 ButlerValidationError 

1393 Raised if there is some inconsistency with how this Butler 

1394 is configured. 

1395 """ 

1396 raise NotImplementedError() 

1397 

1398 @property 

1399 @abstractmethod 

1400 def collections(self) -> Sequence[str]: 

1401 """The collections to search by default, in order 

1402 (`~collections.abc.Sequence` [ `str` ]). 

1403 """ 

1404 raise NotImplementedError() 

1405 

1406 @property 

1407 @abstractmethod 

1408 def run(self) -> str | None: 

1409 """Name of the run this butler writes outputs to by default (`str` or 

1410 `None`). 

1411 """ 

1412 raise NotImplementedError() 

1413 

1414 @property 

1415 @abstractmethod 

1416 def registry(self) -> Registry: 

1417 """The object that manages dataset metadata and relationships 

1418 (`Registry`). 

1419 

1420 Many operations that don't involve reading or writing butler datasets 

1421 are accessible only via `Registry` methods. Eventually these methods 

1422 will be replaced by equivalent `Butler` methods. 
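
Examples
--------
An illustrative sketch of a registry-only operation; the ``calexp*``
glob is a hypothetical dataset type pattern.

>>> for dataset_type in butler.registry.queryDatasetTypes("calexp*"):
...     print(dataset_type.name)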

1423 """ 

1424 raise NotImplementedError() 

1425 

1426 @abstractmethod 

1427 def _query(self) -> AbstractContextManager[Query]: 

1428 """Context manager returning a `Query` object used for construction 

1429 and execution of complex queries. 
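
A minimal usage sketch, mirroring the helper methods defined below
(the ``detector`` dimension is assumed to exist in the repository):

>>> with butler._query() as query:
...     data_ids = list(query.data_ids(["detector"]))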

1430 """ 

1431 raise NotImplementedError() 

1432 

1433 def _query_data_ids( 

1434 self, 

1435 dimensions: DimensionGroup | Iterable[str] | str, 

1436 *, 

1437 data_id: DataId | None = None, 

1438 where: str = "", 

1439 bind: Mapping[str, Any] | None = None, 

1440 with_dimension_records: bool = False, 

1441 order_by: Iterable[str] | str | None = None, 

1442 limit: int | None = None, 

1443 offset: int = 0, 

1444 explain: bool = True, 

1445 **kwargs: Any, 

1446 ) -> list[DataCoordinate]: 

1447 """Query for data IDs matching user-provided criteria. 

1448 

1449 Parameters 

1450 ---------- 

1451 dimensions : `DimensionGroup`, `str`, or \ 

1452 `~collections.abc.Iterable` [`str`] 

1453 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1454 instances or `str`. Will be automatically expanded to a complete 

1455 `DimensionGroup`. 

1456 data_id : `dict` or `DataCoordinate`, optional 

1457 A data ID whose key-value pairs are used as equality constraints 

1458 in the query. 

1459 where : `str`, optional 

1460 A string expression similar to a SQL WHERE clause. May involve 

1461 any column of a dimension table or (as a shortcut for the primary 

1462 key column of a dimension table) dimension name. See 

1463 :ref:`daf_butler_dimension_expressions` for more information. 

1464 bind : `~collections.abc.Mapping`, optional 

1465 Mapping containing literal values that should be injected into the 

1466 ``where`` expression, keyed by the identifiers they replace. 

1467 Values of collection type can be expanded in some cases; see 

1468 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1469 information. 

1470 with_dimension_records : `bool`, optional 

1471 If `True` (default is `False`) then returned data IDs will have 

1472 dimension records. 

1473 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1474 Names of the columns/dimensions to use for ordering returned data 

1475 IDs. Column name can be prefixed with minus (``-``) to use 

1476 descending ordering. 

1477 limit : `int`, optional 

1478 Upper limit on the number of returned records. 

1479 offset : `int`, optional 

1480 The number of records to skip before returning at most ``limit`` 

1481 records. If ``offset`` is specified then ``limit`` must be 

1482 specified as well. 

1483 explain : `bool`, optional 

1484 If `True` (default) then an `EmptyQueryResultError` exception is 

1485 raised when the resulting list is empty. The exception contains a 

1486 non-empty list of strings explaining possible causes for the empty 

1487 result. 

1488 **kwargs 

1489 Additional keyword arguments are forwarded to 

1490 `DataCoordinate.standardize` when processing the ``data_id`` 

1491 argument (and may be used to provide a constraining data ID even 

1492 when the ``data_id`` argument is `None`). 

1493 

1494 Returns 

1495 ------- 

1496 dataIds : `list` [`DataCoordinate`] 

1497 Data IDs matching the given query parameters. These are always 

1498 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1499 returns `True`). 

1500 

1501 Raises 

1502 ------ 

1503 lsst.daf.butler.registry.DataIdError 

1504 Raised when ``data_id`` or keyword arguments specify unknown 

1505 dimensions or values, or when they contain inconsistent values. 

1506 lsst.daf.butler.registry.UserExpressionError 

1507 Raised when ``where`` expression is invalid. 

1508 lsst.daf.butler.EmptyQueryResultError 

1509 Raised when the query generates an empty result and ``explain`` is 

1510 set to `True`. 

1511 TypeError 

1512 Raised when the arguments are incompatible, e.g. ``offset`` is 

1513 specified, but ``limit`` is not. 
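
Examples
--------
An illustrative sketch; the ``HSC`` instrument and the ``exposure`` and
``detector`` dimensions are assumptions about the repository contents.

>>> data_ids = butler._query_data_ids(
...     ["exposure", "detector"],
...     instrument="HSC",
...     where="detector < 10",
...     order_by="exposure",
...     limit=100,
... )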

1514 """ 

1515 if data_id is None: 

1516 data_id = DataCoordinate.make_empty(self.dimensions) 

1517 with self._query() as query: 

1518 result = ( 

1519 query.where(data_id, where, bind=bind, **kwargs) 

1520 .data_ids(dimensions) 

1521 .order_by(*ensure_iterable(order_by)) 

1522 .limit(limit, offset) 

1523 ) 

1524 if with_dimension_records: 

1525 result = result.with_dimension_records() 

1526 data_ids = list(result) 

1527 if explain and not data_ids: 

1528 raise EmptyQueryResultError(list(result.explain_no_results())) 

1529 return data_ids 

1530 

1531 def _query_datasets( 

1532 self, 

1533 dataset_type: str | DatasetType, 

1534 collections: str | Iterable[str] | None = None, 

1535 *, 

1536 find_first: bool = True, 

1537 data_id: DataId | None = None, 

1538 where: str = "", 

1539 bind: Mapping[str, Any] | None = None, 

1540 with_dimension_records: bool = False, 

1541 explain: bool = True, 

1542 **kwargs: Any, 

1543 ) -> list[DatasetRef]: 

1544 """Query for dataset references matching user-provided criteria. 

1545 

1546 Parameters 

1547 ---------- 

1548 dataset_type : `str` or `DatasetType` 

1549 Dataset type object or name to search for. 

1550 collections : collection expression, optional 

1551 A collection name or iterable of collection names to search. If not 

1552 provided, the default collections are used. See 

1553 :ref:`daf_butler_collection_expressions` for more information. 

1554 find_first : `bool`, optional 

1555 If `True` (default), for each result data ID, only yield one 

1556 `DatasetRef` of each `DatasetType`, from the first collection in 

1557 which a dataset of that dataset type appears (according to the 

1558 order of ``collections`` passed in). If `True`, ``collections`` 

1559 must not contain regular expressions and may not be ``...``. 

1560 data_id : `dict` or `DataCoordinate`, optional 

1561 A data ID whose key-value pairs are used as equality constraints in 

1562 the query. 

1563 where : `str`, optional 

1564 A string expression similar to a SQL WHERE clause. May involve any 

1565 column of a dimension table or (as a shortcut for the primary key 

1566 column of a dimension table) dimension name. See 

1567 :ref:`daf_butler_dimension_expressions` for more information. 

1568 bind : `~collections.abc.Mapping`, optional 

1569 Mapping containing literal values that should be injected into the 

1570 ``where`` expression, keyed by the identifiers they replace. Values 

1571 of collection type can be expanded in some cases; see 

1572 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1573 information. 

1574 with_dimension_records : `bool`, optional 

1575 If `True` (default is `False`) then returned data IDs will have 

1576 dimension records. 

1577 explain : `bool`, optional 

1578 If `True` (default) then an `EmptyQueryResultError` exception is 

1579 raised when the resulting list is empty. The exception contains a 

1580 non-empty list of strings explaining possible causes for the empty 

1581 result. 

1582 **kwargs 

1583 Additional keyword arguments are forwarded to 

1584 `DataCoordinate.standardize` when processing the ``data_id`` 

1585 argument (and may be used to provide a constraining data ID even 

1586 when the ``data_id`` argument is `None`). 

1587 

1588 Returns 

1589 ------- 

1590 refs : `list` [`DatasetRef`] 

1591 Dataset references matching the given query criteria. Nested data 

1592 IDs are guaranteed to include values for all implied dimensions 

1593 (i.e. `DataCoordinate.hasFull` will return `True`). 

1594 

1595 Raises 

1596 ------ 

1597 lsst.daf.butler.registry.DatasetTypeExpressionError 

1598 Raised when ``dataset_type`` expression is invalid. 

1599 lsst.daf.butler.registry.DataIdError 

1600 Raised when ``data_id`` or keyword arguments specify unknown 

1601 dimensions or values, or when they contain inconsistent values. 

1602 lsst.daf.butler.registry.UserExpressionError 

1603 Raised when ``where`` expression is invalid. 

1604 lsst.daf.butler.EmptyQueryResultError 

1605 Raised when the query generates an empty result and ``explain`` is 

1606 set to `True`. 

1607 TypeError 

1608 Raised when the arguments are incompatible, such as when a 

1609 collection wildcard is passed when ``find_first`` is `True`, or 

1610 when ``collections`` is `None` and default butler collections are 

1611 not defined. 

1612 

1613 Notes 

1614 ----- 

1615 This method accepts a single dataset type per call, so querying 

1616 several dataset types is equivalent to calling it separately for each 

1617 dataset type in turn; no information about the relationships between 

1618 datasets of different types is included. 
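
Examples
--------
A hypothetical sketch; the ``calexp`` dataset type, the
``HSC/runs/RC2`` collection, and the data ID values are illustrative.

>>> refs = butler._query_datasets(
...     "calexp",
...     collections="HSC/runs/RC2",
...     instrument="HSC",
...     where="visit = 903334",
... )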

1619 """ 

1620 if data_id is None: 

1621 data_id = DataCoordinate.make_empty(self.dimensions) 

1622 with self._query() as query: 

1623 result = query.where(data_id, where, bind=bind, **kwargs).datasets( 

1624 dataset_type, 

1625 collections=collections, 

1626 find_first=find_first, 

1627 ) 

1628 if with_dimension_records: 

1629 result = result.with_dimension_records() 

1630 refs = list(result) 

1631 if explain and not refs: 

1632 raise EmptyQueryResultError(list(result.explain_no_results())) 

1633 return refs 

1634 

1635 def _query_dimension_records( 

1636 self, 

1637 element: str, 

1638 *, 

1639 data_id: DataId | None = None, 

1640 where: str = "", 

1641 bind: Mapping[str, Any] | None = None, 

1642 order_by: Iterable[str] | str | None = None, 

1643 limit: int | None = None, 

1644 offset: int = 0, 

1645 explain: bool = True, 

1646 **kwargs: Any, 

1647 ) -> list[DimensionRecord]: 

1648 """Query for dimension information matching user-provided criteria. 

1649 

1650 Parameters 

1651 ---------- 

1652 element : `str` 

1653 The name of a dimension element to obtain records for. 

1654 data_id : `dict` or `DataCoordinate`, optional 

1655 A data ID whose key-value pairs are used as equality constraints 

1656 in the query. 

1657 where : `str`, optional 

1658 A string expression similar to a SQL WHERE clause. See 

1659 ``_query_data_ids`` and :ref:`daf_butler_dimension_expressions` for more 

1660 information. 

1661 bind : `~collections.abc.Mapping`, optional 

1662 Mapping containing literal values that should be injected into the 

1663 ``where`` expression, keyed by the identifiers they replace. 

1664 Values of collection type can be expanded in some cases; see 

1665 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1666 information. 

1667 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1668 Names of the columns/dimensions to use for ordering the returned 

1669 records. A column name can be prefixed with minus (``-``) to use 

1670 descending ordering. 

1671 limit : `int`, optional 

1672 Upper limit on the number of returned records. 

1673 offset : `int`, optional 

1674 The number of records to skip before returning at most ``limit`` 

1675 records. If ``offset`` is specified then ``limit`` must be 

1676 specified as well. 

1677 explain : `bool`, optional 

1678 If `True` (default) then an `EmptyQueryResultError` exception is 

1679 raised when the resulting list is empty. The exception contains a 

1680 non-empty list of strings explaining possible causes for the empty 

1681 result. 

1682 **kwargs 

1683 Additional keyword arguments are forwarded to 

1684 `DataCoordinate.standardize` when processing the ``data_id`` 

1685 argument (and may be used to provide a constraining data ID even 

1686 when the ``data_id`` argument is `None`). 

1687 

1688 Returns 

1689 ------- 

1690 records : `list` [`DimensionRecord`] 

1691 Dimension records matching the given query parameters. 

1692 

1693 Raises 

1694 ------ 

1695 lsst.daf.butler.registry.DataIdError 

1696 Raised when ``data_id`` or keyword arguments specify unknown 

1697 dimensions or values, or when they contain inconsistent values. 

1698 lsst.daf.butler.registry.UserExpressionError 

1699 Raised when ``where`` expression is invalid. 

1700 lsst.daf.butler.EmptyQueryResultError 

1701 Raised when the query generates an empty result and ``explain`` is 

1702 set to `True`. 

1703 TypeError 

1704 Raised when the arguments are incompatible, e.g. ``offset`` is 

1705 specified, but ``limit`` is not. 
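
Examples
--------
An illustrative sketch; the ``detector`` element and ``HSC`` instrument
are assumptions about the repository's dimension universe.

>>> records = butler._query_dimension_records("detector", instrument="HSC")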

1708 """ 

1709 if data_id is None: 

1710 data_id = DataCoordinate.make_empty(self.dimensions) 

1711 with self._query() as query: 

1712 result = ( 

1713 query.where(data_id, where, bind=bind, **kwargs) 

1714 .dimension_records(element) 

1715 .order_by(*ensure_iterable(order_by)) 

1716 .limit(limit, offset) 

1717 ) 

1718 dimension_records = list(result) 

1719 if explain and not dimension_records: 

1720 raise EmptyQueryResultError(list(result.explain_no_results())) 

1721 return dimension_records 

1722 

1723 @abstractmethod 

1724 def _clone( 

1725 self, 

1726 *, 

1727 collections: Any = None, 

1728 run: str | None = None, 

1729 inferDefaults: bool = True, 

1730 **kwargs: Any, 

1731 ) -> Butler: 

1732 """Return a new Butler instance connected to the same repository 

1733 as this one, but overriding ``collections``, ``run``, 

1734 ``inferDefaults``, and default data ID. 
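
A hedged usage sketch; the run and collection names are illustrative:

>>> writer = butler._clone(
...     run="u/someone/outputs", collections=["HSC/defaults"]
... )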

1735 """ 

1736 raise NotImplementedError()