Coverage for python/lsst/daf/butler/_butler.py: 66%

144 statements  

coverage.py v7.3.2, created at 2023-12-08 10:56 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler"] 

31 

32from abc import abstractmethod 

33from collections.abc import Collection, Iterable, Mapping, Sequence 

34from contextlib import AbstractContextManager 

35from typing import TYPE_CHECKING, Any, TextIO 

36 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.utils import doImportType 

39from lsst.utils.logging import getLogger 

40 

41from ._butler_config import ButlerConfig 

42from ._butler_repo_index import ButlerRepoIndex 

43from ._config import Config, ConfigSubset 

44from ._limited_butler import LimitedButler 

45from .datastore import Datastore 

46from .dimensions import DimensionConfig 

47from .registry import RegistryConfig, _RegistryFactory 

48from .repo_relocation import BUTLER_ROOT_TAG 

49 

50if TYPE_CHECKING: 

51 from ._dataset_existence import DatasetExistence 

52 from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

53 from ._dataset_type import DatasetType 

54 from ._deferredDatasetHandle import DeferredDatasetHandle 

55 from ._file_dataset import FileDataset 

56 from ._query import Query 

57 from ._storage_class import StorageClass 

58 from ._timespan import Timespan 

59 from .datastore import DatasetRefURIs 

60 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord 

61 from .registry import CollectionArgType, Registry 

62 from .transfers import RepoExportContext 

63 

64_LOG = getLogger(__name__) 

65 

66 

67class Butler(LimitedButler): 

68 """Interface for data butler and factory for Butler instances. 

69 

70 Parameters 

71 ---------- 

72 config : `ButlerConfig`, `Config` or `str`, optional

73 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

74 If a directory path is given the configuration will be read from a 

75 ``butler.yaml`` file in that location. If `None` is given default 

76 values will be used. If ``config`` contains a "cls" key then its value

77 is used as the name of the butler class, which must be a sub-class of

78 this class; otherwise `DirectButler` is instantiated.

79 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

80 An expression specifying the collections to be searched (in order) when 

81 reading datasets. 

82 This may be a `str` collection name or an iterable thereof. 

83 See :ref:`daf_butler_collection_expressions` for more information. 

84 These collections are not registered automatically and must be 

85 manually registered before they are used by any method, but they may be 

86 manually registered after the `Butler` is initialized. 

87 run : `str`, optional 

88 Name of the `~CollectionType.RUN` collection new datasets should be 

89 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

90 ``collections`` will be set to ``[run]``. If not `None`, this 

91 collection will automatically be registered. If this is not set (and 

92 ``writeable`` is not set either), a read-only butler will be created. 

93 searchPaths : `list` of `str`, optional 

94 Directory paths to search when calculating the full Butler 

95 configuration. Not used if the supplied config is already a 

96 `ButlerConfig`. 

97 writeable : `bool`, optional 

98 Explicitly sets whether the butler supports write operations. If not 

99 provided, a read-write butler is created if any of ``run``, ``tags``, 

100 or ``chains`` is non-empty. 

101 inferDefaults : `bool`, optional 

102 If `True` (default) infer default data ID values from the values 

103 present in the datasets in ``collections``: if all collections have the 

104 same value (or no value) for a governor dimension, that value will be 

105 the default for that dimension. Nonexistent collections are ignored. 

106 If a default value is provided explicitly for a governor dimension via 

107 ``**kwargs``, no default will be inferred for that dimension. 

108 **kwargs : `Any` 

109 Additional keyword arguments passed to a constructor of actual butler 

110 class. 

111 

112 Notes 

113 ----- 

114 The preferred way to instantiate Butler is via the `from_config` method. 

115 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

116 but ``mypy`` will complain about the former. 

117 """ 

118 

119 def __new__( 

120 cls, 

121 config: Config | ResourcePathExpression | None = None, 

122 *, 

123 collections: Any = None, 

124 run: str | None = None, 

125 searchPaths: Sequence[ResourcePathExpression] | None = None, 

126 writeable: bool | None = None, 

127 inferDefaults: bool = True, 

128 **kwargs: Any, 

129 ) -> Butler: 

130 if cls is Butler: 

131 cls = cls._find_butler_class(config, searchPaths) 

132 # Note: we do not pass any parameters to __new__, Python will pass them 

133 # to __init__ after __new__ returns sub-class instance. 

134 return super().__new__(cls) 

135 

136 @staticmethod 

137 def _find_butler_class( 

138 config: Config | ResourcePathExpression | None = None, 

139 searchPaths: Sequence[ResourcePathExpression] | None = None, 

140 ) -> type[Butler]: 

141 """Find actual class to instantiate.""" 

142 butler_class_name: str | None = None 

143 if config is not None: 

144 # Check for optional "cls" key in config. 

145 if not isinstance(config, Config): 

146 config = ButlerConfig(config, searchPaths=searchPaths) 

147 butler_class_name = config.get("cls") 

148 

149 # Make DirectButler if class is not specified. 

150 butler_class: type[Butler] 

151 if butler_class_name is None: 

152 from .direct_butler import DirectButler 

153 

154 butler_class = DirectButler 

155 else: 

156 butler_class = doImportType(butler_class_name) 

157 if not issubclass(butler_class, Butler): 

158 raise TypeError(f"{butler_class_name} is not a subclass of Butler") 

159 return butler_class 

160 

161 @classmethod 

162 def from_config( 

163 cls, 

164 config: Config | ResourcePathExpression | None = None, 

165 *, 

166 collections: Any = None, 

167 run: str | None = None, 

168 searchPaths: Sequence[ResourcePathExpression] | None = None, 

169 writeable: bool | None = None, 

170 inferDefaults: bool = True, 

171 **kwargs: Any, 

172 ) -> Butler: 

173 """Create butler instance from configuration. 

174 

175 Parameters 

176 ---------- 

177 config : `ButlerConfig`, `Config` or `str`, optional

178 Configuration. Anything acceptable to the `ButlerConfig` 

179 constructor. If a directory path is given the configuration will be 

180 read from a ``butler.yaml`` file in that location. If `None` is 

181 given default values will be used. If ``config`` contains a "cls" key

182 then its value is used as the name of the butler class, which must be

183 a sub-class of this class; otherwise `DirectButler` is instantiated.

184 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

185 An expression specifying the collections to be searched (in order) 

186 when reading datasets. 

187 This may be a `str` collection name or an iterable thereof. 

188 See :ref:`daf_butler_collection_expressions` for more information. 

189 These collections are not registered automatically and must be 

190 manually registered before they are used by any method, but they 

191 may be manually registered after the `Butler` is initialized. 

192 run : `str`, optional 

193 Name of the `~CollectionType.RUN` collection new datasets should be 

194 inserted into. If ``collections`` is `None` and ``run`` is not 

195 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

196 this collection will automatically be registered. If this is not 

197 set (and ``writeable`` is not set either), a read-only butler will 

198 be created. 

199 searchPaths : `list` of `str`, optional 

200 Directory paths to search when calculating the full Butler 

201 configuration. Not used if the supplied config is already a 

202 `ButlerConfig`. 

203 writeable : `bool`, optional 

204 Explicitly sets whether the butler supports write operations. If 

205 not provided, a read-write butler is created if any of ``run``, 

206 ``tags``, or ``chains`` is non-empty. 

207 inferDefaults : `bool`, optional 

208 If `True` (default) infer default data ID values from the values 

209 present in the datasets in ``collections``: if all collections have 

210 the same value (or no value) for a governor dimension, that value 

211 will be the default for that dimension. Nonexistent collections 

212 are ignored. If a default value is provided explicitly for a 

213 governor dimension via ``**kwargs``, no default will be inferred 

214 for that dimension. 

215 **kwargs : `Any` 

216 Additional keyword arguments passed to a constructor of actual 

217 butler class. 

218 

219 Notes 

220 ----- 

221 Calling this factory method is identical to calling 

222 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

223 complains about ``Butler()`` call. 

224 

225 Examples 

226 -------- 

227 While there are many ways to control exactly how a `Butler` interacts 

228 with the collections in its `Registry`, the most common cases are still 

229 simple. 

230 

231 For a read-only `Butler` that searches one collection, do:: 

232 

233 butler = Butler.from_config( 

234 "/path/to/repo", collections=["u/alice/DM-50000"] 

235 ) 

236 

237 For a read-write `Butler` that writes to and reads from a 

238 `~CollectionType.RUN` collection:: 

239 

240 butler = Butler.from_config( 

241 "/path/to/repo", run="u/alice/DM-50000/a" 

242 ) 

243 

244 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

245 because we want to write to one `~CollectionType.RUN` collection but 

246 read from several others (as well):: 

247 

248 butler = Butler.from_config( 

249 "/path/to/repo", 

250 run="u/alice/DM-50000/a", 

251 collections=[ 

252 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" 

253 ] 

254 ) 

255 

256 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

257 Datasets will be read first from that run (since it appears first in 

258 the chain), and then from ``u/bob/DM-49998`` and finally 

259 ``HSC/defaults``. 

260 

261 Finally, one can always create a `Butler` with no collections:: 

262 

263 butler = Butler.from_config("/path/to/repo", writeable=True) 

264 

265 This can be extremely useful when you just want to use 

266 ``butler.registry``, e.g. for inserting dimension data or managing 

267 collections, or when the collections you want to use with the butler 

268 are not consistent. Passing ``writeable`` explicitly here is only 

269 necessary if you want to be able to make changes to the repo; usually

270 the value for ``writeable`` can be guessed from the collection 

271 arguments provided, but it defaults to `False` when there are no

272 collection arguments. 

273 """ 

274 cls = cls._find_butler_class(config, searchPaths) 

275 return cls( 

276 config, 

277 collections=collections, 

278 run=run, 

279 searchPaths=searchPaths, 

280 writeable=writeable, 

281 inferDefaults=inferDefaults, 

282 **kwargs, 

283 ) 

284 

285 @staticmethod 

286 def makeRepo( 

287 root: ResourcePathExpression, 

288 config: Config | str | None = None, 

289 dimensionConfig: Config | str | None = None, 

290 standalone: bool = False, 

291 searchPaths: list[str] | None = None, 

292 forceConfigRoot: bool = True, 

293 outfile: ResourcePathExpression | None = None, 

294 overwrite: bool = False, 

295 ) -> Config: 

296 """Create an empty data repository by adding a butler.yaml config 

297 to a repository root directory. 

298 

299 Parameters 

300 ---------- 

301 root : `lsst.resources.ResourcePathExpression` 

302 Path or URI to the root location of the new repository. Will be 

303 created if it does not exist. 

304 config : `Config` or `str`, optional 

305 Configuration to write to the repository, after setting any 

306 root-dependent Registry or Datastore config options. Can not 

307 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

308 configuration will be used. Root-dependent config options 

309 specified in this config are overwritten if ``forceConfigRoot`` 

310 is `True`. 

311 dimensionConfig : `Config` or `str`, optional 

312 Configuration for dimensions, will be used to initialize registry 

313 database. 

314 standalone : `bool` 

315 If `True`, write all expanded defaults, not just customized or

316 repository-specific settings. 

317 This (mostly) decouples the repository from the default 

318 configuration, insulating it from changes to the defaults (which 

319 may be good or bad, depending on the nature of the changes). 

320 Future *additions* to the defaults will still be picked up when 

321 initializing `Butlers` to repos created with ``standalone=True``. 

322 searchPaths : `list` of `str`, optional 

323 Directory paths to search when calculating the full butler 

324 configuration. 

325 forceConfigRoot : `bool`, optional 

326 If `False`, any values present in the supplied ``config`` that 

327 would normally be reset are not overridden and will appear 

328 directly in the output config. This allows non-standard overrides 

329 of the root directory for a datastore or registry to be given. 

330 If this parameter is `True` the values for ``root`` will be 

331 forced into the resulting config if appropriate. 

332 outfile : `lsst.resources.ResourcePathExpression`, optional

333 If not-`None`, the output configuration will be written to this 

334 location rather than into the repository itself. Can be a URI 

335 string. Can refer to a directory that will be used to write 

336 ``butler.yaml``. 

337 overwrite : `bool`, optional 

338 Create a new configuration file even if one already exists 

339 in the specified output location. Default is to raise 

340 an exception. 

341 

342 Returns 

343 ------- 

344 config : `Config` 

345 The updated `Config` instance written to the repo. 

346 

347 Raises 

348 ------ 

349 ValueError 

350 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

351 regular Config (as these subclasses would make it impossible to 

352 support ``standalone=False``). 

353 FileExistsError 

354 Raised if the output config file already exists. 

355 os.error 

356 Raised if the directory does not exist, exists but is not a 

357 directory, or cannot be created. 

358 

359 Notes 

360 ----- 

361 Note that when ``standalone=False`` (the default), the configuration 

362 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

363 construct the repository should also be used to construct any Butlers 

364 to avoid configuration inconsistencies. 
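
Examples
--------
A minimal sketch; the repository path and run name below are
illustrative::

    Butler.makeRepo("/path/to/repo")
    butler = Butler.from_config("/path/to/repo", run="u/alice/ingest")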

365 """ 

366 if isinstance(config, ButlerConfig | ConfigSubset): 

367 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

368 

369 # Ensure that the root of the repository exists or can be made 

370 root_uri = ResourcePath(root, forceDirectory=True) 

371 root_uri.mkdir() 

372 

373 config = Config(config) 

374 

375 # If we are creating a new repo from scratch with relative roots, 

376 # do not propagate an explicit root from the config file 

377 if "root" in config: 

378 del config["root"] 

379 

380 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

381 imported_class = doImportType(full["datastore", "cls"]) 

382 if not issubclass(imported_class, Datastore): 

383 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

384 datastoreClass: type[Datastore] = imported_class 

385 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

386 

387 # if key exists in given config, parse it, otherwise parse the defaults 

388 # in the expanded config 

389 if config.get(("registry", "db")): 

390 registryConfig = RegistryConfig(config) 

391 else: 

392 registryConfig = RegistryConfig(full) 

393 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

394 if defaultDatabaseUri is not None: 

395 Config.updateParameters( 

396 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

397 ) 

398 else: 

399 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

400 

401 if standalone: 

402 config.merge(full) 

403 else: 

404 # Always expand the registry.managers section into the per-repo 

405 # config, because after the database schema is created, it's not 

406 # allowed to change anymore. Note that in the standalone=True 

407 # branch, _everything_ in the config is expanded, so there's no 

408 # need to special case this. 

409 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

410 configURI: ResourcePathExpression 

411 if outfile is not None: 

412 # When writing to a separate location we must include 

413 # the root of the butler repo in the config else it won't know 

414 # where to look. 

415 config["root"] = root_uri.geturl() 

416 configURI = outfile 

417 else: 

418 configURI = root_uri 

419 # Strip obscore configuration, if it is present, before writing config 

420 # to a file, obscore config will be stored in registry. 

421 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

422 config_to_write = config.copy() 

423 del config_to_write[obscore_config_key] 

424 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

425 # configFile attribute is updated, need to copy it to original. 

426 config.configFile = config_to_write.configFile 

427 else: 

428 config.dumpToUri(configURI, overwrite=overwrite) 

429 

430 # Create Registry and populate tables 

431 registryConfig = RegistryConfig(config.get("registry")) 

432 dimensionConfig = DimensionConfig(dimensionConfig) 

433 _RegistryFactory(registryConfig).create_from_config( 

434 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

435 ) 

436 

437 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

438 

439 return config 

440 

441 @classmethod 

442 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

443 """Look up the label in a butler repository index. 

444 

445 Parameters 

446 ---------- 

447 label : `str` 

448 Label of the Butler repository to look up. 

449 return_label : `bool`, optional 

450 If ``label`` cannot be found in the repository index (either 

451 because index is not defined or ``label`` is not in the index) and 

452 ``return_label`` is `True` then return ``ResourcePath(label)``. 

453 If ``return_label`` is `False` (default) then an exception will be 

454 raised instead. 

455 

456 Returns 

457 ------- 

458 uri : `lsst.resources.ResourcePath` 

459 URI to the Butler repository associated with the given label or 

460 default value if it is provided. 

461 

462 Raises 

463 ------ 

464 KeyError 

465 Raised if the label is not found in the index, or if an index 

466 is not defined, and ``return_label`` is `False`. 

467 

468 Notes 

469 ----- 

470 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

471 information is discovered. 
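
Examples
--------
An illustrative lookup, assuming a repository index that defines a
``"main"`` label::

    uri = Butler.get_repo_uri("main")

    # Fall back to treating the argument as a path or URI when it is
    # not present in the index.
    uri = Butler.get_repo_uri("/path/to/repo", return_label=True)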

472 """ 

473 return ButlerRepoIndex.get_repo_uri(label, return_label) 

474 

475 @classmethod 

476 def get_known_repos(cls) -> set[str]: 

477 """Retrieve the list of known repository labels. 

478 

479 Returns 

480 ------- 

481 repos : `set` of `str` 

482 All the known labels. Can be empty if no index can be found. 

483 

484 Notes 

485 ----- 

486 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

487 information is discovered. 

488 """ 

489 return ButlerRepoIndex.get_known_repos() 

490 

491 @abstractmethod 

492 def _caching_context(self) -> AbstractContextManager[None]: 

493 """Context manager that enables caching.""" 

494 raise NotImplementedError() 

495 

496 @abstractmethod 

497 def transaction(self) -> AbstractContextManager[None]: 

498 """Context manager supporting `Butler` transactions. 

499 

500 Transactions can be nested. 

501 """ 

502 raise NotImplementedError() 

503 

504 @abstractmethod 

505 def put( 

506 self, 

507 obj: Any, 

508 datasetRefOrType: DatasetRef | DatasetType | str, 

509 /, 

510 dataId: DataId | None = None, 

511 *, 

512 run: str | None = None, 

513 **kwargs: Any, 

514 ) -> DatasetRef: 

515 """Store and register a dataset. 

516 

517 Parameters 

518 ---------- 

519 obj : `object` 

520 The dataset. 

521 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

522 When `DatasetRef` is provided, ``dataId`` should be `None`. 

523 Otherwise the `DatasetType` or name thereof. If a fully resolved 

524 `DatasetRef` is given the run and ID are used directly. 

525 dataId : `dict` or `DataCoordinate` 

526 A `dict` of `Dimension` link name, value pairs that label the 

527 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

528 should be provided as the second argument. 

529 run : `str`, optional 

530 The name of the run the dataset should be added to, overriding 

531 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

532 **kwargs 

533 Additional keyword arguments used to augment or construct a 

534 `DataCoordinate`. See `DataCoordinate.standardize` 

535 parameters. Not used if a resolved `DatasetRef` is provided.

536 

537 Returns 

538 ------- 

539 ref : `DatasetRef` 

540 A reference to the stored dataset, updated with the correct id if 

541 given. 

542 

543 Raises 

544 ------ 

545 TypeError 

546 Raised if the butler is read-only or if no run has been provided. 
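
Examples
--------
A minimal sketch, assuming a writeable butler with a default run; the
dataset type name and data ID keys are illustrative::

    ref = butler.put(exposure, "calexp",
                     instrument="HSC", visit=903334, detector=42)

    # A fully resolved DatasetRef carries its own run and dataset ID.
    ref = butler.put(exposure, existing_ref)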

547 """ 

548 raise NotImplementedError() 

549 

550 @abstractmethod 

551 def getDeferred( 

552 self, 

553 datasetRefOrType: DatasetRef | DatasetType | str, 

554 /, 

555 dataId: DataId | None = None, 

556 *, 

557 parameters: dict | None = None, 

558 collections: Any = None, 

559 storageClass: str | StorageClass | None = None, 

560 **kwargs: Any, 

561 ) -> DeferredDatasetHandle: 

562 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

563 after an immediate registry lookup. 

564 

565 Parameters 

566 ---------- 

567 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

568 When `DatasetRef` the `dataId` should be `None`. 

569 Otherwise the `DatasetType` or name thereof. 

570 dataId : `dict` or `DataCoordinate`, optional 

571 A `dict` of `Dimension` link name, value pairs that label the 

572 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

573 should be provided as the first argument. 

574 parameters : `dict` 

575 Additional StorageClass-defined options to control reading, 

576 typically used to efficiently read only a subset of the dataset. 

577 collections : Any, optional 

578 Collections to be searched, overriding ``self.collections``. 

579 Can be any of the types supported by the ``collections`` argument 

580 to butler construction. 

581 storageClass : `StorageClass` or `str`, optional 

582 The storage class to be used to override the Python type 

583 returned by this method. By default the returned type matches 

584 the dataset type definition for this dataset. Specifying a 

585 read `StorageClass` can force a different type to be returned. 

586 This type must be compatible with the original type. 

587 **kwargs 

588 Additional keyword arguments used to augment or construct a 

589 `DataId`. See `DataId` parameters. 

590 

591 Returns 

592 ------- 

593 obj : `DeferredDatasetHandle` 

594 A handle which can be used to retrieve a dataset at a later time. 

595 

596 Raises 

597 ------ 

598 LookupError 

599 Raised if no matching dataset exists in the `Registry` or 

600 datastore. 

601 ValueError 

602 Raised if a resolved `DatasetRef` was passed as an input, but it 

603 differs from the one found in the registry. 

604 TypeError 

605 Raised if no collections were provided. 
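
Examples
--------
A hedged sketch; the dataset type name, data ID keys, and storage-class
parameter are illustrative::

    handle = butler.getDeferred("calexp",
                                instrument="HSC", visit=903334,
                                detector=42)
    # The registry lookup has already happened; the actual read is
    # deferred until here.
    cutout = handle.get(parameters={"bbox": bbox})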

606 """ 

607 raise NotImplementedError() 

608 

609 @abstractmethod 

610 def get( 

611 self, 

612 datasetRefOrType: DatasetRef | DatasetType | str, 

613 /, 

614 dataId: DataId | None = None, 

615 *, 

616 parameters: dict[str, Any] | None = None, 

617 collections: Any = None, 

618 storageClass: StorageClass | str | None = None, 

619 **kwargs: Any, 

620 ) -> Any: 

621 """Retrieve a stored dataset. 

622 

623 Parameters 

624 ---------- 

625 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

626 When `DatasetRef` the `dataId` should be `None`. 

627 Otherwise the `DatasetType` or name thereof. 

628 If a resolved `DatasetRef`, the associated dataset 

629 is returned directly without additional querying. 

630 dataId : `dict` or `DataCoordinate` 

631 A `dict` of `Dimension` link name, value pairs that label the 

632 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

633 should be provided as the first argument. 

634 parameters : `dict` 

635 Additional StorageClass-defined options to control reading, 

636 typically used to efficiently read only a subset of the dataset. 

637 collections : Any, optional 

638 Collections to be searched, overriding ``self.collections``. 

639 Can be any of the types supported by the ``collections`` argument 

640 to butler construction. 

641 storageClass : `StorageClass` or `str`, optional 

642 The storage class to be used to override the Python type 

643 returned by this method. By default the returned type matches 

644 the dataset type definition for this dataset. Specifying a 

645 read `StorageClass` can force a different type to be returned. 

646 This type must be compatible with the original type. 

647 **kwargs 

648 Additional keyword arguments used to augment or construct a 

649 `DataCoordinate`. See `DataCoordinate.standardize` 

650 parameters. 

651 

652 Returns 

653 ------- 

654 obj : `object` 

655 The dataset. 

656 

657 Raises 

658 ------ 

659 LookupError 

660 Raised if no matching dataset exists in the `Registry`. 

661 TypeError 

662 Raised if no collections were provided. 

663 

664 Notes 

665 ----- 

666 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

667 this method requires that the given data ID include temporal dimensions 

668 beyond the dimensions of the dataset type itself, in order to find the 

669 dataset with the appropriate validity range. For example, a "bias" 

670 dataset with native dimensions ``{instrument, detector}`` could be 

671 fetched with a ``{instrument, detector, exposure}`` data ID, because 

672 ``exposure`` is a temporal dimension. 
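
Examples
--------
A minimal sketch; the dataset type name and data ID keys are
illustrative::

    calexp = butler.get("calexp",
                        instrument="HSC", visit=903334, detector=42)

    # Equivalent call using an explicit data ID mapping.
    calexp = butler.get(
        "calexp", {"instrument": "HSC", "visit": 903334, "detector": 42}
    )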

673 """ 

674 raise NotImplementedError() 

675 

676 @abstractmethod 

677 def getURIs( 

678 self, 

679 datasetRefOrType: DatasetRef | DatasetType | str, 

680 /, 

681 dataId: DataId | None = None, 

682 *, 

683 predict: bool = False, 

684 collections: Any = None, 

685 run: str | None = None, 

686 **kwargs: Any, 

687 ) -> DatasetRefURIs: 

688 """Return the URIs associated with the dataset. 

689 

690 Parameters 

691 ---------- 

692 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

693 When `DatasetRef` the `dataId` should be `None`. 

694 Otherwise the `DatasetType` or name thereof. 

695 dataId : `dict` or `DataCoordinate` 

696 A `dict` of `Dimension` link name, value pairs that label the 

697 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

698 should be provided as the first argument. 

699 predict : `bool` 

700 If `True`, allow URIs to be returned of datasets that have not 

701 been written. 

702 collections : Any, optional 

703 Collections to be searched, overriding ``self.collections``. 

704 Can be any of the types supported by the ``collections`` argument 

705 to butler construction. 

706 run : `str`, optional 

707 Run to use for predictions, overriding ``self.run``. 

708 **kwargs 

709 Additional keyword arguments used to augment or construct a 

710 `DataCoordinate`. See `DataCoordinate.standardize` 

711 parameters. 

712 

713 Returns 

714 ------- 

715 uris : `DatasetRefURIs` 

716 The URI to the primary artifact associated with this dataset (if 

717 the dataset was disassembled within the datastore this may be 

718 `None`), and the URIs to any components associated with the dataset 

719 artifact (can be empty if there are no components).

720 """ 

721 raise NotImplementedError() 

722 

723 @abstractmethod 

724 def getURI( 

725 self, 

726 datasetRefOrType: DatasetRef | DatasetType | str, 

727 /, 

728 dataId: DataId | None = None, 

729 *, 

730 predict: bool = False, 

731 collections: Any = None, 

732 run: str | None = None, 

733 **kwargs: Any, 

734 ) -> ResourcePath: 

735 """Return the URI to the Dataset. 

736 

737 Parameters 

738 ---------- 

739 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

740 When `DatasetRef` the `dataId` should be `None`. 

741 Otherwise the `DatasetType` or name thereof. 

742 dataId : `dict` or `DataCoordinate` 

743 A `dict` of `Dimension` link name, value pairs that label the 

744 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

745 should be provided as the first argument. 

746 predict : `bool` 

747 If `True`, allow URIs to be returned of datasets that have not 

748 been written. 

749 collections : Any, optional 

750 Collections to be searched, overriding ``self.collections``. 

751 Can be any of the types supported by the ``collections`` argument 

752 to butler construction. 

753 run : `str`, optional 

754 Run to use for predictions, overriding ``self.run``. 

755 **kwargs 

756 Additional keyword arguments used to augment or construct a 

757 `DataCoordinate`. See `DataCoordinate.standardize` 

758 parameters. 

759 

760 Returns 

761 ------- 

762 uri : `lsst.resources.ResourcePath` 

763 URI pointing to the Dataset within the datastore. If the 

764 Dataset does not exist in the datastore, and if ``predict`` is 

765 `True`, the URI will be a prediction and will include a URI 

766 fragment "#predicted". 

767 If the datastore does not have entities that relate well 

768 to the concept of a URI, the returned URI string will be

769 descriptive. The returned URI is not guaranteed to be obtainable. 

770 

771 Raises 

772 ------ 

773 LookupError 

774 Raised if a URI has been requested for a dataset that does not

775 exist and guessing is not allowed.

776 ValueError 

777 Raised if a resolved `DatasetRef` was passed as an input, but it 

778 differs from the one found in the registry. 

779 TypeError 

780 Raised if no collections were provided. 

781 RuntimeError 

782 Raised if a URI is requested for a dataset that consists of 

783 multiple artifacts. 
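
Examples
--------
An illustrative sketch, including prediction of a URI for a dataset
that has not been written yet; names are illustrative::

    uri = butler.getURI("calexp",
                        instrument="HSC", visit=903334, detector=42)

    predicted = butler.getURI(
        "calexp", data_id, predict=True, run="u/alice/run"
    )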

784 """ 

785 raise NotImplementedError() 

786 

787 @abstractmethod 

788 def get_dataset_type(self, name: str) -> DatasetType: 

789 """Get the `DatasetType`. 

790 

791 Parameters 

792 ---------- 

793 name : `str` 

794 Name of the type. 

795 

796 Returns 

797 ------- 

798 type : `DatasetType` 

799 The `DatasetType` associated with the given name. 

800 

801 Raises 

802 ------ 

803 lsst.daf.butler.MissingDatasetTypeError 

804 Raised if the requested dataset type has not been registered. 

805 

806 Notes 

807 ----- 

808 This method handles component dataset types automatically, though most 

809 other operations do not. 

810 """ 

811 raise NotImplementedError() 

812 

813 @abstractmethod 

814 def get_dataset( 

815 self, 

816 id: DatasetId, 

817 storage_class: str | StorageClass | None, 

818 dimension_records: bool = False, 

819 datastore_records: bool = False, 

820 ) -> DatasetRef | None: 

821 """Retrieve a Dataset entry. 

822 

823 Parameters 

824 ---------- 

825 id : `DatasetId` 

826 The unique identifier for the dataset. 

827 storage_class : `str` or `StorageClass` or `None` 

828 A storage class to use when creating the returned entry. If given 

829 it must be compatible with the default storage class. 

830 dimension_records : `bool`, optional

831 If `True` the ref will be expanded and contain dimension records. 

832 datastore_records : `bool`, optional

833 If `True` the ref will contain associated datastore records. 

834 

835 Returns 

836 ------- 

837 ref : `DatasetRef` or `None` 

838 A ref to the Dataset, or `None` if no matching Dataset 

839 was found. 

840 """ 

841 raise NotImplementedError() 

842 

843 @abstractmethod 

844 def find_dataset( 

845 self, 

846 dataset_type: DatasetType | str, 

847 data_id: DataId | None = None, 

848 *, 

849 collections: str | Sequence[str] | None = None, 

850 timespan: Timespan | None = None, 

851 storage_class: str | StorageClass | None = None, 

852 dimension_records: bool = False, 

853 datastore_records: bool = False, 

854 **kwargs: Any, 

855 ) -> DatasetRef | None: 

856 """Find a dataset given its `DatasetType` and data ID. 

857 

858 This can be used to obtain a `DatasetRef` that permits the dataset to 

859 be read from a `Datastore`. If the dataset is a component and can not 

860 be found using the provided dataset type, a dataset ref for the parent 

861 will be returned instead but with the correct dataset type. 

862 

863 Parameters 

864 ---------- 

865 dataset_type : `DatasetType` or `str` 

866 A `DatasetType` or the name of one. If this is a `DatasetType` 

867 instance, its storage class will be respected and propagated to 

868 the output, even if it differs from the dataset type definition 

869 in the registry, as long as the storage classes are convertible. 

870 data_id : `dict` or `DataCoordinate`, optional 

871 A `dict`-like object containing the `Dimension` links that identify 

872 the dataset within a collection. If it is a `dict` the dataId 

873 can include dimension record values such as ``day_obs`` and 

874 ``seq_num`` or ``full_name`` that can be used to derive the 

875 primary dimension. 

876 collections : `str` or `list` [`str`], optional 

877 An ordered list of collections to search for the dataset.

878 Defaults to ``self.defaults.collections``. 

879 timespan : `Timespan`, optional 

880 A timespan that the validity range of the dataset must overlap. 

881 If not provided, any `~CollectionType.CALIBRATION` collections 

882 matched by the ``collections`` argument will not be searched. 

883 storage_class : `str` or `StorageClass` or `None` 

884 A storage class to use when creating the returned entry. If given 

885 it must be compatible with the default storage class. 

886 dimension_records : `bool`, optional

887 If `True` the ref will be expanded and contain dimension records. 

888 datastore_records : `bool`, optional

889 If `True` the ref will contain associated datastore records. 

890 **kwargs 

891 Additional keyword arguments passed to 

892 `DataCoordinate.standardize` to convert ``dataId`` to a true 

893 `DataCoordinate` or augment an existing one. This can also include 

894 dimension record metadata that can be used to derive a primary 

895 dimension value. 

896 

897 Returns 

898 ------- 

899 ref : `DatasetRef` 

900 A reference to the dataset, or `None` if no matching Dataset 

901 was found. 

902 

903 Raises 

904 ------ 

905 lsst.daf.butler.NoDefaultCollectionError 

906 Raised if ``collections`` is `None` and 

907 ``self.collections`` is `None`. 

908 LookupError 

909 Raised if one or more data ID keys are missing. 

910 lsst.daf.butler.MissingDatasetTypeError 

911 Raised if the dataset type does not exist. 

912 lsst.daf.butler.MissingCollectionError 

913 Raised if any of ``collections`` does not exist in the registry. 

914 

915 Notes 

916 ----- 

917 This method simply returns `None` and does not raise an exception even 

918 when the set of collections searched is intrinsically incompatible with 

919 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

920 only `~CollectionType.CALIBRATION` collections are being searched. 

921 This may make it harder to debug some lookup failures, but the behavior 

922 is intentional; we consider it more important that failed searches are 

923 reported consistently, regardless of the reason, and that adding 

924 additional collections that do not contain a match to the search path 

925 never changes the behavior. 

926 

927 This method handles component dataset types automatically, though most 

928 other query operations do not. 
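
Examples
--------
A hedged sketch showing a plain lookup and a calibration lookup
constrained by a timespan; all names are illustrative::

    ref = butler.find_dataset("calexp",
                              instrument="HSC", visit=903334,
                              detector=42)

    bias_ref = butler.find_dataset(
        "bias",
        instrument="HSC",
        detector=42,
        collections="HSC/calib",
        timespan=exposure_timespan,
    )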

929 """ 

930 raise NotImplementedError() 

931 

932 @abstractmethod 

933 def retrieveArtifacts( 

934 self, 

935 refs: Iterable[DatasetRef], 

936 destination: ResourcePathExpression, 

937 transfer: str = "auto", 

938 preserve_path: bool = True, 

939 overwrite: bool = False, 

940 ) -> list[ResourcePath]: 

941 """Retrieve the artifacts associated with the supplied refs. 

942 

943 Parameters 

944 ---------- 

945 refs : iterable of `DatasetRef` 

946 The datasets for which artifacts are to be retrieved. 

947 A single ref can result in multiple artifacts. The refs must 

948 be resolved. 

949 destination : `lsst.resources.ResourcePath` or `str` 

950 Location to write the artifacts. 

951 transfer : `str`, optional 

952 Method to use to transfer the artifacts. Must be one of the options 

953 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

954 "move" is not allowed. 

955 preserve_path : `bool`, optional 

956 If `True` the full path of the artifact within the datastore 

957 is preserved. If `False` the final file component of the path 

958 is used. 

959 overwrite : `bool`, optional 

960 If `True` allow transfers to overwrite existing files at the 

961 destination. 

962 

963 Returns 

964 ------- 

965 targets : `list` of `lsst.resources.ResourcePath` 

966 URIs of file artifacts in destination location. Order is not 

967 preserved. 

968 

969 Notes 

970 ----- 

971 For non-file datastores the artifacts written to the destination 

972 may not match the representation inside the datastore. For example 

973 a hierarchical data structure in a NoSQL database may well be stored 

974 as a JSON file. 
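
Examples
--------
A minimal sketch that copies the artifacts for a query result to a
local directory; the query and destination are illustrative::

    refs = butler.registry.queryDatasets(
        "calexp", collections="HSC/runs/ci"
    )
    paths = butler.retrieveArtifacts(refs, "/tmp/export", transfer="copy")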

975 """ 

976 raise NotImplementedError() 

977 

978 @abstractmethod 

979 def exists( 

980 self, 

981 dataset_ref_or_type: DatasetRef | DatasetType | str, 

982 /, 

983 data_id: DataId | None = None, 

984 *, 

985 full_check: bool = True, 

986 collections: Any = None, 

987 **kwargs: Any, 

988 ) -> DatasetExistence: 

989 """Indicate whether a dataset is known to Butler registry and 

990 datastore. 

991 

992 Parameters 

993 ---------- 

994 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

995 When `DatasetRef` the `dataId` should be `None`. 

996 Otherwise the `DatasetType` or name thereof. 

997 data_id : `dict` or `DataCoordinate` 

998 A `dict` of `Dimension` link name, value pairs that label the 

999 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1000 should be provided as the first argument. 

1001 full_check : `bool`, optional 

1002 If `True`, an additional check will be made for dataset artifact 

1003 existence. This will involve additional overhead due to the need 

1004 to query an external system. If `False` registry and datastore 

1005 will solely be asked if they know about the dataset but no 

1006 check for the artifact will be performed. 

1007 collections : Any, optional 

1008 Collections to be searched, overriding ``self.collections``. 

1009 Can be any of the types supported by the ``collections`` argument 

1010 to butler construction. 

1011 **kwargs 

1012 Additional keyword arguments used to augment or construct a 

1013 `DataCoordinate`. See `DataCoordinate.standardize` 

1014 parameters. 

1015 

1016 Returns 

1017 ------- 

1018 existence : `DatasetExistence` 

1019 Object indicating whether the dataset is known to registry and 

1020 datastore. Evaluates to `True` if the dataset is present and known 

1021 to both. 
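
Examples
--------
An illustrative check that skips the (potentially slow) artifact
existence test::

    existence = butler.exists("calexp",
                              instrument="HSC", visit=903334,
                              detector=42, full_check=False)
    if existence:
        ...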

1022 """ 

1023 raise NotImplementedError() 

1024 

1025 @abstractmethod 

1026 def _exists_many( 

1027 self, 

1028 refs: Iterable[DatasetRef], 

1029 /, 

1030 *, 

1031 full_check: bool = True, 

1032 ) -> dict[DatasetRef, DatasetExistence]: 

1033 """Indicate whether multiple datasets are known to Butler registry and 

1034 datastore. 

1035 

1036 This is an experimental API that may change at any moment. 

1037 

1038 Parameters 

1039 ---------- 

1040 refs : iterable of `DatasetRef` 

1041 The datasets to be checked. 

1042 full_check : `bool`, optional 

1043 If `True`, an additional check will be made for dataset artifact 

1044 existence. This will involve additional overhead due to the need 

1045 to query an external system. If `False` registry and datastore 

1046 will solely be asked if they know about the dataset but no 

1047 check for the artifact will be performed. 

1048 

1049 Returns 

1050 ------- 

1051 existence : dict of [`DatasetRef`, `DatasetExistence`] 

1052 Mapping from the given dataset refs to an enum indicating the 

1053 status of the dataset in registry and datastore. 

1054 Each value evaluates to `True` if the dataset is present and known 

1055 to both. 

1056 """ 

1057 raise NotImplementedError() 

1058 

1059 @abstractmethod 

1060 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1061 """Remove one or more `~CollectionType.RUN` collections and the 

1062 datasets within them. 

1063 

1064 Parameters 

1065 ---------- 

1066 names : `~collections.abc.Iterable` [ `str` ] 

1067 The names of the collections to remove. 

1068 unstore : `bool`, optional 

1069 If `True` (default), delete datasets from all datastores in which 

1070 they are present, and attempt to rollback the registry deletions if 

1071 datastore deletions fail (which may not always be possible). If 

1072 `False`, datastore records for these datasets are still removed, 

1073 but any artifacts (e.g. files) will not be. 

1074 

1075 Raises 

1076 ------ 

1077 TypeError 

1078 Raised if one or more collections are not of type 

1079 `~CollectionType.RUN`. 

1080 """ 

1081 raise NotImplementedError() 

1082 

1083 @abstractmethod 

1084 def ingest( 

1085 self, 

1086 *datasets: FileDataset, 

1087 transfer: str | None = "auto", 

1088 run: str | None = None, 

1089 idGenerationMode: DatasetIdGenEnum | None = None, 

1090 record_validation_info: bool = True, 

1091 ) -> None: 

1092 """Store and register one or more datasets that already exist on disk. 

1093 

1094 Parameters 

1095 ---------- 

1096 datasets : `FileDataset` 

1097 Each positional argument is a struct containing information about 

1098 a file to be ingested, including its URI (either absolute or 

1099 relative to the datastore root, if applicable), a resolved 

1100 `DatasetRef`, and optionally a formatter class or its 

1101 fully-qualified string name. If a formatter is not provided, the 

1102 formatter that would be used for `put` is assumed. On successful 

1103 ingest all `FileDataset.formatter` attributes will be set to the 

1104 formatter class used. `FileDataset.path` attributes may be modified 

1105 to put paths in whatever the datastore considers a standardized 

1106 form. 

1107 transfer : `str`, optional 

1108 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1109 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1110 transfer the file. 

1111 run : `str`, optional 

1112 The name of the run ingested datasets should be added to, 

1113 overriding ``self.run``. This parameter is now deprecated since 

1114 the run is encoded in the ``FileDataset``. 

1115 idGenerationMode : `DatasetIdGenEnum`, optional 

1116 Specifies option for generating dataset IDs. Parameter is 

1117 deprecated. 

1118 record_validation_info : `bool`, optional 

1119 If `True`, the default, the datastore can record validation 

1120 information associated with the file. If `False` the datastore 

1121 will not attempt to track any information such as checksums 

1122 or file sizes. This can be useful if such information is tracked 

1123 in an external system or if the file is to be compressed in place. 

1124 It is up to the datastore whether this parameter is relevant. 

1125 

1126 Raises 

1127 ------ 

1128 TypeError 

1129 Raised if the butler is read-only or if no run was provided. 

1130 NotImplementedError 

1131 Raised if the `Datastore` does not support the given transfer mode. 

1132 DatasetTypeNotSupportedError 

1133 Raised if one or more files to be ingested have a dataset type that 

1134 is not supported by the `Datastore`.

1135 FileNotFoundError 

1136 Raised if one of the given files does not exist. 

1137 FileExistsError 

1138 Raised if transfer is not `None` but the (internal) location the 

1139 file would be moved to is already occupied. 

1140 

1141 Notes 

1142 ----- 

1143 This operation is not fully exception safe: if a database operation 

1144 fails, the given `FileDataset` instances may be only partially updated. 

1145 

1146 It is atomic in terms of database operations (they will either all 

1147 succeed or all fail) provided the database engine implements

1148 transactions correctly. It will attempt to be atomic in terms of 

1149 filesystem operations as well, but this cannot be implemented 

1150 rigorously for most datastores. 
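
Examples
--------
A hedged sketch of ingesting a file that already exists on disk,
assuming a resolved `DatasetRef` is already in hand; the path is
illustrative::

    dataset = FileDataset(path="/data/raw_0001.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy")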

1151 """ 

1152 raise NotImplementedError() 

1153 

1154 @abstractmethod 

1155 def export( 

1156 self, 

1157 *, 

1158 directory: str | None = None, 

1159 filename: str | None = None, 

1160 format: str | None = None, 

1161 transfer: str | None = None, 

1162 ) -> AbstractContextManager[RepoExportContext]: 

1163 """Export datasets from the repository represented by this `Butler`. 

1164 

1165 This method is a context manager that returns a helper object 

1166 (`RepoExportContext`) that is used to indicate what information from 

1167 the repository should be exported. 

1168 

1169 Parameters 

1170 ---------- 

1171 directory : `str`, optional 

1172 Directory dataset files should be written to if ``transfer`` is not 

1173 `None`. 

1174 filename : `str`, optional 

1175 Name for the file that will include database information associated 

1176 with the exported datasets. If this is not an absolute path and 

1177 ``directory`` is not `None`, it will be written to ``directory`` 

1178 instead of the current working directory. Defaults to 

1179 "export.{format}". 

1180 format : `str`, optional 

1181 File format for the database information file. If `None`, the 

1182 extension of ``filename`` will be used. 

1183 transfer : `str`, optional 

1184 Transfer mode passed to `Datastore.export`. 

1185 

1186 Raises 

1187 ------ 

1188 TypeError 

1189 Raised if the set of arguments passed is inconsistent. 

1190 

1191 Examples 

1192 -------- 

1193 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1194 methods are used to provide the iterables over data IDs and/or datasets 

1195 to be exported:: 

1196 

1197 with butler.export("exports.yaml") as export: 

1198 # Export all flats, but none of the dimension element rows 

1199 # (i.e. data ID information) associated with them. 

1200 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1201 elements=()) 

1202 # Export all datasets that start with "deepCoadd_" and all of 

1203 # their associated data ID information. 

1204 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1205 """ 

1206 raise NotImplementedError() 

1207 

1208 @abstractmethod 

1209 def import_( 

1210 self, 

1211 *, 

1212 directory: ResourcePathExpression | None = None, 

1213 filename: ResourcePathExpression | TextIO | None = None, 

1214 format: str | None = None, 

1215 transfer: str | None = None, 

1216 skip_dimensions: set | None = None, 

1217 ) -> None: 

1218 """Import datasets into this repository that were exported from a 

1219 different butler repository via `~lsst.daf.butler.Butler.export`. 

1220 

1221 Parameters 

1222 ---------- 

1223 directory : `~lsst.resources.ResourcePathExpression`, optional 

1224 Directory containing dataset files to import from. If `None`, 

1225 ``filename`` and all dataset file paths specified therein must 

1226 be absolute. 

1227 filename : `~lsst.resources.ResourcePathExpression` or `TextIO` 

1228 A stream or name of file that contains database information 

1229 associated with the exported datasets, typically generated by 

1230 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1231 `~lsst.resources.ResourcePath` and is not an absolute path, 

1232 it will first be looked for relative to ``directory`` and if not 

1233 found there it will be looked for in the current working 

1234 directory. Defaults to "export.{format}". 

1235 format : `str`, optional 

1236 File format for ``filename``. If `None`, the extension of 

1237 ``filename`` will be used. 

1238 transfer : `str`, optional 

1239 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1240 skip_dimensions : `set`, optional 

1241 Names of dimensions that should be skipped and not imported. 

1242 

1243 Raises 

1244 ------ 

1245 TypeError 

1246 Raised if the set of arguments passed is inconsistent, or if the 

1247 butler is read-only. 

1248 """ 

1249 raise NotImplementedError() 

1250 

1251 @abstractmethod 

1252 def transfer_dimension_records_from( 

1253 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef] 

1254 ) -> None: 

1255 """Transfer dimension records to this Butler from another Butler. 

1256 

1257 Parameters 

1258 ---------- 

1259 source_butler : `LimitedButler` or `Butler` 

1260 Butler from which the records are to be transferred. If data IDs 

1261 in ``source_refs`` are not expanded then this has to be a full 

1262 `Butler` whose registry will be used to expand data IDs. If the 

1263 source refs contain coordinates that are used to populate other 

1264 records then this will also need to be a full `Butler`. 

1265 source_refs : iterable of `DatasetRef` 

1266 Datasets defined in the source butler whose dimension records 

1267 should be transferred to this butler. In most circumstances,

1268 transfer is faster if the dataset refs are expanded. 

1269 """ 

1270 raise NotImplementedError() 

1271 

1272 @abstractmethod 

1273 def transfer_from( 

1274 self, 

1275 source_butler: LimitedButler, 

1276 source_refs: Iterable[DatasetRef], 

1277 transfer: str = "auto", 

1278 skip_missing: bool = True, 

1279 register_dataset_types: bool = False, 

1280 transfer_dimensions: bool = False, 

1281 ) -> Collection[DatasetRef]: 

1282 """Transfer datasets to this Butler from a run in another Butler. 

1283 

1284 Parameters 

1285 ---------- 

1286 source_butler : `LimitedButler` 

1287 Butler from which the datasets are to be transferred. If data IDs 

1288 in ``source_refs`` are not expanded then this has to be a full 

1289 `Butler` whose registry will be used to expand data IDs. 

1290 source_refs : iterable of `DatasetRef` 

1291 Datasets defined in the source butler that should be transferred to 

1292 this butler. In most circumstances, ``transfer_from`` is faster if 

1293 the dataset refs are expanded. 

1294 transfer : `str`, optional 

1295 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1296 skip_missing : `bool` 

1297 If `True`, datasets with no datastore artifact associated with 

1298 them are not transferred. If `False` a registry entry will be 

1299 created even if no datastore record is created (and so will 

1300 look equivalent to the dataset being unstored). 

1301 register_dataset_types : `bool` 

1302 If `True` any missing dataset types are registered. Otherwise 

1303 an exception is raised. 

1304 transfer_dimensions : `bool`, optional 

1305 If `True`, dimension record data associated with the new datasets 

1306 will be transferred. 

1307 

1308 Returns 

1309 ------- 

1310 refs : `list` of `DatasetRef` 

1311 The refs added to this Butler. 

1312 

1313 Notes 

1314 ----- 

1315 The datastore artifact has to exist for a transfer 

1316 to be made but non-existence is not an error. 

1317 

1318 Datasets that already exist in this run will be skipped. 

1319 

1320 The datasets are imported as part of a transaction, although 

1321 dataset types are registered before the transaction is started. 

1322 This means that it is possible for a dataset type to be registered 

1323 even though transfer has failed. 
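
Examples
--------
A hedged sketch of copying datasets from another repository; the repo
path, query, and collection names are illustrative::

    source = Butler.from_config("/path/to/source_repo")
    refs = source.registry.queryDatasets(
        "calexp", collections="HSC/runs/ci"
    )
    transferred = butler.transfer_from(
        source,
        refs,
        transfer="copy",
        register_dataset_types=True,
        transfer_dimensions=True,
    )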

1324 """ 

1325 raise NotImplementedError() 

1326 

1327 @abstractmethod 

1328 def validateConfiguration( 

1329 self, 

1330 logFailures: bool = False, 

1331 datasetTypeNames: Iterable[str] | None = None, 

1332 ignore: Iterable[str] | None = None, 

1333 ) -> None: 

1334 """Validate butler configuration. 

1335 

1336 Checks that each `DatasetType` can be stored in the `Datastore`. 

1337 

1338 Parameters 

1339 ---------- 

1340 logFailures : `bool`, optional 

1341 If `True`, output a log message for every validation error 

1342 detected. 

1343 datasetTypeNames : iterable of `str`, optional 

1344 The `DatasetType` names that should be checked. This allows 

1345 only a subset to be selected. 

1346 ignore : iterable of `str`, optional 

1347 Names of DatasetTypes to skip over. This can be used to skip 

1348 known problems. If a named `DatasetType` corresponds to a 

1349 composite, all components of that `DatasetType` will also be 

1350 ignored. 

1351 

1352 Raises 

1353 ------ 

1354 ButlerValidationError 

1355 Raised if there is some inconsistency with how this Butler 

1356 is configured. 

1357 """ 

1358 raise NotImplementedError() 

1359 

1360 @property 

1361 @abstractmethod 

1362 def collections(self) -> Sequence[str]: 

1363 """The collections to search by default, in order 

1364 (`~collections.abc.Sequence` [ `str` ]). 

1365 """ 

1366 raise NotImplementedError() 

1367 

1368 @property 

1369 @abstractmethod 

1370 def run(self) -> str | None: 

1371 """Name of the run this butler writes outputs to by default (`str` or 

1372 `None`). 

1373 """ 

1374 raise NotImplementedError() 

1375 

1376 @property 

1377 @abstractmethod 

1378 def registry(self) -> Registry: 

1379 """The object that manages dataset metadata and relationships 

1380 (`Registry`). 

1381 

1382 Many operations that don't involve reading or writing butler datasets 

1383 are accessible only via `Registry` methods. Eventually these methods 

1384 will be replaced by equivalent `Butler` methods. 

1385 """ 

1386 raise NotImplementedError() 

1387 

1388 @abstractmethod 

1389 def _query(self) -> AbstractContextManager[Query]: 

1390 """Context manager returning a `Query` object used for construction 

1391 and execution of complex queries. 

1392 """ 

1393 raise NotImplementedError() 

1394 

1395 @abstractmethod 

1396 def _query_data_ids( 

1397 self, 

1398 dimensions: DimensionGroup | Iterable[str] | str, 

1399 *, 

1400 data_id: DataId | None = None, 

1401 where: str = "", 

1402 bind: Mapping[str, Any] | None = None, 

1403 expanded: bool = False, 

1404 order_by: Iterable[str] | str | None = None, 

1405 limit: int | None = None, 

1406 offset: int | None = None, 

1407 explain: bool = True, 

1408 **kwargs: Any, 

1409 ) -> list[DataCoordinate]: 

1410 """Query for data IDs matching user-provided criteria. 

1411 

1412 Parameters 

1413 ---------- 

1414 dimensions : `DimensionGroup`, `str`, or \ 

1415 `~collections.abc.Iterable` [`str`] 

1416 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1417 instances or `str`. Will be automatically expanded to a complete 

1418 `DimensionGroup`. 

1419 data_id : `dict` or `DataCoordinate`, optional 

1420 A data ID whose key-value pairs are used as equality constraints 

1421 in the query. 

1422 where : `str`, optional 

1423 A string expression similar to a SQL WHERE clause. May involve 

1424 any column of a dimension table or (as a shortcut for the primary 

1425 key column of a dimension table) a dimension name. See 

1426 :ref:`daf_butler_dimension_expressions` for more information. 

1427 bind : `~collections.abc.Mapping`, optional 

1428 Mapping containing literal values that should be injected into the 

1429 ``where`` expression, keyed by the identifiers they replace. 

1430 Values of collection type can be expanded in some cases; see 

1431 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1432 information. 

1433 expanded : `bool`, optional 

1434 If `True` (default is `False`) then the returned data IDs will have 

1435 dimension records. 

1436 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1437 Names of the columns/dimensions to use for ordering the returned 

1438 data IDs. A column name can be prefixed with minus (``-``) to use 

1439 descending ordering. 

1440 limit : `int`, optional 

1441 Upper limit on the number of returned records. 

1442 offset : `int`, optional 

1443 The number of records to skip before returning at most ``limit`` 

1444 records. If ``offset`` is specified then ``limit`` must be 

1445 specified as well. 

1446 explain : `bool`, optional 

1447 If `True` (default) then an `EmptyQueryResultError` exception is 

1448 raised when the resulting list is empty. The exception contains a 

1449 non-empty list of strings explaining possible causes for the 

1450 empty result. 

1451 **kwargs 

1452 Additional keyword arguments are forwarded to 

1453 `DataCoordinate.standardize` when processing the ``data_id`` 

1454 argument (and may be used to provide a constraining data ID even 

1455 when the ``data_id`` argument is `None`). 

1456 

1457 Returns 

1458 ------- 

1459 dataIds : `list` [`DataCoordinate`] 

1460 Data IDs matching the given query parameters. These are always 

1461 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1462 returns `True`). 

1463 

1464 Raises 

1465 ------ 

1466 lsst.daf.butler.registry.DataIdError 

1467 Raised when ``data_id`` or keyword arguments specify unknown 

1468 dimensions or values, or when they contain inconsistent values. 

1469 lsst.daf.butler.registry.UserExpressionError 

1470 Raised when the ``where`` expression is invalid. 

1471 lsst.daf.butler.EmptyQueryResultError 

1472 Raised when the query generates an empty result and ``explain`` is 

1473 set to `True`. 

1474 TypeError 

1475 Raised when the arguments are incompatible, e.g. ``offset`` is 

1476 specified, but ``limit`` is not. 

1477 """ 

1478 raise NotImplementedError() 
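
# A hedged sketch of querying data IDs with the parameters documented above.
# The dimension names, ``where`` expression, and bind value are placeholders.

def find_visits(butler: Butler) -> list[DataCoordinate]:
    # Bind the instrument name into the ``where`` expression as a literal,
    # return the newest visits first, and cap the result at 100 data IDs.
    return butler._query_data_ids(
        ["visit", "detector"],
        where="instrument = my_instrument",
        bind={"my_instrument": "LSSTCam"},
        order_by="-visit",
        limit=100,
        explain=False,  # return an empty list instead of raising
    )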

1479 

1480 @abstractmethod 

1481 def _query_datasets( 

1482 self, 

1483 dataset_type: Any, 

1484 collections: CollectionArgType | None = None, 

1485 *, 

1486 find_first: bool = True, 

1487 data_id: DataId | None = None, 

1488 where: str = "", 

1489 bind: Mapping[str, Any] | None = None, 

1490 expanded: bool = False, 

1491 explain: bool = True, 

1492 **kwargs: Any, 

1493 ) -> list[DatasetRef]: 

1494 """Query for dataset references matching user-provided criteria. 

1495 

1496 Parameters 

1497 ---------- 

1498 dataset_type : dataset type expression 

1499 An expression that fully or partially identifies the dataset types 

1500 to be queried. Allowed types include `DatasetType`, `str`, 

1501 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1502 be used to query all dataset types. See 

1503 :ref:`daf_butler_dataset_type_expressions` for more information. 

1504 collections : collection expression, optional 

1505 An expression that identifies the collections to search, such as a 

1506 `str` (for full matches or partial matches via globs), `re.Pattern` 

1507 (for partial matches), or iterable thereof. ``...`` can be used to 

1508 search all collections (actually just all `~CollectionType.RUN` 

1509 collections, because this will still find all datasets). 

1510 If not provided, the default collections are used. See 

1511 :ref:`daf_butler_collection_expressions` for more information. 

1512 find_first : `bool`, optional 

1513 If `True` (default), for each result data ID, only yield one 

1514 `DatasetRef` of each `DatasetType`, from the first collection in 

1515 which a dataset of that dataset type appears (according to the 

1516 order of ``collections`` passed in). If `True`, ``collections`` 

1517 must not contain regular expressions and may not be ``...``. 

1518 data_id : `dict` or `DataCoordinate`, optional 

1519 A data ID whose key-value pairs are used as equality constraints 

1520 in the query. 

1521 where : `str`, optional 

1522 A string expression similar to a SQL WHERE clause. May involve 

1523 any column of a dimension table or (as a shortcut for the primary 

1524 key column of a dimension table) a dimension name. See 

1525 :ref:`daf_butler_dimension_expressions` for more information. 

1526 bind : `~collections.abc.Mapping`, optional 

1527 Mapping containing literal values that should be injected into the 

1528 ``where`` expression, keyed by the identifiers they replace. 

1529 Values of collection type can be expanded in some cases; see 

1530 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1531 information. 

1532 expanded : `bool`, optional 

1533 If `True` (default is `False`) then the returned data IDs will have 

1534 dimension records. 

1535 explain : `bool`, optional 

1536 If `True` (default) then an `EmptyQueryResultError` exception is 

1537 raised when the resulting list is empty. The exception contains a 

1538 non-empty list of strings explaining possible causes for the 

1539 empty result. 

1540 **kwargs 

1541 Additional keyword arguments are forwarded to 

1542 `DataCoordinate.standardize` when processing the ``data_id`` 

1543 argument (and may be used to provide a constraining data ID even 

1544 when the ``data_id`` argument is `None`). 

1545 

1546 Returns 

1547 ------- 

1548 refs : `list` [ `DatasetRef` ] 

1549 Dataset references matching the given query criteria. Nested data 

1550 IDs are guaranteed to include values for all implied dimensions 

1551 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

1552 include dimension records (`DataCoordinate.hasRecords` will be 

1553 `False`) unless ``expanded=True`` was passed when calling this 

1554 method. 

1555 

1556 Raises 

1557 ------ 

1558 lsst.daf.butler.registry.DatasetTypeExpressionError 

1559 Raised when the ``dataset_type`` expression is invalid. 

1560 lsst.daf.butler.registry.DataIdError 

1561 Raised when ``data_id`` or keyword arguments specify unknown 

1562 dimensions or values, or when they contain inconsistent values. 

1563 lsst.daf.butler.registry.UserExpressionError 

1564 Raised when the ``where`` expression is invalid. 

1565 lsst.daf.butler.EmptyQueryResultError 

1566 Raised when the query generates an empty result and ``explain`` is 

1567 set to `True`. 

1568 TypeError 

1569 Raised when the arguments are incompatible, such as when a 

1570 collection wildcard is passed when ``find_first`` is `True`, or 

1571 when ``collections`` is `None` and default butler collections are 

1572 not defined. 

1573 

1574 Notes 

1575 ----- 

1576 When multiple dataset types are queried in a single call, the 

1577 results of this operation are equivalent to querying for each dataset 

1578 type separately in turn, and no information about the relationships 

1579 between datasets of different types is included. 

1580 """ 

1581 raise NotImplementedError() 
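
# A hedged sketch of querying dataset references.  The dataset type name,
# collection names, and data ID values are placeholders.

def find_calexps(butler: Butler) -> list[DatasetRef]:
    # Search the listed collections in order; with the default
    # ``find_first=True`` only the first match per data ID is kept.
    return butler._query_datasets(
        "calexp",
        collections=["processing/run1", "processing/run2"],
        data_id={"instrument": "LSSTCam"},
        explain=False,
    )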

1582 

1583 @abstractmethod 

1584 def _query_dimension_records( 

1585 self, 

1586 element: str, 

1587 *, 

1588 data_id: DataId | None = None, 

1589 where: str = "", 

1590 bind: Mapping[str, Any] | None = None, 

1591 order_by: Iterable[str] | str | None = None, 

1592 limit: int | None = None, 

1593 offset: int | None = None, 

1594 explain: bool = True, 

1595 **kwargs: Any, 

1596 ) -> list[DimensionRecord]: 

1597 """Query for dimension information matching user-provided criteria. 

1598 

1599 Parameters 

1600 ---------- 

1601 element : `str` 

1602 The name of a dimension element to obtain records for. 

1603 data_id : `dict` or `DataCoordinate`, optional 

1604 A data ID whose key-value pairs are used as equality constraints 

1605 in the query. 

1606 where : `str`, optional 

1607 A string expression similar to a SQL WHERE clause. See 

1608 `_query_data_ids` and :ref:`daf_butler_dimension_expressions` for more 

1609 information. 

1610 bind : `~collections.abc.Mapping`, optional 

1611 Mapping containing literal values that should be injected into the 

1612 ``where`` expression, keyed by the identifiers they replace. 

1613 Values of collection type can be expanded in some cases; see 

1614 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1615 information. 

1616 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1617 Names of the columns/dimensions to use for ordering the returned 

1618 records. A column name can be prefixed with minus (``-``) to use 

1619 descending ordering. 

1620 limit : `int`, optional 

1621 Upper limit on the number of returned records. 

1622 offset : `int`, optional 

1623 The number of records to skip before returning at most ``limit`` 

1624 records. If ``offset`` is specified then ``limit`` must be 

1625 specified as well. 

1626 explain : `bool`, optional 

1627 If `True` (default) then an `EmptyQueryResultError` exception is 

1628 raised when the resulting list is empty. The exception contains a 

1629 non-empty list of strings explaining possible causes for the 

1630 empty result. 

1631 **kwargs 

1632 Additional keyword arguments are forwarded to 

1633 `DataCoordinate.standardize` when processing the ``data_id`` 

1634 argument (and may be used to provide a constraining data ID even 

1635 when the ``data_id`` argument is `None`). 

1636 

1637 Returns 

1638 ------- 

1639 records : `list`[`DimensionRecord`] 

1640 Dimension records matching the given query parameters. 

1641 

1642 Raises 

1643 ------ 

1644 lsst.daf.butler.registry.DataIdError 

1645 Raised when ``data_id`` or keyword arguments specify unknown 

1646 dimensions or values, or when they contain inconsistent values. 

1647 lsst.daf.butler.registry.UserExpressionError 

1648 Raised when the ``where`` expression is invalid. 

1649 lsst.daf.butler.EmptyQueryResultError 

1650 Raised when the query generates an empty result and ``explain`` is 

1651 set to `True`. 

1652 TypeError 

1653 Raised when the arguments are incompatible, e.g. ``offset`` is 

1654 specified, but ``limit`` is not. 

1657 """ 

1658 raise NotImplementedError()
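
# A hedged sketch of querying dimension records.  The element name and the
# constraining data ID are placeholders.

def list_detectors(butler: Butler) -> list[DimensionRecord]:
    # Fetch detector records for one instrument, ordered by detector id.
    return butler._query_dimension_records(
        "detector",
        data_id={"instrument": "LSSTCam"},
        order_by="detector",
        limit=10,
        explain=False,
    )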