Coverage for python / lsst / daf / butler / _butler.py: 31%

325 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-30 08:41 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler", "ParsedButlerDatasetURI", "SpecificButlerDataset"] 

31 

32import dataclasses 

33import urllib.parse 

34import uuid 

35import warnings 

36from abc import abstractmethod 

37from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence 

38from contextlib import AbstractContextManager 

39from types import EllipsisType 

40from typing import TYPE_CHECKING, Any, TextIO 

41 

42from lsst.resources import ResourcePath, ResourcePathExpression 

43from lsst.utils import doImportType 

44from lsst.utils.iteration import ensure_iterable 

45from lsst.utils.logging import getLogger 

46 

47from ._butler_collections import ButlerCollections 

48from ._butler_config import ButlerConfig, ButlerType 

49from ._butler_instance_options import ButlerInstanceOptions 

50from ._butler_metrics import ButlerMetrics 

51from ._butler_repo_index import ButlerRepoIndex 

52from ._config import Config, ConfigSubset 

53from ._exceptions import EmptyQueryResultError, InvalidQueryError 

54from ._limited_butler import LimitedButler 

55from ._query_all_datasets import QueryAllDatasetsParameters 

56from .datastore import Datastore 

57from .dimensions import DataCoordinate, DimensionConfig 

58from .registry import RegistryConfig, _RegistryFactory 

59from .repo_relocation import BUTLER_ROOT_TAG 

60from .utils import has_globs 

61 

62if TYPE_CHECKING: 

63 from ._dataset_existence import DatasetExistence 

64 from ._dataset_provenance import DatasetProvenance 

65 from ._dataset_ref import DatasetId, DatasetRef 

66 from ._dataset_type import DatasetType 

67 from ._deferredDatasetHandle import DeferredDatasetHandle 

68 from ._file_dataset import FileDataset 

69 from ._labeled_butler_factory import LabeledButlerFactoryProtocol 

70 from ._storage_class import StorageClass 

71 from ._timespan import Timespan 

72 from .datastore import DatasetRefURIs 

73 from .dimensions import DataId, DimensionGroup, DimensionRecord 

74 from .queries import Query 

75 from .registry import CollectionArgType, Registry 

76 from .transfers import RepoExportContext 

77 

78_LOG = getLogger(__name__) 

79 

80 

81@dataclasses.dataclass 

82class ParsedButlerDatasetURI: 

83 """Representation of the contents of an IVOA IVOID or dataset URI.""" 

84 

85 label: str 

86 """Label of the associated butler repository. (`str`)""" 

87 dataset_id: uuid.UUID 

88 """Dataset ID of the referenced dataset within the labeled repository. 

89 (`uuid.UUID`)""" 

90 uri: str 

91 """The original URI that was parsed (`str`).""" 

92 

93 

94@dataclasses.dataclass 

95class SpecificButlerDataset: 

96 """A dataset ref associated with a specific butler.""" 

97 

98 butler: Butler 

99 """A specific butler repository (`Butler`).""" 

100 dataset: DatasetRef | None 

101 """The reference of a specific dataset in that butler (`DatasetRef`).""" 

102 

103 

104class _DeprecatedDefault: 

105 """Default value for a deprecated parameter.""" 

106 

107 

108class Butler(LimitedButler): # numpydoc ignore=PR02 

109 """Interface for data butler and factory for Butler instances. 

110 

111 Parameters 

112 ---------- 

113 config : `ButlerConfig`, `Config` or `str`, optional 

114 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

115 If a directory path is given the configuration will be read from a 

116 ``butler.yaml`` file in that location. If `None` is given default 

117 values will be used. If ``config`` contains "cls" key then its value is 

118 used as a name of butler class and it must be a sub-class of this 

119 class, otherwise `DirectButler` is instantiated. 

120 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

121 An expression specifying the collections to be searched (in order) when 

122 reading datasets. 

123 This may be a `str` collection name or an iterable thereof. 

124 See :ref:`daf_butler_collection_expressions` for more information. 

125 These collections are not registered automatically and must be 

126 manually registered before they are used by any method, but they may be 

127 manually registered after the `Butler` is initialized. 

128 run : `str`, optional 

129 Name of the `~CollectionType.RUN` collection new datasets should be 

130 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

131 ``collections`` will be set to ``[run]``. If not `None`, this 

132 collection will automatically be registered. If this is not set (and 

133 ``writeable`` is not set either), a read-only butler will be created. 

134 searchPaths : `list` of `str`, optional 

135 Directory paths to search when calculating the full Butler 

136 configuration. Not used if the supplied config is already a 

137 `ButlerConfig`. 

138 writeable : `bool`, optional 

139 Explicitly sets whether the butler supports write operations. If not 

140 provided, a read-write butler is created if any of ``run``, ``tags``, 

141 or ``chains`` is non-empty. 

142 inferDefaults : `bool`, optional 

143 If `True` (default) infer default data ID values from the values 

144 present in the datasets in ``collections``: if all collections have the 

145 same value (or no value) for a governor dimension, that value will be 

146 the default for that dimension. Nonexistent collections are ignored. 

147 If a default value is provided explicitly for a governor dimension via 

148 ``**kwargs``, no default will be inferred for that dimension. 

149 without_datastore : `bool`, optional 

150 If `True` do not attach a datastore to this butler. Any attempts 

151 to use a datastore will fail. 

152 metrics : `ButlerMetrics` or `None` 

153 External metrics object to be used for tracking butler usage. If `None` 

154 a new metrics object is created. 

155 **kwargs : `typing.Any` 

156 Additional keyword arguments passed to a constructor of actual butler 

157 class. 

158 

159 Notes 

160 ----- 

161 The preferred way to instantiate Butler is via the `from_config` method. 

162 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

163 but ``mypy`` will complain about the former. 

164 """ 

165 

166 def __new__( 

167 cls, 

168 config: Config | ResourcePathExpression | None = None, 

169 *, 

170 collections: Any = None, 

171 run: str | None = None, 

172 searchPaths: Sequence[ResourcePathExpression] | None = None, 

173 writeable: bool | None = None, 

174 inferDefaults: bool = True, 

175 without_datastore: bool = False, 

176 metrics: ButlerMetrics | None = None, 

177 **kwargs: Any, 

178 ) -> Butler: 

179 if cls is Butler: 

180 return Butler.from_config( 

181 config=config, 

182 collections=collections, 

183 run=run, 

184 searchPaths=searchPaths, 

185 writeable=writeable, 

186 inferDefaults=inferDefaults, 

187 without_datastore=without_datastore, 

188 metrics=metrics, 

189 **kwargs, 

190 ) 

191 

192 # Note: we do not pass any parameters to __new__, Python will pass them 

193 # to __init__ after __new__ returns sub-class instance. 

194 return super().__new__(cls) 

195 

196 @classmethod 

197 def from_config( 

198 cls, 

199 config: Config | ResourcePathExpression | None = None, 

200 *, 

201 collections: Any = None, 

202 run: str | None = None, 

203 searchPaths: Sequence[ResourcePathExpression] | None = None, 

204 writeable: bool | None = None, 

205 inferDefaults: bool = True, 

206 without_datastore: bool = False, 

207 metrics: ButlerMetrics | None = None, 

208 **kwargs: Any, 

209 ) -> Butler: 

210 """Create butler instance from configuration. 

211 

212 Parameters 

213 ---------- 

214 config : `ButlerConfig`, `Config` or `str`, optional 

215 Configuration. Anything acceptable to the `ButlerConfig` 

216 constructor. If a directory path is given the configuration will be 

217 read from a ``butler.yaml`` file in that location. If `None` is 

218 given default values will be used. If ``config`` contains "cls" key 

219 then its value is used as a name of butler class and it must be a 

220 sub-class of this class, otherwise `DirectButler` is instantiated. 

221 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

222 An expression specifying the collections to be searched (in order) 

223 when reading datasets. 

224 This may be a `str` collection name or an iterable thereof. 

225 See :ref:`daf_butler_collection_expressions` for more information. 

226 These collections are not registered automatically and must be 

227 manually registered before they are used by any method, but they 

228 may be manually registered after the `Butler` is initialized. 

229 run : `str`, optional 

230 Name of the `~CollectionType.RUN` collection new datasets should be 

231 inserted into. If ``collections`` is `None` and ``run`` is not 

232 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

233 this collection will automatically be registered. If this is not 

234 set (and ``writeable`` is not set either), a read-only butler will 

235 be created. 

236 searchPaths : `list` of `str`, optional 

237 Directory paths to search when calculating the full Butler 

238 configuration. Not used if the supplied config is already a 

239 `ButlerConfig`. 

240 writeable : `bool`, optional 

241 Explicitly sets whether the butler supports write operations. If 

242 not provided, a read-write butler is created if any of ``run``, 

243 ``tags``, or ``chains`` is non-empty. 

244 inferDefaults : `bool`, optional 

245 If `True` (default) infer default data ID values from the values 

246 present in the datasets in ``collections``: if all collections have 

247 the same value (or no value) for a governor dimension, that value 

248 will be the default for that dimension. Nonexistent collections 

249 are ignored. If a default value is provided explicitly for a 

250 governor dimension via ``**kwargs``, no default will be inferred 

251 for that dimension. 

252 without_datastore : `bool`, optional 

253 If `True` do not attach a datastore to this butler. Any attempts 

254 to use a datastore will fail. 

255 metrics : `ButlerMetrics` or `None`, optional 

256 Metrics object to record butler usage statistics. 

257 **kwargs : `typing.Any` 

258 Default data ID key-value pairs. These may only identify 

259 "governor" dimensions like ``instrument`` and ``skymap``. 

260 

261 Returns 

262 ------- 

263 butler : `Butler` 

264 A `Butler` constructed from the given configuration. 

265 

266 Notes 

267 ----- 

268 Calling this factory method is identical to calling 

269 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

270 complains about ``Butler()`` call. 

271 

272 Examples 

273 -------- 

274 While there are many ways to control exactly how a `Butler` interacts 

275 with the collections in its `Registry`, the most common cases are still 

276 simple. 

277 

278 For a read-only `Butler` that searches one collection, do:: 

279 

280 butler = Butler.from_config( 

281 "/path/to/repo", collections=["u/alice/DM-50000"] 

282 ) 

283 

284 For a read-write `Butler` that writes to and reads from a 

285 `~CollectionType.RUN` collection:: 

286 

287 butler = Butler.from_config( 

288 "/path/to/repo", run="u/alice/DM-50000/a" 

289 ) 

290 

291 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

292 because we want to write to one `~CollectionType.RUN` collection but 

293 read from several others (as well):: 

294 

295 butler = Butler.from_config( 

296 "/path/to/repo", 

297 run="u/alice/DM-50000/a", 

298 collections=[ 

299 "u/alice/DM-50000/a", 

300 "u/bob/DM-49998", 

301 "HSC/defaults", 

302 ], 

303 ) 

304 

305 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

306 Datasets will be read first from that run (since it appears first in 

307 the chain), and then from ``u/bob/DM-49998`` and finally 

308 ``HSC/defaults``. 

309 

310 Finally, one can always create a `Butler` with no collections:: 

311 

312 butler = Butler.from_config("/path/to/repo", writeable=True) 

313 

314 This can be extremely useful when you just want to use 

315 ``butler.registry``, e.g. for inserting dimension data or managing 

316 collections, or when the collections you want to use with the butler 

317 are not consistent. Passing ``writeable`` explicitly here is only 

318 necessary if you want to be able to make changes to the repo - usually 

319 the value for ``writeable`` can be guessed from the collection 

320 arguments provided, but it defaults to `False` when there are not 

321 collection arguments. 

322 """ 

323 # DirectButler used to have a way to specify a "copy constructor" by 

324 # passing the "butler" parameter to its constructor. This has 

325 # been moved out of the constructor into Butler.clone(). 

326 butler = kwargs.pop("butler", None) 

327 metrics = metrics if metrics is not None else ButlerMetrics() 

328 if butler is not None: 

329 if not isinstance(butler, Butler): 

330 raise TypeError("'butler' parameter must be a Butler instance") 

331 if config is not None or searchPaths is not None or writeable is not None: 

332 raise TypeError( 

333 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

334 ) 

335 return butler.clone( 

336 collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs 

337 ) 

338 

339 options = ButlerInstanceOptions( 

340 collections=collections, 

341 run=run, 

342 writeable=writeable, 

343 inferDefaults=inferDefaults, 

344 metrics=metrics, 

345 kwargs=kwargs, 

346 ) 

347 

348 # Load the Butler configuration. This may involve searching the 

349 # environment to locate a configuration file. 

350 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

351 butler_type = butler_config.get_butler_type() 

352 

353 # Make DirectButler if class is not specified. 

354 match butler_type: 

355 case ButlerType.DIRECT: 

356 from .direct_butler import DirectButler 

357 

358 return DirectButler.create_from_config( 

359 butler_config, 

360 options=options, 

361 without_datastore=without_datastore, 

362 ) 

363 case ButlerType.REMOTE: 

364 from .remote_butler._factory import RemoteButlerFactory 

365 

366 # Assume this is being created by a client who would like 

367 # default caching of remote datasets. 

368 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

369 return factory.create_butler_with_credentials_from_environment( 

370 butler_options=options, enable_datastore_cache=True 

371 ) 

372 case _: 

373 raise TypeError(f"Unknown Butler type '{butler_type}'") 

374 

375 @staticmethod 

376 def has_repo_config(root: ResourcePathExpression) -> bool: 

377 """Check whether the given directory path contains a Butler 

378 configuration or not. 

379 

380 Parameters 

381 ---------- 

382 root : `lsst.resources.ResourcePathExpression` 

383 The directory URI to check. 

384 

385 Returns 

386 ------- 

387 is_root : `bool` 

388 `True` if this is a directory containing a butler configuration. 

389 """ 

390 root_uri = ResourcePath(root, forceDirectory=True) 

391 return root_uri.join("butler.yaml").exists() 

392 

393 @staticmethod 

394 def makeRepo( 

395 root: ResourcePathExpression, 

396 config: Config | str | None = None, 

397 dimensionConfig: Config | str | None = None, 

398 standalone: bool = False, 

399 searchPaths: list[str] | None = None, 

400 forceConfigRoot: bool = True, 

401 outfile: ResourcePathExpression | None = None, 

402 overwrite: bool = False, 

403 ) -> Config: 

404 """Create an empty data repository by adding a butler.yaml config 

405 to a repository root directory. 

406 

407 Parameters 

408 ---------- 

409 root : `lsst.resources.ResourcePathExpression` 

410 Path or URI to the root location of the new repository. Will be 

411 created if it does not exist. 

412 config : `Config` or `str`, optional 

413 Configuration to write to the repository, after setting any 

414 root-dependent Registry or Datastore config options. Can not 

415 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

416 configuration will be used. Root-dependent config options 

417 specified in this config are overwritten if ``forceConfigRoot`` 

418 is `True`. 

419 dimensionConfig : `Config` or `str`, optional 

420 Configuration for dimensions, will be used to initialize registry 

421 database. 

422 standalone : `bool` 

423 If True, write all expanded defaults, not just customized or 

424 repository-specific settings. 

425 This (mostly) decouples the repository from the default 

426 configuration, insulating it from changes to the defaults (which 

427 may be good or bad, depending on the nature of the changes). 

428 Future *additions* to the defaults will still be picked up when 

429 initializing a `Butler` for repos created with ``standalone=True``. 

430 searchPaths : `list` of `str`, optional 

431 Directory paths to search when calculating the full butler 

432 configuration. 

433 forceConfigRoot : `bool`, optional 

434 If `False`, any values present in the supplied ``config`` that 

435 would normally be reset are not overridden and will appear 

436 directly in the output config. This allows non-standard overrides 

437 of the root directory for a datastore or registry to be given. 

438 If this parameter is `True` the values for ``root`` will be 

439 forced into the resulting config if appropriate. 

440 outfile : `lsst.resources.ResourcePathExpression`, optional 

441 If not-`None`, the output configuration will be written to this 

442 location rather than into the repository itself. Can be a URI 

443 string. Can refer to a directory that will be used to write 

444 ``butler.yaml``. 

445 overwrite : `bool`, optional 

446 Create a new configuration file even if one already exists 

447 in the specified output location. Default is to raise 

448 an exception. 

449 

450 Returns 

451 ------- 

452 config : `Config` 

453 The updated `Config` instance written to the repo. 

454 

455 Raises 

456 ------ 

457 ValueError 

458 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

459 regular Config (as these subclasses would make it impossible to 

460 support ``standalone=False``). 

461 FileExistsError 

462 Raised if the output config file already exists. 

463 os.error 

464 Raised if the directory does not exist, exists but is not a 

465 directory, or cannot be created. 

466 

467 Notes 

468 ----- 

469 Note that when ``standalone=False`` (the default), the configuration 

470 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

471 construct the repository should also be used to construct any Butlers 

472 to avoid configuration inconsistencies. 

473 """ 

474 if isinstance(config, ButlerConfig | ConfigSubset): 

475 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

476 

477 # Ensure that the root of the repository exists or can be made 

478 root_uri = ResourcePath(root, forceDirectory=True) 

479 root_uri.mkdir() 

480 

481 config = Config(config) 

482 

483 # If we are creating a new repo from scratch with relative roots, 

484 # do not propagate an explicit root from the config file 

485 if "root" in config: 

486 del config["root"] 

487 

488 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

489 imported_class = doImportType(full["datastore", "cls"]) 

490 if not issubclass(imported_class, Datastore): 

491 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

492 datastoreClass: type[Datastore] = imported_class 

493 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

494 

495 # if key exists in given config, parse it, otherwise parse the defaults 

496 # in the expanded config 

497 if config.get(("registry", "db")): 

498 registryConfig = RegistryConfig(config) 

499 else: 

500 registryConfig = RegistryConfig(full) 

501 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

502 if defaultDatabaseUri is not None: 

503 Config.updateParameters( 

504 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

505 ) 

506 else: 

507 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

508 

509 if standalone: 

510 config.merge(full) 

511 else: 

512 # Always expand the registry.managers section into the per-repo 

513 # config, because after the database schema is created, it's not 

514 # allowed to change anymore. Note that in the standalone=True 

515 # branch, _everything_ in the config is expanded, so there's no 

516 # need to special case this. 

517 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

518 configURI: ResourcePathExpression 

519 if outfile is not None: 

520 # When writing to a separate location we must include 

521 # the root of the butler repo in the config else it won't know 

522 # where to look. 

523 config["root"] = root_uri.geturl() 

524 configURI = outfile 

525 else: 

526 configURI = root_uri 

527 # Check that if obscore key is present then its config must be there 

528 # too, this is to avoid common mistake when people copy butler.yaml 

529 # from existing repo with obscore but do not fill its config. 

530 if (obscore_key := ("registry", "managers", "obscore")) in config: 

531 obscore_config_key = ("registry", "managers", "obscore", "config") 

532 if obscore_config_key not in config or not config[obscore_config_key]: 

533 warnings.warn( 

534 "Obscore manager is declared in registry configuration, " 

535 "but obscore configuration is missing, obscore manager will be removed.", 

536 stacklevel=2, 

537 ) 

538 del config[obscore_key] 

539 # Strip obscore configuration, if it is present, before writing config 

540 # to a file, obscore config will be stored in registry. 

541 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

542 config_to_write = config.copy() 

543 del config_to_write[obscore_config_key] 

544 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

545 # configFile attribute is updated, need to copy it to original. 

546 config.configFile = config_to_write.configFile 

547 else: 

548 config.dumpToUri(configURI, overwrite=overwrite) 

549 

550 # Create Registry and populate tables 

551 registryConfig = RegistryConfig(config.get("registry")) 

552 dimensionConfig = DimensionConfig(dimensionConfig) 

553 registry = _RegistryFactory(registryConfig).create_from_config( 

554 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

555 ) 

556 registry.close() 

557 

558 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

559 

560 return config 

561 

562 @classmethod 

563 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

564 """Look up the label in a butler repository index. 

565 

566 Parameters 

567 ---------- 

568 label : `str` 

569 Label of the Butler repository to look up. 

570 return_label : `bool`, optional 

571 If ``label`` cannot be found in the repository index (either 

572 because index is not defined or ``label`` is not in the index) and 

573 ``return_label`` is `True` then return ``ResourcePath(label)``. 

574 If ``return_label`` is `False` (default) then an exception will be 

575 raised instead. 

576 

577 Returns 

578 ------- 

579 uri : `lsst.resources.ResourcePath` 

580 URI to the Butler repository associated with the given label or 

581 default value if it is provided. 

582 

583 Raises 

584 ------ 

585 KeyError 

586 Raised if the label is not found in the index, or if an index 

587 is not defined, and ``return_label`` is `False`. 

588 

589 Notes 

590 ----- 

591 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

592 information is discovered. 

593 """ 

594 return ButlerRepoIndex.get_repo_uri(label, return_label) 

595 

596 @classmethod 

597 def get_known_repos(cls) -> set[str]: 

598 """Retrieve the list of known repository labels. 

599 

600 Returns 

601 ------- 

602 repos : `set` of `str` 

603 All the known labels. Can be empty if no index can be found. 

604 

605 Notes 

606 ----- 

607 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

608 information is discovered. 

609 """ 

610 return ButlerRepoIndex.get_known_repos() 

611 

612 @classmethod 

613 def parse_dataset_uri(cls, uri: str) -> ParsedButlerDatasetURI: 

614 """Extract the butler label and dataset ID from a dataset URI. 

615 

616 Parameters 

617 ---------- 

618 uri : `str` 

619 The dataset URI to parse. 

620 

621 Returns 

622 ------- 

623 parsed : `ParsedButlerDatasetURI` 

624 The label associated with the butler repository from which this 

625 dataset originates and the ID of the dataset. 

626 

627 Notes 

628 ----- 

629 Supports dataset URIs of the forms 

630 ``ivo://org.rubinobs/usdac/dr1?repo=butler_label&id=UUID`` (see 

631 DMTN-302) and ``butler://butler_label/UUID``. The ``butler`` URI is 

632 deprecated and can not include ``/`` in the label string. ``ivo`` URIs 

633 can include anything supported by the `Butler` constructor, including 

634 paths to repositories and alias labels. 

635 

636 ivo://org.rubinobs/dr1?repo=/repo/main&id=UUID 

637 

638 will return a label of ``/repo/main``. 

639 

640 This method does not attempt to check that the dataset exists in the 

641 labeled butler. 

642 

643 Since the IVOID can be issued by any publisher to represent a Butler 

644 dataset there is no validation of the path or netloc component of the 

645 URI. The only requirement is that there are ``id`` and ``repo`` keys 

646 in the ``ivo`` URI query component. 

647 """ 

648 parsed = urllib.parse.urlparse(uri) 

649 parsed_scheme = parsed.scheme.lower() 

650 if parsed_scheme == "ivo": 

651 # Do not validate the netloc or the path values. 

652 qs = urllib.parse.parse_qs(parsed.query) 

653 if "repo" not in qs or "id" not in qs: 

654 raise ValueError(f"Missing 'repo' and/or 'id' query parameters in IVOID {uri}.") 

655 if len(qs["repo"]) != 1 or len(qs["id"]) != 1: 

656 raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}") 

657 label = qs["repo"][0] 

658 id_ = qs["id"][0] 

659 elif parsed_scheme == "butler": 

660 label = parsed.netloc # Butler label is case sensitive. 

661 # Need to strip the leading /. 

662 id_ = parsed.path[1:] 

663 else: 

664 raise ValueError(f"Unrecognized URI scheme: {uri!r}") 

665 # Strip trailing/leading whitespace from label. 

666 label = label.strip() 

667 if not label: 

668 raise ValueError(f"No butler repository label found in uri {uri!r}") 

669 try: 

670 dataset_id = uuid.UUID(hex=id_) 

671 except Exception as e: 

672 e.add_note(f"Error extracting dataset ID from uri {uri!r} with dataset ID string {id_!r}") 

673 raise 

674 

675 return ParsedButlerDatasetURI(label=label, dataset_id=dataset_id, uri=uri) 

676 

677 @classmethod 

678 def get_dataset_from_uri( 

679 cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None 

680 ) -> SpecificButlerDataset: 

681 """Get the dataset associated with the given dataset URI. 

682 

683 Parameters 

684 ---------- 

685 uri : `str` 

686 The URI associated with a dataset. 

687 factory : `LabeledButlerFactoryProtocol` or `None`, optional 

688 Bound factory function that will be given the butler label 

689 and receive a `Butler`. If this is not provided the label 

690 will be tried directly. 

691 

692 Returns 

693 ------- 

694 result : `SpecificButlerDataset` 

695 The butler associated with this URI and the dataset itself. 

696 The dataset can be `None` if the UUID is valid but the dataset 

697 is not known to this butler. 

698 """ 

699 parsed = cls.parse_dataset_uri(uri) 

700 butler: Butler | None = None 

701 if factory is not None: 

702 # If the label is not recognized, it might be a path. 

703 try: 

704 butler = factory(parsed.label) 

705 except KeyError: 

706 pass 

707 if butler is None: 

708 butler = cls.from_config(parsed.label) 

709 return SpecificButlerDataset(butler=butler, dataset=butler.get_dataset(parsed.dataset_id)) 

710 

711 @abstractmethod 

712 def _caching_context(self) -> AbstractContextManager[None]: 

713 """Context manager that enables caching.""" 

714 raise NotImplementedError() 

715 

716 @abstractmethod 

717 def transaction(self) -> AbstractContextManager[None]: 

718 """Context manager supporting `Butler` transactions. 

719 

720 Transactions can be nested. 

721 """ 

722 raise NotImplementedError() 

723 

724 @abstractmethod 

725 def put( 

726 self, 

727 obj: Any, 

728 datasetRefOrType: DatasetRef | DatasetType | str, 

729 /, 

730 dataId: DataId | None = None, 

731 *, 

732 run: str | None = None, 

733 provenance: DatasetProvenance | None = None, 

734 **kwargs: Any, 

735 ) -> DatasetRef: 

736 """Store and register a dataset. 

737 

738 Parameters 

739 ---------- 

740 obj : `object` 

741 The dataset. 

742 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

743 When `DatasetRef` is provided, ``dataId`` should be `None`. 

744 Otherwise the `DatasetType` or name thereof. If a fully resolved 

745 `DatasetRef` is given the run and ID are used directly. 

746 dataId : `dict` or `DataCoordinate` 

747 A `dict` of `Dimension` link name, value pairs that label the 

748 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

749 should be provided as the second argument. 

750 run : `str`, optional 

751 The name of the run the dataset should be added to, overriding 

752 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

753 provenance : `DatasetProvenance` or `None`, optional 

754 Any provenance that should be attached to the serialized dataset. 

755 Not supported by all serialization mechanisms. 

756 **kwargs 

757 Additional keyword arguments used to augment or construct a 

758 `DataCoordinate`. See `DataCoordinate.standardize` 

759 parameters. Not used if a resolve `DatasetRef` is provided. 

760 

761 Returns 

762 ------- 

763 ref : `DatasetRef` 

764 A reference to the stored dataset, updated with the correct id if 

765 given. 

766 

767 Raises 

768 ------ 

769 TypeError 

770 Raised if the butler is read-only or if no run has been provided. 

771 """ 

772 raise NotImplementedError() 

773 

774 @abstractmethod 

775 def getDeferred( 

776 self, 

777 datasetRefOrType: DatasetRef | DatasetType | str, 

778 /, 

779 dataId: DataId | None = None, 

780 *, 

781 parameters: dict | None = None, 

782 collections: Any = None, 

783 storageClass: str | StorageClass | None = None, 

784 timespan: Timespan | None = None, 

785 **kwargs: Any, 

786 ) -> DeferredDatasetHandle: 

787 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

788 after an immediate registry lookup. 

789 

790 Parameters 

791 ---------- 

792 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

793 When `DatasetRef` the `dataId` should be `None`. 

794 Otherwise the `DatasetType` or name thereof. 

795 dataId : `dict` or `DataCoordinate`, optional 

796 A `dict` of `Dimension` link name, value pairs that label the 

797 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

798 should be provided as the first argument. 

799 parameters : `dict` 

800 Additional StorageClass-defined options to control reading, 

801 typically used to efficiently read only a subset of the dataset. 

802 collections : Any, optional 

803 Collections to be searched, overriding ``self.collections``. 

804 Can be any of the types supported by the ``collections`` argument 

805 to butler construction. 

806 storageClass : `StorageClass` or `str`, optional 

807 The storage class to be used to override the Python type 

808 returned by this method. By default the returned type matches 

809 the dataset type definition for this dataset. Specifying a 

810 read `StorageClass` can force a different type to be returned. 

811 This type must be compatible with the original type. 

812 timespan : `Timespan` or `None`, optional 

813 A timespan that the validity range of the dataset must overlap. 

814 If not provided and this is a calibration dataset type, an attempt 

815 will be made to find the timespan from any temporal coordinate 

816 in the data ID. 

817 **kwargs 

818 Additional keyword arguments used to augment or construct a 

819 `DataId`. See `DataId` parameters. 

820 

821 Returns 

822 ------- 

823 obj : `DeferredDatasetHandle` 

824 A handle which can be used to retrieve a dataset at a later time. 

825 

826 Raises 

827 ------ 

828 LookupError 

829 Raised if no matching dataset exists in the `Registry` or 

830 datastore. 

831 ValueError 

832 Raised if a resolved `DatasetRef` was passed as an input, but it 

833 differs from the one found in the registry. 

834 TypeError 

835 Raised if no collections were provided. 

836 """ 

837 raise NotImplementedError() 

838 

839 @abstractmethod 

840 def get( 

841 self, 

842 datasetRefOrType: DatasetRef | DatasetType | str, 

843 /, 

844 dataId: DataId | None = None, 

845 *, 

846 parameters: dict[str, Any] | None = None, 

847 collections: Any = None, 

848 storageClass: StorageClass | str | None = None, 

849 timespan: Timespan | None = None, 

850 **kwargs: Any, 

851 ) -> Any: 

852 """Retrieve a stored dataset. 

853 

854 Parameters 

855 ---------- 

856 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

857 When `DatasetRef` the `dataId` should be `None`. 

858 Otherwise the `DatasetType` or name thereof. 

859 If a resolved `DatasetRef`, the associated dataset 

860 is returned directly without additional querying. 

861 dataId : `dict` or `DataCoordinate` 

862 A `dict` of `Dimension` link name, value pairs that label the 

863 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

864 should be provided as the first argument. 

865 parameters : `dict` 

866 Additional StorageClass-defined options to control reading, 

867 typically used to efficiently read only a subset of the dataset. 

868 collections : Any, optional 

869 Collections to be searched, overriding ``self.collections``. 

870 Can be any of the types supported by the ``collections`` argument 

871 to butler construction. 

872 storageClass : `StorageClass` or `str`, optional 

873 The storage class to be used to override the Python type 

874 returned by this method. By default the returned type matches 

875 the dataset type definition for this dataset. Specifying a 

876 read `StorageClass` can force a different type to be returned. 

877 This type must be compatible with the original type. 

878 timespan : `Timespan` or `None`, optional 

879 A timespan that the validity range of the dataset must overlap. 

880 If not provided and this is a calibration dataset type, an attempt 

881 will be made to find the timespan from any temporal coordinate 

882 in the data ID. 

883 **kwargs 

884 Additional keyword arguments used to augment or construct a 

885 `DataCoordinate`. See `DataCoordinate.standardize` 

886 parameters. 

887 

888 Returns 

889 ------- 

890 obj : `object` 

891 The dataset. 

892 

893 Raises 

894 ------ 

895 LookupError 

896 Raised if no matching dataset exists in the `Registry`. 

897 TypeError 

898 Raised if no collections were provided. 

899 

900 Notes 

901 ----- 

902 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

903 this method requires that the given data ID include temporal dimensions 

904 beyond the dimensions of the dataset type itself, in order to find the 

905 dataset with the appropriate validity range. For example, a "bias" 

906 dataset with native dimensions ``{instrument, detector}`` could be 

907 fetched with a ``{instrument, detector, exposure}`` data ID, because 

908 ``exposure`` is a temporal dimension. 

909 """ 

910 raise NotImplementedError() 

911 

912 @abstractmethod 

913 def getURIs( 

914 self, 

915 datasetRefOrType: DatasetRef | DatasetType | str, 

916 /, 

917 dataId: DataId | None = None, 

918 *, 

919 predict: bool = False, 

920 collections: Any = None, 

921 run: str | None = None, 

922 **kwargs: Any, 

923 ) -> DatasetRefURIs: 

924 """Return the URIs associated with the dataset. 

925 

926 Parameters 

927 ---------- 

928 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

929 When `DatasetRef` the `dataId` should be `None`. 

930 Otherwise the `DatasetType` or name thereof. 

931 dataId : `dict` or `DataCoordinate` 

932 A `dict` of `Dimension` link name, value pairs that label the 

933 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

934 should be provided as the first argument. 

935 predict : `bool` 

936 If `True`, allow URIs to be returned of datasets that have not 

937 been written. 

938 collections : Any, optional 

939 Collections to be searched, overriding ``self.collections``. 

940 Can be any of the types supported by the ``collections`` argument 

941 to butler construction. 

942 run : `str`, optional 

943 Run to use for predictions, overriding ``self.run``. 

944 **kwargs 

945 Additional keyword arguments used to augment or construct a 

946 `DataCoordinate`. See `DataCoordinate.standardize` 

947 parameters. 

948 

949 Returns 

950 ------- 

951 uris : `DatasetRefURIs` 

952 The URI to the primary artifact associated with this dataset (if 

953 the dataset was disassembled within the datastore this may be 

954 `None`), and the URIs to any components associated with the dataset 

955 artifact. (can be empty if there are no components). 

956 """ 

957 raise NotImplementedError() 

958 

959 def getURI( 

960 self, 

961 datasetRefOrType: DatasetRef | DatasetType | str, 

962 /, 

963 dataId: DataId | None = None, 

964 *, 

965 predict: bool = False, 

966 collections: Any = None, 

967 run: str | None = None, 

968 **kwargs: Any, 

969 ) -> ResourcePath: 

970 """Return the URI to the Dataset. 

971 

972 Parameters 

973 ---------- 

974 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

975 When `DatasetRef` the `dataId` should be `None`. 

976 Otherwise the `DatasetType` or name thereof. 

977 dataId : `dict` or `DataCoordinate` 

978 A `dict` of `Dimension` link name, value pairs that label the 

979 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

980 should be provided as the first argument. 

981 predict : `bool` 

982 If `True`, allow URIs to be returned of datasets that have not 

983 been written. 

984 collections : Any, optional 

985 Collections to be searched, overriding ``self.collections``. 

986 Can be any of the types supported by the ``collections`` argument 

987 to butler construction. 

988 run : `str`, optional 

989 Run to use for predictions, overriding ``self.run``. 

990 **kwargs 

991 Additional keyword arguments used to augment or construct a 

992 `DataCoordinate`. See `DataCoordinate.standardize` 

993 parameters. 

994 

995 Returns 

996 ------- 

997 uri : `lsst.resources.ResourcePath` 

998 URI pointing to the Dataset within the datastore. If the 

999 Dataset does not exist in the datastore, and if ``predict`` is 

1000 `True`, the URI will be a prediction and will include a URI 

1001 fragment "#predicted". 

1002 If the datastore does not have entities that relate well 

1003 to the concept of a URI the returned URI string will be 

1004 descriptive. The returned URI is not guaranteed to be obtainable. 

1005 

1006 Raises 

1007 ------ 

1008 LookupError 

1009 A URI has been requested for a dataset that does not exist and 

1010 guessing is not allowed. 

1011 ValueError 

1012 Raised if a resolved `DatasetRef` was passed as an input, but it 

1013 differs from the one found in the registry. 

1014 TypeError 

1015 Raised if no collections were provided. 

1016 RuntimeError 

1017 Raised if a URI is requested for a dataset that consists of 

1018 multiple artifacts. 

1019 """ 

1020 primary, components = self.getURIs( 

1021 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

1022 ) 

1023 

1024 if primary is None or components: 

1025 raise RuntimeError( 

1026 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1027 "Use Butler.getURIs() instead." 

1028 ) 

1029 return primary 

1030 

1031 @abstractmethod 

1032 def get_dataset_type(self, name: str) -> DatasetType: 

1033 """Get the `DatasetType`. 

1034 

1035 Parameters 

1036 ---------- 

1037 name : `str` 

1038 Name of the type. 

1039 

1040 Returns 

1041 ------- 

1042 type : `DatasetType` 

1043 The `DatasetType` associated with the given name. 

1044 

1045 Raises 

1046 ------ 

1047 lsst.daf.butler.MissingDatasetTypeError 

1048 Raised if the requested dataset type has not been registered. 

1049 

1050 Notes 

1051 ----- 

1052 This method handles component dataset types automatically, though most 

1053 other operations do not. 

1054 """ 

1055 raise NotImplementedError() 

1056 

1057 @abstractmethod 

1058 def get_dataset( 

1059 self, 

1060 id: DatasetId | str, 

1061 *, 

1062 storage_class: str | StorageClass | None = None, 

1063 dimension_records: bool = False, 

1064 datastore_records: bool = False, 

1065 ) -> DatasetRef | None: 

1066 """Retrieve a Dataset entry. 

1067 

1068 Parameters 

1069 ---------- 

1070 id : `DatasetId` 

1071 The unique identifier for the dataset, as an instance of 

1072 `uuid.UUID` or a string containing a hexadecimal number. 

1073 storage_class : `str` or `StorageClass` or `None` 

1074 A storage class to use when creating the returned entry. If given 

1075 it must be compatible with the default storage class. 

1076 dimension_records : `bool`, optional 

1077 If `True` the ref will be expanded and contain dimension records. 

1078 datastore_records : `bool`, optional 

1079 If `True` the ref will contain associated datastore records. 

1080 

1081 Returns 

1082 ------- 

1083 ref : `DatasetRef` or `None` 

1084 A ref to the Dataset, or `None` if no matching Dataset 

1085 was found. 

1086 """ 

1087 raise NotImplementedError() 

1088 

1089 @abstractmethod 

1090 def get_many_datasets(self, ids: Iterable[DatasetId | str]) -> list[DatasetRef]: 

1091 """Retrieve a list of dataset entries. 

1092 

1093 Parameters 

1094 ---------- 

1095 ids : `~collections.abc.Iterable` [ `DatasetId` or `str` ] 

1096 The unique identifiers for the datasets, as instances of 

1097 `uuid.UUID` or strings containing a hexadecimal number. 

1098 

1099 Returns 

1100 ------- 

1101 refs : `list` [ `DatasetRef` ] 

1102 A list containing a `DatasetRef` for each of the given dataset IDs. 

1103 If a dataset was not found, no error is thrown -- it is just not 

1104 included in the list. The returned datasets are in no particular 

1105 order. 

1106 """ 

1107 raise NotImplementedError() 

1108 

1109 @abstractmethod 

1110 def find_dataset( 

1111 self, 

1112 dataset_type: DatasetType | str, 

1113 data_id: DataId | None = None, 

1114 *, 

1115 collections: str | Sequence[str] | None = None, 

1116 timespan: Timespan | None = None, 

1117 storage_class: str | StorageClass | None = None, 

1118 dimension_records: bool = False, 

1119 datastore_records: bool = False, 

1120 **kwargs: Any, 

1121 ) -> DatasetRef | None: 

1122 """Find a dataset given its `DatasetType` and data ID. 

1123 

1124 This can be used to obtain a `DatasetRef` that permits the dataset to 

1125 be read from a `Datastore`. If the dataset is a component and can not 

1126 be found using the provided dataset type, a dataset ref for the parent 

1127 will be returned instead but with the correct dataset type. 

1128 

1129 Parameters 

1130 ---------- 

1131 dataset_type : `DatasetType` or `str` 

1132 A `DatasetType` or the name of one. If this is a `DatasetType` 

1133 instance, its storage class will be respected and propagated to 

1134 the output, even if it differs from the dataset type definition 

1135 in the registry, as long as the storage classes are convertible. 

1136 data_id : `dict` or `DataCoordinate`, optional 

1137 A `dict`-like object containing the `Dimension` links that identify 

1138 the dataset within a collection. If it is a `dict` the dataId 

1139 can include dimension record values such as ``day_obs`` and 

1140 ``seq_num`` or ``full_name`` that can be used to derive the 

1141 primary dimension. 

1142 collections : `str` or `list` [`str`], optional 

1143 A an ordered list of collections to search for the dataset. 

1144 Defaults to ``self.defaults.collections``. 

1145 timespan : `Timespan`, optional 

1146 A timespan that the validity range of the dataset must overlap. 

1147 If not provided, any `~CollectionType.CALIBRATION` collections 

1148 matched by the ``collections`` argument will not be searched. 

1149 storage_class : `str` or `StorageClass` or `None` 

1150 A storage class to use when creating the returned entry. If given 

1151 it must be compatible with the default storage class. 

1152 dimension_records : `bool`, optional 

1153 If `True` the ref will be expanded and contain dimension records. 

1154 datastore_records : `bool`, optional 

1155 If `True` the ref will contain associated datastore records. 

1156 **kwargs 

1157 Additional keyword arguments passed to 

1158 `DataCoordinate.standardize` to convert ``dataId`` to a true 

1159 `DataCoordinate` or augment an existing one. This can also include 

1160 dimension record metadata that can be used to derive a primary 

1161 dimension value. 

1162 

1163 Returns 

1164 ------- 

1165 ref : `DatasetRef` 

1166 A reference to the dataset, or `None` if no matching Dataset 

1167 was found. 

1168 

1169 Raises 

1170 ------ 

1171 lsst.daf.butler.NoDefaultCollectionError 

1172 Raised if ``collections`` is `None` and 

1173 ``self.collections`` is `None`. 

1174 LookupError 

1175 Raised if one or more data ID keys are missing. 

1176 lsst.daf.butler.MissingDatasetTypeError 

1177 Raised if the dataset type does not exist. 

1178 lsst.daf.butler.MissingCollectionError 

1179 Raised if any of ``collections`` does not exist in the registry. 

1180 

1181 Notes 

1182 ----- 

1183 This method simply returns `None` and does not raise an exception even 

1184 when the set of collections searched is intrinsically incompatible with 

1185 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

1186 only `~CollectionType.CALIBRATION` collections are being searched. 

1187 This may make it harder to debug some lookup failures, but the behavior 

1188 is intentional; we consider it more important that failed searches are 

1189 reported consistently, regardless of the reason, and that adding 

1190 additional collections that do not contain a match to the search path 

1191 never changes the behavior. 

1192 

1193 This method handles component dataset types automatically, though most 

1194 other query operations do not. 

1195 """ 

1196 raise NotImplementedError() 

1197 

1198 @abstractmethod 

1199 def retrieve_artifacts_zip( 

1200 self, 

1201 refs: Iterable[DatasetRef], 

1202 destination: ResourcePathExpression, 

1203 overwrite: bool = True, 

1204 ) -> ResourcePath: 

1205 """Retrieve artifacts from a Butler and place in ZIP file. 

1206 

1207 Parameters 

1208 ---------- 

1209 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1210 The datasets to be included in the zip file. 

1211 destination : `lsst.resources.ResourcePathExpression` 

1212 Directory to write the new ZIP file. This directory will 

1213 also be used as a staging area for the datasets being downloaded 

1214 from the datastore. 

1215 overwrite : `bool`, optional 

1216 If `False` the output Zip will not be written if a file of the 

1217 same name is already present in ``destination``. 

1218 

1219 Returns 

1220 ------- 

1221 zip_file : `lsst.resources.ResourcePath` 

1222 The path to the new ZIP file. 

1223 

1224 Raises 

1225 ------ 

1226 ValueError 

1227 Raised if there are no refs to retrieve. 

1228 """ 

1229 raise NotImplementedError() 

1230 

1231 @abstractmethod 

1232 def retrieveArtifacts( 

1233 self, 

1234 refs: Iterable[DatasetRef], 

1235 destination: ResourcePathExpression, 

1236 transfer: str = "auto", 

1237 preserve_path: bool = True, 

1238 overwrite: bool = False, 

1239 ) -> list[ResourcePath]: 

1240 """Retrieve the artifacts associated with the supplied refs. 

1241 

1242 Parameters 

1243 ---------- 

1244 refs : `~collections.abc.Iterable` of `DatasetRef` 

1245 The datasets for which artifacts are to be retrieved. 

1246 A single ref can result in multiple artifacts. The refs must 

1247 be resolved. 

1248 destination : `lsst.resources.ResourcePath` or `str` 

1249 Location to write the artifacts. 

1250 transfer : `str`, optional 

1251 Method to use to transfer the artifacts. Must be one of the options 

1252 supported by `~lsst.resources.ResourcePath.transfer_from`. 

1253 "move" is not allowed. 

1254 preserve_path : `bool`, optional 

1255 If `True` the full path of the artifact within the datastore 

1256 is preserved. If `False` the final file component of the path 

1257 is used. 

1258 overwrite : `bool`, optional 

1259 If `True` allow transfers to overwrite existing files at the 

1260 destination. 

1261 

1262 Returns 

1263 ------- 

1264 targets : `list` of `lsst.resources.ResourcePath` 

1265 URIs of file artifacts in destination location. Order is not 

1266 preserved. 

1267 

1268 Notes 

1269 ----- 

1270 For non-file datastores the artifacts written to the destination 

1271 may not match the representation inside the datastore. For example 

1272 a hierarchical data structure in a NoSQL database may well be stored 

1273 as a JSON file. 

1274 """ 

1275 raise NotImplementedError() 

1276 

1277 @abstractmethod 

1278 def exists( 

1279 self, 

1280 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1281 /, 

1282 data_id: DataId | None = None, 

1283 *, 

1284 full_check: bool = True, 

1285 collections: Any = None, 

1286 **kwargs: Any, 

1287 ) -> DatasetExistence: 

1288 """Indicate whether a dataset is known to Butler registry and 

1289 datastore. 

1290 

1291 Parameters 

1292 ---------- 

1293 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1294 When `DatasetRef` the `dataId` should be `None`. 

1295 Otherwise the `DatasetType` or name thereof. 

1296 data_id : `dict` or `DataCoordinate` 

1297 A `dict` of `Dimension` link name, value pairs that label the 

1298 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1299 should be provided as the first argument. 

1300 full_check : `bool`, optional 

1301 If `True`, a check will be made for the actual existence of a 

1302 dataset artifact. This will involve additional overhead due to 

1303 the need to query an external system. If `False`, this check will 

1304 be omitted, and the registry and datastore will solely be asked 

1305 if they know about the dataset but no direct check for the 

1306 artifact will be performed. 

1307 collections : Any, optional 

1308 Collections to be searched, overriding ``self.collections``. 

1309 Can be any of the types supported by the ``collections`` argument 

1310 to butler construction. 

1311 **kwargs 

1312 Additional keyword arguments used to augment or construct a 

1313 `DataCoordinate`. See `DataCoordinate.standardize` 

1314 parameters. 

1315 

1316 Returns 

1317 ------- 

1318 existence : `DatasetExistence` 

1319 Object indicating whether the dataset is known to registry and 

1320 datastore. Evaluates to `True` if the dataset is present and known 

1321 to both. 

1322 """ 

1323 raise NotImplementedError() 

1324 

1325 @abstractmethod 

1326 def _exists_many( 

1327 self, 

1328 refs: Iterable[DatasetRef], 

1329 /, 

1330 *, 

1331 full_check: bool = True, 

1332 ) -> dict[DatasetRef, DatasetExistence]: 

1333 """Indicate whether multiple datasets are known to Butler registry and 

1334 datastore. 

1335 

1336 This is an experimental API that may change at any moment. 

1337 

1338 Parameters 

1339 ---------- 

1340 refs : `~collections.abc.Iterable` of `DatasetRef` 

1341 The datasets to be checked. 

1342 full_check : `bool`, optional 

1343 If `True`, a check will be made for the actual existence of each 

1344 dataset artifact. This will involve additional overhead due to 

1345 the need to query an external system. If `False`, this check will 

1346 be omitted, and the registry and datastore will solely be asked 

1347 if they know about the dataset(s) but no direct check for the 

1348 artifact(s) will be performed. 

1349 

1350 Returns 

1351 ------- 

1352 existence : `dict` [`DatasetRef`, `DatasetExistence`] 

1353 Mapping from the given dataset refs to an enum indicating the 

1354 status of the dataset in registry and datastore. 

1355 Each value evaluates to `True` if the dataset is present and known 

1356 to both. 

1357 """ 

1358 raise NotImplementedError() 

1359 

1360 @abstractmethod 

1361 def removeRuns( 

1362 self, 

1363 names: Iterable[str], 

1364 unstore: bool | type[_DeprecatedDefault] = _DeprecatedDefault, 

1365 *, 

1366 unlink_from_chains: bool = False, 

1367 ) -> None: 

1368 """Remove one or more `~CollectionType.RUN` collections and the 

1369 datasets within them. 

1370 

1371 Parameters 

1372 ---------- 

1373 names : `~collections.abc.Iterable` [ `str` ] 

1374 The names of the collections to remove. 

1375 unstore : `bool`, optional 

1376 If `True` (default), delete datasets from all datastores in which 

1377 they are present, and attempt to rollback the registry deletions if 

1378 datastore deletions fail (which may not always be possible). If 

1379 `False`, datastore records for these datasets are still removed, 

1380 but any artifacts (e.g. files) will not be. This parameter is now 

1381 deprecated and no longer has any effect. Files are always deleted 

1382 from datastores unless they were ingested using full URIs. 

1383 unlink_from_chains : `bool`, optional 

1384 If `True` remove the RUN collection from any chains prior to 

1385 removing the RUN. If `False` the removal will fail if any chains 

1386 still refer to the RUN. 

1387 

1388 Raises 

1389 ------ 

1390 TypeError 

1391 Raised if one or more collections are not of type 

1392 `~CollectionType.RUN`. 

1393 """ 

1394 raise NotImplementedError() 

1395 

1396 @abstractmethod 

1397 def ingest( 

1398 self, 

1399 *datasets: FileDataset, 

1400 transfer: str | None = "auto", 

1401 record_validation_info: bool = True, 

1402 skip_existing: bool = False, 

1403 ) -> None: 

1404 """Store and register one or more datasets that already exist on disk. 

1405 

1406 Parameters 

1407 ---------- 

1408 *datasets : `FileDataset` 

1409 Each positional argument is a struct containing information about 

1410 a file to be ingested, including its URI (either absolute or 

1411 relative to the datastore root, if applicable), a resolved 

1412 `DatasetRef`, and optionally a formatter class or its 

1413 fully-qualified string name. If a formatter is not provided, the 

1414 formatter that would be used for `put` is assumed. On successful 

1415 ingest all `FileDataset.formatter` attributes will be set to the 

1416 formatter class used. `FileDataset.path` attributes may be modified 

1417 to put paths in whatever the datastore considers a standardized 

1418 form. 

1419 transfer : `str`, optional 

1420 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1421 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1422 transfer the file. 

1423 record_validation_info : `bool`, optional 

1424 If `True`, the default, the datastore can record validation 

1425 information associated with the file. If `False` the datastore 

1426 will not attempt to track any information such as checksums 

1427 or file sizes. This can be useful if such information is tracked 

1428 in an external system or if the file is to be compressed in place. 

1429 It is up to the datastore whether this parameter is relevant. 

1430 skip_existing : `bool`, optional 

1431 If `True`, a dataset will not be ingested if a dataset with the 

1432 same dataset ID already exists in the datastore. 

1433 If `False` (the default), a `ConflictingDefinitionError` will be 

1434 raised if any datasets with the same dataset ID already exist 

1435 in the datastore. 

1436 

1437 Returns 

1438 ------- 

1439 None 

1440 

1441 Raises 

1442 ------ 

1443 TypeError 

1444 Raised if the butler is read-only or if no run was provided. 

1445 NotImplementedError 

1446 Raised if the `Datastore` does not support the given transfer mode. 

1447 DatasetTypeNotSupportedError 

1448 Raised if one or more files to be ingested have a dataset type that 

1449 is not supported by the `Datastore`.. 

1450 FileNotFoundError 

1451 Raised if one of the given files does not exist. 

1452 FileExistsError 

1453 Raised if transfer is not `None` but the (internal) location the 

1454 file would be moved to is already occupied. 

1455 ConflictingDefinitionError 

1456 Raised if a dataset already exists in the repository and 

1457 ``skip_existing`` is `False`. 

1458 

1459 Notes 

1460 ----- 

1461 This operation is not fully exception safe: if a database operation 

1462 fails, the given `FileDataset` instances may be only partially updated. 

1463 

1464 It is atomic in terms of database operations (they will either all 

1465 succeed or all fail) providing the database engine implements 

1466 transactions correctly. It will attempt to be atomic in terms of 

1467 filesystem operations as well, but this cannot be implemented 

1468 rigorously for most datastores. 

1469 """ 

1470 raise NotImplementedError() 

1471 

1472 @abstractmethod 

1473 def ingest_zip( 

1474 self, 

1475 zip_file: ResourcePathExpression, 

1476 transfer: str = "auto", 

1477 *, 

1478 transfer_dimensions: bool = False, 

1479 dry_run: bool = False, 

1480 skip_existing: bool = False, 

1481 ) -> None: 

1482 """Ingest a Zip file into this butler. 

1483 

1484 The Zip file must have been created by `retrieve_artifacts_zip`. 

1485 

1486 Parameters 

1487 ---------- 

1488 zip_file : `lsst.resources.ResourcePathExpression` 

1489 Path to the Zip file. 

1490 transfer : `str`, optional 

1491 Method to use to transfer the Zip into the datastore. 

1492 transfer_dimensions : `bool`, optional 

1493 If `True`, dimension record data associated with the new datasets 

1494 will be transferred from the Zip file, if present. 

1495 dry_run : `bool`, optional 

1496 If `True` the ingest will be processed without any modifications 

1497 made to the target butler and as if the target butler did not 

1498 have any of the datasets. 

1499 skip_existing : `bool`, optional 

1500 If `True`, a zip will not be ingested if the dataset entries listed 

1501 in the index with the same dataset ID already exists in the butler. 

1502 If `False` (the default), a `ConflictingDefinitionError` will be 

1503 raised if any datasets with the same dataset ID already exist 

1504 in the repository. If, somehow, some datasets are known to the 

1505 butler and some are not, this is currently treated as an error 

1506 rather than attempting to do a partial ingest. 

1507 

1508 Notes 

1509 ----- 

1510 Run collections and dataset types are created as needed. 

1511 """ 

1512 raise NotImplementedError() 

1513 

1514 @abstractmethod 

1515 def export( 

1516 self, 

1517 *, 

1518 directory: str | None = None, 

1519 filename: str | None = None, 

1520 format: str | None = None, 

1521 transfer: str | None = None, 

1522 ) -> AbstractContextManager[RepoExportContext]: 

1523 """Export datasets from the repository represented by this `Butler`. 

1524 

1525 This method is a context manager that returns a helper object 

1526 (`RepoExportContext`) that is used to indicate what information from 

1527 the repository should be exported. 

1528 

1529 Parameters 

1530 ---------- 

1531 directory : `str`, optional 

1532 Directory dataset files should be written to if ``transfer`` is not 

1533 `None`. 

1534 filename : `str`, optional 

1535 Name for the file that will include database information associated 

1536 with the exported datasets. If this is not an absolute path and 

1537 ``directory`` is not `None`, it will be written to ``directory`` 

1538 instead of the current working directory. Defaults to 

1539 "export.{format}". 

1540 format : `str`, optional 

1541 File format for the database information file. If `None`, the 

1542 extension of ``filename`` will be used. 

1543 transfer : `str`, optional 

1544 Transfer mode passed to `Datastore.export`. 

1545 

1546 Raises 

1547 ------ 

1548 TypeError 

1549 Raised if the set of arguments passed is inconsistent. 

1550 

1551 Examples 

1552 -------- 

1553 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1554 methods are used to provide the iterables over data IDs and/or datasets 

1555 to be exported:: 

1556 

1557 with butler.export("exports.yaml") as export: 

1558 # Export all flats, but none of the dimension element rows 

1559 # (i.e. data ID information) associated with them. 

1560 export.saveDatasets( 

1561 butler.registry.queryDatasets("flat"), elements=() 

1562 ) 

1563 # Export all datasets that start with "deepCoadd_" and all of 

1564 # their associated data ID information. 

1565 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1566 """ 

1567 raise NotImplementedError() 

1568 

1569 @abstractmethod 

1570 def import_( 

1571 self, 

1572 *, 

1573 directory: ResourcePathExpression | None = None, 

1574 filename: ResourcePathExpression | TextIO | None = None, 

1575 format: str | None = None, 

1576 transfer: str | None = None, 

1577 skip_dimensions: set | None = None, 

1578 record_validation_info: bool = True, 

1579 without_datastore: bool = False, 

1580 ) -> None: 

1581 """Import datasets into this repository that were exported from a 

1582 different butler repository via `~lsst.daf.butler.Butler.export`. 

1583 

1584 Parameters 

1585 ---------- 

1586 directory : `~lsst.resources.ResourcePathExpression`, optional 

1587 Directory containing dataset files to import from. If `None`, 

1588 ``filename`` and all dataset file paths specified therein must 

1589 be absolute. 

1590 filename : `~lsst.resources.ResourcePathExpression` or `typing.TextIO` 

1591 A stream or name of file that contains database information 

1592 associated with the exported datasets, typically generated by 

1593 `~lsst.daf.butler.Butler.export`. If this a string (name) or 

1594 `~lsst.resources.ResourcePath` and is not an absolute path, 

1595 it will first be looked for relative to ``directory`` and if not 

1596 found there it will be looked for in the current working 

1597 directory. Defaults to "export.{format}". 

1598 format : `str`, optional 

1599 File format for ``filename``. If `None`, the extension of 

1600 ``filename`` will be used. 

1601 transfer : `str`, optional 

1602 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1603 skip_dimensions : `set`, optional 

1604 Names of dimensions that should be skipped and not imported. 

1605 record_validation_info : `bool`, optional 

1606 If `True`, the default, the datastore can record validation 

1607 information associated with the file. If `False` the datastore 

1608 will not attempt to track any information such as checksums 

1609 or file sizes. This can be useful if such information is tracked 

1610 in an external system or if the file is to be compressed in place. 

1611 It is up to the datastore whether this parameter is relevant. 

1612 without_datastore : `bool`, optional 

1613 If `True` only registry records will be imported and the datastore 

1614 will be ignored. 

1615 

1616 Raises 

1617 ------ 

1618 TypeError 

1619 Raised if the set of arguments passed is inconsistent, or if the 

1620 butler is read-only. 

1621 """ 

1622 raise NotImplementedError() 

1623 

1624 @abstractmethod 

1625 def transfer_dimension_records_from( 

1626 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate] 

1627 ) -> None: 

1628 """Transfer dimension records to this Butler from another Butler. 

1629 

1630 Parameters 

1631 ---------- 

1632 source_butler : `LimitedButler` or `Butler` 

1633 Butler from which the records are to be transferred. If data IDs 

1634 in ``source_refs`` are not expanded then this has to be a full 

1635 `Butler` whose registry will be used to expand data IDs. If the 

1636 source refs contain coordinates that are used to populate other 

1637 records then this will also need to be a full `Butler`. 

1638 source_refs : `~collections.abc.Iterable` [`DatasetRef` |\ 

1639 `DataCoordinate`] 

1640 Datasets or data IDs defined in the source butler whose dimension 

1641 records should be transferred to this butler. 

1642 """ 

1643 raise NotImplementedError() 

1644 

1645 @abstractmethod 

1646 def transfer_from( 

1647 self, 

1648 source_butler: LimitedButler, 

1649 source_refs: Iterable[DatasetRef], 

1650 transfer: str = "auto", 

1651 skip_missing: bool = True, 

1652 register_dataset_types: bool = False, 

1653 transfer_dimensions: bool = False, 

1654 dry_run: bool = False, 

1655 ) -> Collection[DatasetRef]: 

1656 """Transfer datasets to this Butler from a run in another Butler. 

1657 

1658 Parameters 

1659 ---------- 

1660 source_butler : `LimitedButler` 

1661 Butler from which the datasets are to be transferred. If data IDs 

1662 in ``source_refs`` are not expanded then this has to be a full 

1663 `Butler` whose registry will be used to expand data IDs. 

1664 source_refs : `~collections.abc.Iterable` of `DatasetRef` 

1665 Datasets defined in the source butler that should be transferred to 

1666 this butler. In most circumstances, ``transfer_from`` is faster if 

1667 the dataset refs are expanded. 

1668 transfer : `str`, optional 

1669 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1670 skip_missing : `bool` 

1671 If `True`, datasets with no datastore artifact associated with 

1672 them are not transferred. If `False` a registry entry will be 

1673 created even if no datastore record is created (and so will 

1674 look equivalent to the dataset being unstored). 

1675 register_dataset_types : `bool` 

1676 If `True` any missing dataset types are registered. Otherwise 

1677 an exception is raised. 

1678 transfer_dimensions : `bool`, optional 

1679 If `True`, dimension record data associated with the new datasets 

1680 will be transferred. 

1681 dry_run : `bool`, optional 

1682 If `True` the transfer will be processed without any modifications 

1683 made to the target butler and as if the target butler did not 

1684 have any of the datasets. 

1685 

1686 Returns 

1687 ------- 

1688 refs : `list` of `DatasetRef` 

1689 The refs added to this Butler. 

1690 

1691 Notes 

1692 ----- 

1693 The datastore artifact has to exist for a transfer 

1694 to be made but non-existence is not an error. 

1695 

1696 Datasets that already exist in this run will be skipped. 

1697 

1698 The datasets are imported as part of a transaction, although 

1699 dataset types are registered before the transaction is started. 

1700 This means that it is possible for a dataset type to be registered 

1701 even though transfer has failed. 

1702 """ 

1703 raise NotImplementedError() 

1704 

1705 @abstractmethod 

1706 def validateConfiguration( 

1707 self, 

1708 logFailures: bool = False, 

1709 datasetTypeNames: Iterable[str] | None = None, 

1710 ignore: Iterable[str] | None = None, 

1711 ) -> None: 

1712 """Validate butler configuration. 

1713 

1714 Checks that each `DatasetType` can be stored in the `Datastore`. 

1715 

1716 Parameters 

1717 ---------- 

1718 logFailures : `bool`, optional 

1719 If `True`, output a log message for every validation error 

1720 detected. 

1721 datasetTypeNames : `~collections.abc.Iterable` of `str`, optional 

1722 The `DatasetType` names that should be checked. This allows 

1723 only a subset to be selected. 

1724 ignore : `~collections.abc.Iterable` of `str`, optional 

1725 Names of DatasetTypes to skip over. This can be used to skip 

1726 known problems. If a named `DatasetType` corresponds to a 

1727 composite, all components of that `DatasetType` will also be 

1728 ignored. 

1729 

1730 Raises 

1731 ------ 

1732 ButlerValidationError 

1733 Raised if there is some inconsistency with how this Butler 

1734 is configured. 

1735 """ 

1736 raise NotImplementedError() 

1737 

1738 @property 

1739 @abstractmethod 

1740 def collection_chains(self) -> ButlerCollections: 

1741 """Object with methods for modifying collection chains 

1742 (`~lsst.daf.butler.ButlerCollections`). 

1743 

1744 Deprecated. Replaced with ``collections`` property. 

1745 """ 

1746 raise NotImplementedError() 

1747 

1748 @property 

1749 @abstractmethod 

1750 def collections(self) -> ButlerCollections: 

1751 """Object with methods for modifying and querying collections 

1752 (`~lsst.daf.butler.ButlerCollections`). 

1753 

1754 Use of this object is preferred over `registry` wherever possible. 

1755 """ 

1756 raise NotImplementedError() 

1757 

1758 @property 

1759 @abstractmethod 

1760 def run(self) -> str | None: 

1761 """Name of the run this butler writes outputs to by default (`str` or 

1762 `None`). 

1763 """ 

1764 raise NotImplementedError() 

1765 

1766 @property 

1767 @abstractmethod 

1768 def registry(self) -> Registry: 

1769 """The object that manages dataset metadata and relationships 

1770 (`Registry`). 

1771 

1772 Many operations that don't involve reading or writing butler datasets 

1773 are accessible only via `Registry` methods. Eventually these methods 

1774 will be replaced by equivalent `Butler` methods. 

1775 """ 

1776 raise NotImplementedError() 

1777 

1778 @abstractmethod 

1779 def query(self) -> AbstractContextManager[Query]: 

1780 """Context manager returning a `.queries.Query` object used for 

1781 construction and execution of complex queries. 

1782 """ 

1783 raise NotImplementedError() 

1784 

1785 def query_data_ids( 

1786 self, 

1787 dimensions: DimensionGroup | Iterable[str] | str, 

1788 *, 

1789 data_id: DataId | None = None, 

1790 where: str = "", 

1791 bind: Mapping[str, Any] | None = None, 

1792 with_dimension_records: bool = False, 

1793 order_by: Iterable[str] | str | None = None, 

1794 limit: int | None = -20_000, 

1795 explain: bool = True, 

1796 **kwargs: Any, 

1797 ) -> list[DataCoordinate]: 

1798 """Query for data IDs matching user-provided criteria. 

1799 

1800 Parameters 

1801 ---------- 

1802 dimensions : `DimensionGroup`, `str`, or \ 

1803 `~collections.abc.Iterable` [`str`] 

1804 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1805 instances or `str`. Will be automatically expanded to a complete 

1806 `DimensionGroup`. 

1807 data_id : `dict` or `DataCoordinate`, optional 

1808 A data ID whose key-value pairs are used as equality constraints 

1809 in the query. 

1810 where : `str`, optional 

1811 A string expression similar to a SQL WHERE clause. May involve 

1812 any column of a dimension table or (as a shortcut for the primary 

1813 key column of a dimension table) dimension name. See 

1814 :ref:`daf_butler_dimension_expressions` for more information. 

1815 bind : `~collections.abc.Mapping`, optional 

1816 Mapping containing literal values that should be injected into the 

1817 ``where`` expression, keyed by the identifiers they replace. 

1818 Values of collection type can be expanded in some cases; see 

1819 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1820 information. 

1821 with_dimension_records : `bool`, optional 

1822 If `True` (default is `False`) then returned data IDs will have 

1823 dimension records. 

1824 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1825 Names of the columns/dimensions to use for ordering returned data 

1826 IDs. Column name can be prefixed with minus (``-``) to use 

1827 descending ordering. 

1828 limit : `int` or `None`, optional 

1829 Upper limit on the number of returned records. `None` can be used 

1830 if no limit is wanted. A limit of ``0`` means that the query will 

1831 be executed and validated but no results will be returned. In this 

1832 case there will be no exception even if ``explain`` is `True`. 

1833 If a negative value is given a warning will be issued if the number 

1834 of results is capped by that limit. 

1835 explain : `bool`, optional 

1836 If `True` (default) then `EmptyQueryResultError` exception is 

1837 raised when resulting list is empty. The exception contains 

1838 non-empty list of strings explaining possible causes for empty 

1839 result. 

1840 **kwargs 

1841 Additional keyword arguments are forwarded to 

1842 `DataCoordinate.standardize` when processing the ``data_id`` 

1843 argument (and may be used to provide a constraining data ID even 

1844 when the ``data_id`` argument is `None`). 

1845 

1846 Returns 

1847 ------- 

1848 dataIds : `list` [`DataCoordinate`] 

1849 Data IDs matching the given query parameters. These are always 

1850 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1851 returns `True`). 

1852 

1853 Raises 

1854 ------ 

1855 lsst.daf.butler.registry.DataIdError 

1856 Raised when ``data_id`` or keyword arguments specify unknown 

1857 dimensions or values, or when they contain inconsistent values. 

1858 lsst.daf.butler.registry.UserExpressionError 

1859 Raised when ``where`` expression is invalid. 

1860 lsst.daf.butler.EmptyQueryResultError 

1861 Raised when query generates empty result and ``explain`` is set to 

1862 `True`. 

1863 TypeError 

1864 Raised when the arguments are incompatible. 

1865 """ 

1866 if data_id is None: 

1867 data_id = DataCoordinate.make_empty(self.dimensions) 

1868 if order_by is None: 

1869 order_by = [] 

1870 query_limit = limit 

1871 warn_limit = False 

1872 if limit is not None and limit < 0: 

1873 query_limit = abs(limit) + 1 

1874 warn_limit = True 

1875 with self.query() as query: 

1876 result = ( 

1877 query.data_ids(dimensions) 

1878 .where(data_id, where, bind=bind, **kwargs) 

1879 .order_by(*ensure_iterable(order_by)) 

1880 .limit(query_limit) 

1881 ) 

1882 if with_dimension_records: 

1883 result = result.with_dimension_records() 

1884 data_ids = list(result) 

1885 if warn_limit and len(data_ids) == query_limit: 

1886 # We asked for one too many so must remove that from the list. 

1887 data_ids.pop(-1) 

1888 assert limit is not None # For mypy. 

1889 _LOG.warning("More data IDs are available than the requested limit of %d.", abs(limit)) 

1890 if explain and (limit is None or limit != 0) and not data_ids: 

1891 raise EmptyQueryResultError(list(result.explain_no_results())) 

1892 return data_ids 

1893 

1894 def query_datasets( 

1895 self, 

1896 dataset_type: str | DatasetType, 

1897 collections: str | Iterable[str] | None = None, 

1898 *, 

1899 find_first: bool = True, 

1900 data_id: DataId | None = None, 

1901 where: str = "", 

1902 bind: Mapping[str, Any] | None = None, 

1903 with_dimension_records: bool = False, 

1904 order_by: Iterable[str] | str | None = None, 

1905 limit: int | None = -20_000, 

1906 explain: bool = True, 

1907 **kwargs: Any, 

1908 ) -> list[DatasetRef]: 

1909 """Query for dataset references matching user-provided criteria. 

1910 

1911 Parameters 

1912 ---------- 

1913 dataset_type : `str` or `DatasetType` 

1914 Dataset type object or name to search for. 

1915 collections : collection expression, optional 

1916 A collection name or iterable of collection names to search. If not 

1917 provided, the default collections are used. Can be a wildcard if 

1918 ``find_first`` is `False` (if find first is requested the order 

1919 of collections matters and wildcards make the order indeterminate). 

1920 See :ref:`daf_butler_collection_expressions` for more information. 

1921 find_first : `bool`, optional 

1922 If `True` (default), for each result data ID, only yield one 

1923 `DatasetRef` of each `DatasetType`, from the first collection in 

1924 which a dataset of that dataset type appears (according to the 

1925 order of ``collections`` passed in). If `True`, ``collections`` 

1926 must not contain wildcards. 

1927 data_id : `dict` or `DataCoordinate`, optional 

1928 A data ID whose key-value pairs are used as equality constraints in 

1929 the query. 

1930 where : `str`, optional 

1931 A string expression similar to a SQL WHERE clause. May involve any 

1932 column of a dimension table or (as a shortcut for the primary key 

1933 column of a dimension table) dimension name. See 

1934 :ref:`daf_butler_dimension_expressions` for more information. 

1935 bind : `~collections.abc.Mapping`, optional 

1936 Mapping containing literal values that should be injected into the 

1937 ``where`` expression, keyed by the identifiers they replace. Values 

1938 of collection type can be expanded in some cases; see 

1939 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1940 information. 

1941 with_dimension_records : `bool`, optional 

1942 If `True` (default is `False`) then returned data IDs will have 

1943 dimension records. 

1944 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1945 Names of the columns/dimensions to use for ordering returned data 

1946 IDs. Column name can be prefixed with minus (``-``) to use 

1947 descending ordering. 

1948 limit : `int` or `None`, optional 

1949 Upper limit on the number of returned records. `None` can be used 

1950 if no limit is wanted. A limit of ``0`` means that the query will 

1951 be executed and validated but no results will be returned. In this 

1952 case there will be no exception even if ``explain`` is `True`. 

1953 If a negative value is given a warning will be issued if the number 

1954 of results is capped by that limit. 

1955 explain : `bool`, optional 

1956 If `True` (default) then `EmptyQueryResultError` exception is 

1957 raised when resulting list is empty. The exception contains 

1958 non-empty list of strings explaining possible causes for empty 

1959 result. 

1960 **kwargs 

1961 Additional keyword arguments are forwarded to 

1962 `DataCoordinate.standardize` when processing the ``data_id`` 

1963 argument (and may be used to provide a constraining data ID even 

1964 when the ``data_id`` argument is `None`). 

1965 

1966 Returns 

1967 ------- 

1968 refs : `.queries.DatasetRefQueryResults` 

1969 Dataset references matching the given query criteria. Nested data 

1970 IDs are guaranteed to include values for all implied dimensions 

1971 (i.e. `DataCoordinate.hasFull` will return `True`). 

1972 

1973 Raises 

1974 ------ 

1975 lsst.daf.butler.registry.DatasetTypeExpressionError 

1976 Raised when ``dataset_type`` expression is invalid. 

1977 lsst.daf.butler.registry.DataIdError 

1978 Raised when ``data_id`` or keyword arguments specify unknown 

1979 dimensions or values, or when they contain inconsistent values. 

1980 lsst.daf.butler.registry.UserExpressionError 

1981 Raised when ``where`` expression is invalid. 

1982 lsst.daf.butler.EmptyQueryResultError 

1983 Raised when query generates empty result and ``explain`` is set to 

1984 `True`. 

1985 TypeError 

1986 Raised when the arguments are incompatible, such as when a 

1987 collection wildcard is passed when ``find_first`` is `True`, or 

1988 when ``collections`` is `None` and default butler collections are 

1989 not defined. 

1990 """ 

1991 if data_id is None: 

1992 data_id = DataCoordinate.make_empty(self.dimensions) 

1993 if order_by is None: 

1994 order_by = [] 

1995 if collections and has_globs(collections): 

1996 # Wild cards need to be expanded but can only be allowed if 

1997 # find_first=False because expanding wildcards does not return 

1998 # a guaranteed ordering. Querying collection registry to expand 

1999 # collections when we do not have wildcards is expensive so only 

2000 # do it if we need it. 

2001 if find_first: 

2002 raise InvalidQueryError( 

2003 f"Can not use wildcards in collections when find_first=True (given {collections})" 

2004 ) 

2005 collections = self.collections.query(collections) 

2006 query_limit = limit 

2007 warn_limit = False 

2008 if limit is not None and limit < 0: 

2009 query_limit = abs(limit) + 1 

2010 warn_limit = True 

2011 with self.query() as query: 

2012 result = ( 

2013 query.datasets(dataset_type, collections=collections, find_first=find_first) 

2014 .where(data_id, where, bind=bind, **kwargs) 

2015 .order_by(*ensure_iterable(order_by)) 

2016 .limit(query_limit) 

2017 ) 

2018 if with_dimension_records: 

2019 result = result.with_dimension_records() 

2020 refs = list(result) 

2021 if warn_limit and len(refs) == query_limit: 

2022 # We asked for one too many so must remove that from the list. 

2023 refs.pop(-1) 

2024 assert limit is not None # For mypy. 

2025 _LOG.warning("More datasets are available than the requested limit of %d.", abs(limit)) 

2026 if explain and (limit is None or limit != 0) and not refs: 

2027 raise EmptyQueryResultError(list(result.explain_no_results())) 

2028 return refs 

2029 

2030 def query_dimension_records( 

2031 self, 

2032 element: str, 

2033 *, 

2034 data_id: DataId | None = None, 

2035 where: str = "", 

2036 bind: Mapping[str, Any] | None = None, 

2037 order_by: Iterable[str] | str | None = None, 

2038 limit: int | None = -20_000, 

2039 explain: bool = True, 

2040 **kwargs: Any, 

2041 ) -> list[DimensionRecord]: 

2042 """Query for dimension information matching user-provided criteria. 

2043 

2044 Parameters 

2045 ---------- 

2046 element : `str` 

2047 The name of a dimension element to obtain records for. 

2048 data_id : `dict` or `DataCoordinate`, optional 

2049 A data ID whose key-value pairs are used as equality constraints 

2050 in the query. 

2051 where : `str`, optional 

2052 A string expression similar to a SQL WHERE clause. See 

2053 `Registry.queryDataIds` and :ref:`daf_butler_dimension_expressions` 

2054 for more information. 

2055 bind : `~collections.abc.Mapping`, optional 

2056 Mapping containing literal values that should be injected into the 

2057 ``where`` expression, keyed by the identifiers they replace. 

2058 Values of collection type can be expanded in some cases; see 

2059 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2060 information. 

2061 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

2062 Names of the columns/dimensions to use for ordering returned data 

2063 IDs. Column name can be prefixed with minus (``-``) to use 

2064 descending ordering. 

2065 limit : `int` or `None`, optional 

2066 Upper limit on the number of returned records. `None` can be used 

2067 if no limit is wanted. A limit of ``0`` means that the query will 

2068 be executed and validated but no results will be returned. In this 

2069 case there will be no exception even if ``explain`` is `True`. 

2070 If a negative value is given a warning will be issued if the number 

2071 of results is capped by that limit. 

2072 explain : `bool`, optional 

2073 If `True` (default) then `EmptyQueryResultError` exception is 

2074 raised when resulting list is empty. The exception contains 

2075 non-empty list of strings explaining possible causes for empty 

2076 result. 

2077 **kwargs 

2078 Additional keyword arguments are forwarded to 

2079 `DataCoordinate.standardize` when processing the ``data_id`` 

2080 argument (and may be used to provide a constraining data ID even 

2081 when the ``data_id`` argument is `None`). 

2082 

2083 Returns 

2084 ------- 

2085 records : `list` [`DimensionRecord`] 

2086 Dimension records matching the given query parameters. 

2087 

2088 Raises 

2089 ------ 

2090 lsst.daf.butler.registry.DataIdError 

2091 Raised when ``data_id`` or keyword arguments specify unknown 

2092 dimensions or values, or when they contain inconsistent values. 

2093 lsst.daf.butler.registry.UserExpressionError 

2094 Raised when ``where`` expression is invalid. 

2095 lsst.daf.butler.EmptyQueryResultError 

2096 Raised when query generates empty result and ``explain`` is set to 

2097 `True`. 

2098 TypeError 

2099 Raised when the arguments are incompatible, such as when a 

2100 collection wildcard is passed when ``find_first`` is `True`, or 

2101 when ``collections`` is `None` and default butler collections are 

2102 not defined. 

2103 """ 

2104 if data_id is None: 

2105 data_id = DataCoordinate.make_empty(self.dimensions) 

2106 if order_by is None: 

2107 order_by = [] 

2108 query_limit = limit 

2109 warn_limit = False 

2110 if limit is not None and limit < 0: 

2111 query_limit = abs(limit) + 1 

2112 warn_limit = True 

2113 with self.query() as query: 

2114 result = ( 

2115 query.dimension_records(element) 

2116 .where(data_id, where, bind=bind, **kwargs) 

2117 .order_by(*ensure_iterable(order_by)) 

2118 .limit(query_limit) 

2119 ) 

2120 dimension_records = list(result) 

2121 if warn_limit and len(dimension_records) == query_limit: 

2122 # We asked for one too many so must remove that from the list. 

2123 dimension_records.pop(-1) 

2124 assert limit is not None # For mypy. 

2125 _LOG.warning( 

2126 "More dimension records are available than the requested limit of %d.", abs(limit) 

2127 ) 

2128 if explain and (limit is None or limit != 0) and not dimension_records: 

2129 raise EmptyQueryResultError(list(result.explain_no_results())) 

2130 return dimension_records 

2131 

2132 def query_all_datasets( 

2133 self, 

2134 collections: str | Iterable[str] | None = None, 

2135 *, 

2136 name: str | Iterable[str] = "*", 

2137 find_first: bool = True, 

2138 data_id: DataId | None = None, 

2139 where: str = "", 

2140 bind: Mapping[str, Any] | None = None, 

2141 limit: int | None = -20_000, 

2142 **kwargs: Any, 

2143 ) -> list[DatasetRef]: 

2144 """Query for datasets of potentially multiple types. 

2145 

2146 Parameters 

2147 ---------- 

2148 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

2149 The collection or collections to search, in order. If not provided 

2150 or `None`, the default collection search path for this butler is 

2151 used. 

2152 name : `str` or `~collections.abc.Iterable` [ `str` ], optional 

2153 Names or name patterns (glob-style) that returned dataset type 

2154 names must match. If an iterable, items are OR'd together. The 

2155 default is to include all dataset types in the given collections. 

2156 find_first : `bool`, optional 

2157 If `True` (default), for each result data ID, only yield one 

2158 `DatasetRef` of each `DatasetType`, from the first collection in 

2159 which a dataset of that dataset type appears (according to the 

2160 order of ``collections`` passed in). 

2161 data_id : `dict` or `DataCoordinate`, optional 

2162 A data ID whose key-value pairs are used as equality constraints in 

2163 the query. 

2164 where : `str`, optional 

2165 A string expression similar to a SQL WHERE clause. May involve any 

2166 column of a dimension table or (as a shortcut for the primary key 

2167 column of a dimension table) dimension name. See 

2168 :ref:`daf_butler_dimension_expressions` for more information. 

2169 bind : `~collections.abc.Mapping`, optional 

2170 Mapping containing literal values that should be injected into the 

2171 ``where`` expression, keyed by the identifiers they replace. Values 

2172 of collection type can be expanded in some cases; see 

2173 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2174 information. 

2175 limit : `int` or `None`, optional 

2176 Upper limit on the number of returned records. `None` can be used 

2177 if no limit is wanted. A limit of ``0`` means that the query will 

2178 be executed and validated but no results will be returned. 

2179 If a negative value is given a warning will be issued if the number 

2180 of results is capped by that limit. If no limit is provided, by 

2181 default a maximum of 20,000 records will be returned. 

2182 **kwargs 

2183 Additional keyword arguments are forwarded to 

2184 `DataCoordinate.standardize` when processing the ``data_id`` 

2185 argument (and may be used to provide a constraining data ID even 

2186 when the ``data_id`` argument is `None`). 

2187 

2188 Raises 

2189 ------ 

2190 MissingDatasetTypeError 

2191 When no dataset types match ``name``, or an explicit (non-glob) 

2192 dataset type in ``name`` does not exist. 

2193 InvalidQueryError 

2194 If the parameters to the query are inconsistent or malformed. 

2195 MissingCollectionError 

2196 If a given collection is not found. 

2197 

2198 Returns 

2199 ------- 

2200 refs : `list` [ `DatasetRef` ] 

2201 Dataset references matching the given query criteria. Nested data 

2202 IDs are guaranteed to include values for all implied dimensions 

2203 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2204 include dimension records (`DataCoordinate.hasRecords` will be 

2205 `False`). 

2206 """ 

2207 if collections is None: 

2208 collections = list(self.collections.defaults) 

2209 else: 

2210 collections = list(ensure_iterable(collections)) 

2211 

2212 if bind is None: 

2213 bind = {} 

2214 if data_id is None: 

2215 data_id = {} 

2216 

2217 warn_limit = False 

2218 if limit is not None and limit < 0: 

2219 # Add one to the limit so we can detect if we have exceeded it. 

2220 limit = abs(limit) + 1 

2221 warn_limit = True 

2222 

2223 args = QueryAllDatasetsParameters( 

2224 collections=collections, 

2225 name=list(ensure_iterable(name)), 

2226 find_first=find_first, 

2227 data_id=data_id, 

2228 where=where, 

2229 limit=limit, 

2230 bind=bind, 

2231 kwargs=kwargs, 

2232 with_dimension_records=False, 

2233 ) 

2234 with self._query_all_datasets_by_page(args) as pages: 

2235 result = [] 

2236 for page in pages: 

2237 result.extend(page) 

2238 

2239 if warn_limit and limit is not None and len(result) >= limit: 

2240 # Remove the extra dataset we added for the limit check. 

2241 result.pop() 

2242 _LOG.warning("More datasets are available than the requested limit of %d.", limit - 1) 

2243 

2244 return result 

2245 

2246 @abstractmethod 

2247 def _query_all_datasets_by_page( 

2248 self, args: QueryAllDatasetsParameters 

2249 ) -> AbstractContextManager[Iterator[list[DatasetRef]]]: 

2250 raise NotImplementedError() 

2251 

2252 def clone( 

2253 self, 

2254 *, 

2255 collections: CollectionArgType | None | EllipsisType = ..., 

2256 run: str | None | EllipsisType = ..., 

2257 inferDefaults: bool | EllipsisType = ..., 

2258 dataId: dict[str, str] | EllipsisType = ..., 

2259 metrics: ButlerMetrics | None = None, 

2260 ) -> Butler: 

2261 """Return a new Butler instance connected to the same repository 

2262 as this one, optionally overriding ``collections``, ``run``, 

2263 ``inferDefaults``, and default data ID. 

2264 

2265 Parameters 

2266 ---------- 

2267 collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ 

2268 optional 

2269 Same as constructor. If omitted, uses value from original object. 

2270 run : `str` or `None`, optional 

2271 Same as constructor. If `None`, no default run is used. If 

2272 omitted, copies value from original object. 

2273 inferDefaults : `bool`, optional 

2274 Same as constructor. If omitted, copies value from original 

2275 object. 

2276 dataId : `str` 

2277 Same as ``kwargs`` passed to the constructor. If omitted, copies 

2278 values from original object. 

2279 metrics : `ButlerMetrics` or `None`, optional 

2280 Metrics object to record butler statistics. 

2281 """ 

2282 raise NotImplementedError() 

2283 

2284 @abstractmethod 

2285 def close(self) -> None: 

2286 raise NotImplementedError() 

2287 

2288 @abstractmethod 

2289 def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]: 

2290 raise NotImplementedError()