Coverage for python / lsst / daf / butler / _butler.py: 31%


1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["Butler", "ParsedButlerDatasetURI", "SpecificButlerDataset"] 

31 

32import dataclasses 

33import urllib.parse 

34import uuid 

35import warnings 

36from abc import abstractmethod 

37from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence 

38from contextlib import AbstractContextManager 

39from types import EllipsisType 

40from typing import TYPE_CHECKING, Any, TextIO 

41 

42from lsst.resources import ResourcePath, ResourcePathExpression 

43from lsst.utils import doImportType 

44from lsst.utils.iteration import ensure_iterable 

45from lsst.utils.logging import getLogger 

46 

47from ._butler_collections import ButlerCollections 

48from ._butler_config import ButlerConfig, ButlerType 

49from ._butler_instance_options import ButlerInstanceOptions 

50from ._butler_metrics import ButlerMetrics 

51from ._butler_repo_index import ButlerRepoIndex 

52from ._config import Config, ConfigSubset 

53from ._exceptions import EmptyQueryResultError, InvalidQueryError 

54from ._limited_butler import LimitedButler 

55from ._query_all_datasets import QueryAllDatasetsParameters 

56from .datastore import Datastore 

57from .dimensions import DataCoordinate, DimensionConfig 

58from .registry import RegistryConfig, _RegistryFactory 

59from .repo_relocation import BUTLER_ROOT_TAG 

60from .utils import has_globs 

61 

62if TYPE_CHECKING: 

63 from ._dataset_existence import DatasetExistence 

64 from ._dataset_provenance import DatasetProvenance 

65 from ._dataset_ref import DatasetId, DatasetRef 

66 from ._dataset_type import DatasetType 

67 from ._deferredDatasetHandle import DeferredDatasetHandle 

68 from ._file_dataset import FileDataset 

69 from ._labeled_butler_factory import LabeledButlerFactoryProtocol 

70 from ._storage_class import StorageClass 

71 from ._timespan import Timespan 

72 from .datastore import DatasetRefURIs 

73 from .dimensions import DataId, DimensionGroup, DimensionRecord 

74 from .queries import Query 

75 from .registry import CollectionArgType, Registry 

76 from .transfers import RepoExportContext 

77 

78_LOG = getLogger(__name__) 

79 

80 

81@dataclasses.dataclass 

82class ParsedButlerDatasetURI: 

83 """Representation of the contents of an IVOA IVOID or dataset URI.""" 

84 

85 label: str 

86 """Label of the associated butler repository. (`str`)""" 

87 dataset_id: uuid.UUID 

88 """Dataset ID of the referenced dataset within the labeled repository. 

89 (`uuid.UUID`)""" 

90 uri: str 

91 """The original URI that was parsed (`str`).""" 

92 

93 

94@dataclasses.dataclass 

95class SpecificButlerDataset: 

96 """A dataset ref associated with a specific butler.""" 

97 

98 butler: Butler 

99 """A specific butler repository (`Butler`).""" 

100 dataset: DatasetRef | None 

101 """The reference of a specific dataset in that butler (`DatasetRef`).""" 

102 

103 

104class _DeprecatedDefault: 

105 """Default value for a deprecated parameter.""" 

106 

107 

108class Butler(LimitedButler): # numpydoc ignore=PR02 

109 """Interface for data butler and factory for Butler instances. 

110 

111 Parameters 

112 ---------- 

113 config : `ButlerConfig`, `Config` or `str`, optional 

114 Configuration. Anything acceptable to the `ButlerConfig` constructor. 

115 If a directory path is given the configuration will be read from a 

116 ``butler.yaml`` file in that location. If `None` is given default 

117 values will be used. If ``config`` contains a "cls" key then its value

118 is used as the name of the butler class, which must be a subclass of

119 this class; otherwise `DirectButler` is instantiated.

120 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

121 An expression specifying the collections to be searched (in order) when 

122 reading datasets. 

123 This may be a `str` collection name or an iterable thereof. 

124 See :ref:`daf_butler_collection_expressions` for more information. 

125 These collections are not registered automatically and must be 

126 manually registered before they are used by any method, but they may be 

127 manually registered after the `Butler` is initialized. 

128 run : `str`, optional 

129 Name of the `~CollectionType.RUN` collection new datasets should be 

130 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

131 ``collections`` will be set to ``[run]``. If not `None`, this 

132 collection will automatically be registered. If this is not set (and 

133 ``writeable`` is not set either), a read-only butler will be created. 

134 searchPaths : `list` of `str`, optional 

135 Directory paths to search when calculating the full Butler 

136 configuration. Not used if the supplied config is already a 

137 `ButlerConfig`. 

138 writeable : `bool`, optional 

139 Explicitly sets whether the butler supports write operations. If not 

140 provided, a read-write butler is created if any of ``run``, ``tags``, 

141 or ``chains`` is non-empty. 

142 inferDefaults : `bool`, optional 

143 If `True` (default) infer default data ID values from the values 

144 present in the datasets in ``collections``: if all collections have the 

145 same value (or no value) for a governor dimension, that value will be 

146 the default for that dimension. Nonexistent collections are ignored. 

147 If a default value is provided explicitly for a governor dimension via 

148 ``**kwargs``, no default will be inferred for that dimension. 

149 without_datastore : `bool`, optional 

150 If `True` do not attach a datastore to this butler. Any attempts 

151 to use a datastore will fail. 

152 metrics : `ButlerMetrics` or `None` 

153 External metrics object to be used for tracking butler usage. If `None` 

154 a new metrics object is created. 

155 **kwargs : `typing.Any` 

156 Additional keyword arguments passed to the constructor of the actual

157 butler class.

158 

159 Notes 

160 ----- 

161 The preferred way to instantiate Butler is via the `from_config` method. 

162 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, 

163 but ``mypy`` will complain about the former. 

164 """ 

165 

166 def __new__( 

167 cls, 

168 config: Config | ResourcePathExpression | None = None, 

169 *, 

170 collections: Any = None, 

171 run: str | None = None, 

172 searchPaths: Sequence[ResourcePathExpression] | None = None, 

173 writeable: bool | None = None, 

174 inferDefaults: bool = True, 

175 without_datastore: bool = False, 

176 metrics: ButlerMetrics | None = None, 

177 **kwargs: Any, 

178 ) -> Butler: 

179 if cls is Butler: 

180 return Butler.from_config( 

181 config=config, 

182 collections=collections, 

183 run=run, 

184 searchPaths=searchPaths, 

185 writeable=writeable, 

186 inferDefaults=inferDefaults, 

187 without_datastore=without_datastore, 

188 metrics=metrics, 

189 **kwargs, 

190 ) 

191 

192 # Note: we do not pass any parameters to __new__, Python will pass them 

193 # to __init__ after __new__ returns sub-class instance. 

194 return super().__new__(cls) 

195 

196 @classmethod 

197 def from_config( 

198 cls, 

199 config: Config | ResourcePathExpression | None = None, 

200 *, 

201 collections: Any = None, 

202 run: str | None = None, 

203 searchPaths: Sequence[ResourcePathExpression] | None = None, 

204 writeable: bool | None = None, 

205 inferDefaults: bool = True, 

206 without_datastore: bool = False, 

207 metrics: ButlerMetrics | None = None, 

208 **kwargs: Any, 

209 ) -> Butler: 

210 """Create butler instance from configuration. 

211 

212 Parameters 

213 ---------- 

214 config : `ButlerConfig`, `Config` or `str`, optional 

215 Configuration. Anything acceptable to the `ButlerConfig` 

216 constructor. If a directory path is given the configuration will be 

217 read from a ``butler.yaml`` file in that location. If `None` is 

218 given default values will be used. If ``config`` contains a "cls" key

219 then its value is used as the name of the butler class, which must be

220 a subclass of this class; otherwise `DirectButler` is instantiated.

221 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

222 An expression specifying the collections to be searched (in order) 

223 when reading datasets. 

224 This may be a `str` collection name or an iterable thereof. 

225 See :ref:`daf_butler_collection_expressions` for more information. 

226 These collections are not registered automatically and must be 

227 manually registered before they are used by any method, but they 

228 may be manually registered after the `Butler` is initialized. 

229 run : `str`, optional 

230 Name of the `~CollectionType.RUN` collection new datasets should be 

231 inserted into. If ``collections`` is `None` and ``run`` is not 

232 `None`, ``collections`` will be set to ``[run]``. If not `None`, 

233 this collection will automatically be registered. If this is not 

234 set (and ``writeable`` is not set either), a read-only butler will 

235 be created. 

236 searchPaths : `list` of `str`, optional 

237 Directory paths to search when calculating the full Butler 

238 configuration. Not used if the supplied config is already a 

239 `ButlerConfig`. 

240 writeable : `bool`, optional 

241 Explicitly sets whether the butler supports write operations. If 

242 not provided, a read-write butler is created if any of ``run``, 

243 ``tags``, or ``chains`` is non-empty. 

244 inferDefaults : `bool`, optional 

245 If `True` (default) infer default data ID values from the values 

246 present in the datasets in ``collections``: if all collections have 

247 the same value (or no value) for a governor dimension, that value 

248 will be the default for that dimension. Nonexistent collections 

249 are ignored. If a default value is provided explicitly for a 

250 governor dimension via ``**kwargs``, no default will be inferred 

251 for that dimension. 

252 without_datastore : `bool`, optional 

253 If `True` do not attach a datastore to this butler. Any attempts 

254 to use a datastore will fail. 

255 metrics : `ButlerMetrics` or `None`, optional 

256 Metrics object to record butler usage statistics. 

257 **kwargs : `typing.Any` 

258 Default data ID key-value pairs. These may only identify 

259 "governor" dimensions like ``instrument`` and ``skymap``. 

260 

261 Returns 

262 ------- 

263 butler : `Butler` 

264 A `Butler` constructed from the given configuration. 

265 

266 Notes 

267 ----- 

268 Calling this factory method is identical to calling 

269 ``Butler(config, ...)``. Its only raison d'être is that ``mypy`` 

270 complains about the ``Butler()`` call.

271 

272 Examples 

273 -------- 

274 While there are many ways to control exactly how a `Butler` interacts 

275 with the collections in its `Registry`, the most common cases are still 

276 simple. 

277 

278 For a read-only `Butler` that searches one collection, do:: 

279 

280 butler = Butler.from_config( 

281 "/path/to/repo", collections=["u/alice/DM-50000"] 

282 ) 

283 

284 For a read-write `Butler` that writes to and reads from a 

285 `~CollectionType.RUN` collection:: 

286 

287 butler = Butler.from_config( 

288 "/path/to/repo", run="u/alice/DM-50000/a" 

289 ) 

290 

291 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

292 because we want to write to one `~CollectionType.RUN` collection but 

293 read from several others (as well):: 

294 

295 butler = Butler.from_config( 

296 "/path/to/repo", 

297 run="u/alice/DM-50000/a", 

298 collections=[ 

299 "u/alice/DM-50000/a", 

300 "u/bob/DM-49998", 

301 "HSC/defaults", 

302 ], 

303 ) 

304 

305 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

306 Datasets will be read first from that run (since it appears first in 

307 the chain), and then from ``u/bob/DM-49998`` and finally 

308 ``HSC/defaults``. 

309 

310 Finally, one can always create a `Butler` with no collections:: 

311 

312 butler = Butler.from_config("/path/to/repo", writeable=True) 

313 

314 This can be extremely useful when you just want to use 

315 ``butler.registry``, e.g. for inserting dimension data or managing 

316 collections, or when the collections you want to use with the butler 

317 are not consistent. Passing ``writeable`` explicitly here is only 

318 necessary if you want to be able to make changes to the repo - usually 

319 the value for ``writeable`` can be guessed from the collection 

320 arguments provided, but it defaults to `False` when there are no

321 collection arguments.

322 """ 

323 # DirectButler used to have a way to specify a "copy constructor" by 

324 # passing the "butler" parameter to its constructor. This has 

325 # been moved out of the constructor into Butler.clone(). 

326 butler = kwargs.pop("butler", None) 

327 metrics = metrics if metrics is not None else ButlerMetrics() 

328 if butler is not None: 

329 if not isinstance(butler, Butler): 

330 raise TypeError("'butler' parameter must be a Butler instance") 

331 if config is not None or searchPaths is not None or writeable is not None: 

332 raise TypeError( 

333 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

334 ) 

335 return butler.clone( 

336 collections=collections, run=run, inferDefaults=inferDefaults, metrics=metrics, dataId=kwargs 

337 ) 

338 

339 options = ButlerInstanceOptions( 

340 collections=collections, 

341 run=run, 

342 writeable=writeable, 

343 inferDefaults=inferDefaults, 

344 metrics=metrics, 

345 kwargs=kwargs, 

346 ) 

347 

348 # Load the Butler configuration. This may involve searching the 

349 # environment to locate a configuration file. 

350 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) 

351 butler_type = butler_config.get_butler_type() 

352 

353 # Make DirectButler if class is not specified. 

354 match butler_type: 

355 case ButlerType.DIRECT: 

356 from .direct_butler import DirectButler 

357 

358 return DirectButler.create_from_config( 

359 butler_config, 

360 options=options, 

361 without_datastore=without_datastore, 

362 ) 

363 case ButlerType.REMOTE: 

364 from .remote_butler._factory import RemoteButlerFactory 

365 

366 # Assume this is being created by a client who would like 

367 # default caching of remote datasets. 

368 factory = RemoteButlerFactory.create_factory_from_config(butler_config) 

369 return factory.create_butler_with_credentials_from_environment( 

370 butler_options=options, enable_datastore_cache=True 

371 ) 

372 case _: 

373 raise TypeError(f"Unknown Butler type '{butler_type}'") 

374 

375 @staticmethod 

376 def makeRepo( 

377 root: ResourcePathExpression, 

378 config: Config | str | None = None, 

379 dimensionConfig: Config | str | None = None, 

380 standalone: bool = False, 

381 searchPaths: list[str] | None = None, 

382 forceConfigRoot: bool = True, 

383 outfile: ResourcePathExpression | None = None, 

384 overwrite: bool = False, 

385 ) -> Config: 

386 """Create an empty data repository by adding a butler.yaml config 

387 to a repository root directory. 

388 

389 Parameters 

390 ---------- 

391 root : `lsst.resources.ResourcePathExpression` 

392 Path or URI to the root location of the new repository. Will be 

393 created if it does not exist. 

394 config : `Config` or `str`, optional 

395 Configuration to write to the repository, after setting any 

396 root-dependent Registry or Datastore config options. Can not 

397 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

398 configuration will be used. Root-dependent config options 

399 specified in this config are overwritten if ``forceConfigRoot`` 

400 is `True`. 

401 dimensionConfig : `Config` or `str`, optional 

402 Configuration for dimensions, will be used to initialize registry 

403 database. 

404 standalone : `bool` 

405 If True, write all expanded defaults, not just customized or 

406 repository-specific settings. 

407 This (mostly) decouples the repository from the default 

408 configuration, insulating it from changes to the defaults (which 

409 may be good or bad, depending on the nature of the changes). 

410 Future *additions* to the defaults will still be picked up when 

411 initializing a `Butler` for repos created with ``standalone=True``. 

412 searchPaths : `list` of `str`, optional 

413 Directory paths to search when calculating the full butler 

414 configuration. 

415 forceConfigRoot : `bool`, optional 

416 If `False`, any values present in the supplied ``config`` that 

417 would normally be reset are not overridden and will appear 

418 directly in the output config. This allows non-standard overrides 

419 of the root directory for a datastore or registry to be given. 

420 If this parameter is `True` the values for ``root`` will be 

421 forced into the resulting config if appropriate. 

422 outfile : `lsst.resources.ResourcePathExpression`, optional 

423 If not-`None`, the output configuration will be written to this 

424 location rather than into the repository itself. Can be a URI 

425 string. Can refer to a directory that will be used to write 

426 ``butler.yaml``. 

427 overwrite : `bool`, optional 

428 Create a new configuration file even if one already exists 

429 in the specified output location. Default is to raise 

430 an exception. 

431 

432 Returns 

433 ------- 

434 config : `Config` 

435 The updated `Config` instance written to the repo. 

436 

437 Raises 

438 ------ 

439 ValueError 

440 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

441 regular Config (as these subclasses would make it impossible to 

442 support ``standalone=False``). 

443 FileExistsError 

444 Raised if the output config file already exists. 

445 os.error 

446 Raised if the directory does not exist, exists but is not a 

447 directory, or cannot be created. 

448 

449 Notes 

450 ----- 

451 Note that when ``standalone=False`` (the default), the configuration 

452 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

453 construct the repository should also be used to construct any Butlers 

454 to avoid configuration inconsistencies. 

455 """ 

456 if isinstance(config, ButlerConfig | ConfigSubset): 

457 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

458 

459 # Ensure that the root of the repository exists or can be made 

460 root_uri = ResourcePath(root, forceDirectory=True) 

461 root_uri.mkdir() 

462 

463 config = Config(config) 

464 

465 # If we are creating a new repo from scratch with relative roots, 

466 # do not propagate an explicit root from the config file 

467 if "root" in config: 

468 del config["root"] 

469 

470 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

471 imported_class = doImportType(full["datastore", "cls"]) 

472 if not issubclass(imported_class, Datastore): 

473 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

474 datastoreClass: type[Datastore] = imported_class 

475 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

476 

477 # if key exists in given config, parse it, otherwise parse the defaults 

478 # in the expanded config 

479 if config.get(("registry", "db")): 

480 registryConfig = RegistryConfig(config) 

481 else: 

482 registryConfig = RegistryConfig(full) 

483 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

484 if defaultDatabaseUri is not None: 

485 Config.updateParameters( 

486 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

487 ) 

488 else: 

489 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

490 

491 if standalone: 

492 config.merge(full) 

493 else: 

494 # Always expand the registry.managers section into the per-repo 

495 # config, because after the database schema is created, it's not 

496 # allowed to change anymore. Note that in the standalone=True 

497 # branch, _everything_ in the config is expanded, so there's no 

498 # need to special case this. 

499 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

500 configURI: ResourcePathExpression 

501 if outfile is not None: 

502 # When writing to a separate location we must include 

503 # the root of the butler repo in the config else it won't know 

504 # where to look. 

505 config["root"] = root_uri.geturl() 

506 configURI = outfile 

507 else: 

508 configURI = root_uri 

509 # Check that if obscore key is present then its config must be there 

510 # too, this is to avoid common mistake when people copy butler.yaml 

511 # from existing repo with obscore but do not fill its config. 

512 if (obscore_key := ("registry", "managers", "obscore")) in config: 

513 obscore_config_key = ("registry", "managers", "obscore", "config") 

514 if obscore_config_key not in config or not config[obscore_config_key]: 

515 warnings.warn( 

516 "Obscore manager is declared in registry configuration, " 

517 "but obscore configuration is missing, obscore manager will be removed.", 

518 stacklevel=2, 

519 ) 

520 del config[obscore_key] 

521 # Strip obscore configuration, if it is present, before writing config 

522 # to a file, obscore config will be stored in registry. 

523 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: 

524 config_to_write = config.copy() 

525 del config_to_write[obscore_config_key] 

526 config_to_write.dumpToUri(configURI, overwrite=overwrite) 

527 # configFile attribute is updated, need to copy it to original. 

528 config.configFile = config_to_write.configFile 

529 else: 

530 config.dumpToUri(configURI, overwrite=overwrite) 

531 

532 # Create Registry and populate tables 

533 registryConfig = RegistryConfig(config.get("registry")) 

534 dimensionConfig = DimensionConfig(dimensionConfig) 

535 registry = _RegistryFactory(registryConfig).create_from_config( 

536 dimensionConfig=dimensionConfig, butlerRoot=root_uri 

537 ) 

538 registry.close() 

539 

540 _LOG.verbose("Wrote new Butler configuration file to %s", configURI) 

541 

542 return config 
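
# Illustrative sketch (not part of this module): create an empty repository and
# then open a writeable Butler on it. The repo path and run name are placeholders.
from lsst.daf.butler import Butler

Butler.makeRepo("/path/to/new/repo")
butler = Butler.from_config("/path/to/new/repo", run="u/alice/scratch")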

543 

544 @classmethod 

545 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: 

546 """Look up the label in a butler repository index. 

547 

548 Parameters 

549 ---------- 

550 label : `str` 

551 Label of the Butler repository to look up. 

552 return_label : `bool`, optional 

553 If ``label`` cannot be found in the repository index (either 

554 because the index is not defined or ``label`` is not in the index) and

555 ``return_label`` is `True` then return ``ResourcePath(label)``. 

556 If ``return_label`` is `False` (default) then an exception will be 

557 raised instead. 

558 

559 Returns 

560 ------- 

561 uri : `lsst.resources.ResourcePath` 

562 URI to the Butler repository associated with the given label or 

563 default value if it is provided. 

564 

565 Raises 

566 ------ 

567 KeyError 

568 Raised if the label is not found in the index, or if an index 

569 is not defined, and ``return_label`` is `False`. 

570 

571 Notes 

572 ----- 

573 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

574 information is discovered. 

575 """ 

576 return ButlerRepoIndex.get_repo_uri(label, return_label) 

577 

578 @classmethod 

579 def get_known_repos(cls) -> set[str]: 

580 """Retrieve the list of known repository labels. 

581 

582 Returns 

583 ------- 

584 repos : `set` of `str` 

585 All the known labels. Can be empty if no index can be found. 

586 

587 Notes 

588 ----- 

589 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

590 information is discovered. 

591 """ 

592 return ButlerRepoIndex.get_known_repos() 

593 

594 @classmethod 

595 def parse_dataset_uri(cls, uri: str) -> ParsedButlerDatasetURI: 

596 """Extract the butler label and dataset ID from a dataset URI. 

597 

598 Parameters 

599 ---------- 

600 uri : `str` 

601 The dataset URI to parse. 

602 

603 Returns 

604 ------- 

605 parsed : `ParsedButlerDatasetURI` 

606 The label associated with the butler repository from which this 

607 dataset originates and the ID of the dataset. 

608 

609 Notes 

610 ----- 

611 Supports dataset URIs of the forms 

612 ``ivo://org.rubinobs/usdac/dr1?repo=butler_label&id=UUID`` (see 

613 DMTN-302) and ``butler://butler_label/UUID``. The ``butler`` URI is 

614 deprecated and can not include ``/`` in the label string. ``ivo`` URIs 

615 can include anything supported by the `Butler` constructor, including 

616 paths to repositories and alias labels. 

617 

618 ivo://org.rubinobs/dr1?repo=/repo/main&id=UUID 

619 

620 will return a label of ``/repo/main``. 

621 

622 This method does not attempt to check that the dataset exists in the 

623 labeled butler. 

624 

625 Since the IVOID can be issued by any publisher to represent a Butler 

626 dataset there is no validation of the path or netloc component of the 

627 URI. The only requirement is that there are ``id`` and ``repo`` keys 

628 in the ``ivo`` URI query component. 

629 """ 

630 parsed = urllib.parse.urlparse(uri) 

631 parsed_scheme = parsed.scheme.lower() 

632 if parsed_scheme == "ivo": 

633 # Do not validate the netloc or the path values. 

634 qs = urllib.parse.parse_qs(parsed.query) 

635 if "repo" not in qs or "id" not in qs: 

636 raise ValueError(f"Missing 'repo' and/or 'id' query parameters in IVOID {uri}.") 

637 if len(qs["repo"]) != 1 or len(qs["id"]) != 1: 

638 raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}") 

639 label = qs["repo"][0] 

640 id_ = qs["id"][0] 

641 elif parsed_scheme == "butler": 

642 label = parsed.netloc # Butler label is case sensitive. 

643 # Need to strip the leading /. 

644 id_ = parsed.path[1:] 

645 else: 

646 raise ValueError(f"Unrecognized URI scheme: {uri!r}") 

647 # Strip trailing/leading whitespace from label. 

648 label = label.strip() 

649 if not label: 

650 raise ValueError(f"No butler repository label found in uri {uri!r}") 

651 try: 

652 dataset_id = uuid.UUID(hex=id_) 

653 except Exception as e: 

654 e.add_note(f"Error extracting dataset ID from uri {uri!r} with dataset ID string {id_!r}") 

655 raise 

656 

657 return ParsedButlerDatasetURI(label=label, dataset_id=dataset_id, uri=uri) 
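
# Illustrative sketch (not part of this module): parsing an IVOID-style dataset
# URI. The repository label and UUID below are placeholders.
from lsst.daf.butler import Butler

parsed = Butler.parse_dataset_uri(
    "ivo://org.rubinobs/dr1?repo=/repo/main&id=26e27952-4e4e-4d8a-8cd5-078f96966b83"
)
print(parsed.label, parsed.dataset_id)  # "/repo/main" and the parsed uuid.UUID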

658 

659 @classmethod 

660 def get_dataset_from_uri( 

661 cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None 

662 ) -> SpecificButlerDataset: 

663 """Get the dataset associated with the given dataset URI. 

664 

665 Parameters 

666 ---------- 

667 uri : `str` 

668 The URI associated with a dataset. 

669 factory : `LabeledButlerFactoryProtocol` or `None`, optional 

670 Bound factory function that will be given the butler label 

671 and return a `Butler`. If this is not provided the label

672 will be tried directly. 

673 

674 Returns 

675 ------- 

676 result : `SpecificButlerDataset` 

677 The butler associated with this URI and the dataset itself. 

678 The dataset can be `None` if the UUID is valid but the dataset 

679 is not known to this butler. 

680 """ 

681 parsed = cls.parse_dataset_uri(uri) 

682 butler: Butler | None = None 

683 if factory is not None: 

684 # If the label is not recognized, it might be a path. 

685 try: 

686 butler = factory(parsed.label) 

687 except KeyError: 

688 pass 

689 if butler is None: 

690 butler = cls.from_config(parsed.label) 

691 return SpecificButlerDataset(butler=butler, dataset=butler.get_dataset(parsed.dataset_id)) 

692 

693 @abstractmethod 

694 def _caching_context(self) -> AbstractContextManager[None]: 

695 """Context manager that enables caching.""" 

696 raise NotImplementedError() 

697 

698 @abstractmethod 

699 def transaction(self) -> AbstractContextManager[None]: 

700 """Context manager supporting `Butler` transactions. 

701 

702 Transactions can be nested. 

703 """ 

704 raise NotImplementedError() 

705 

706 @abstractmethod 

707 def put( 

708 self, 

709 obj: Any, 

710 datasetRefOrType: DatasetRef | DatasetType | str, 

711 /, 

712 dataId: DataId | None = None, 

713 *, 

714 run: str | None = None, 

715 provenance: DatasetProvenance | None = None, 

716 **kwargs: Any, 

717 ) -> DatasetRef: 

718 """Store and register a dataset. 

719 

720 Parameters 

721 ---------- 

722 obj : `object` 

723 The dataset. 

724 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

725 When `DatasetRef` is provided, ``dataId`` should be `None`. 

726 Otherwise the `DatasetType` or name thereof. If a fully resolved 

727 `DatasetRef` is given the run and ID are used directly. 

728 dataId : `dict` or `DataCoordinate` 

729 A `dict` of `Dimension` link name, value pairs that label the 

730 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

731 should be provided as the second argument. 

732 run : `str`, optional 

733 The name of the run the dataset should be added to, overriding 

734 ``self.run``. Not used if a resolved `DatasetRef` is provided. 

735 provenance : `DatasetProvenance` or `None`, optional 

736 Any provenance that should be attached to the serialized dataset. 

737 Not supported by all serialization mechanisms. 

738 **kwargs 

739 Additional keyword arguments used to augment or construct a 

740 `DataCoordinate`. See `DataCoordinate.standardize` 

741 parameters. Not used if a resolved `DatasetRef` is provided.

742 

743 Returns 

744 ------- 

745 ref : `DatasetRef` 

746 A reference to the stored dataset, updated with the correct id if 

747 given. 

748 

749 Raises 

750 ------ 

751 TypeError 

752 Raised if the butler is read-only or if no run has been provided. 

753 """ 

754 raise NotImplementedError() 
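
# Illustrative sketch (not part of this module): storing an in-memory object under
# a dataset type name with a data ID given as keyword arguments. The repo path,
# run, dataset type, and dimension values are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
obj = {"example": 1}  # any object matching the dataset type's storage class
ref = butler.put(obj, "my_dataset_type", instrument="HSC", visit=903334, detector=42)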

755 

756 @abstractmethod 

757 def getDeferred( 

758 self, 

759 datasetRefOrType: DatasetRef | DatasetType | str, 

760 /, 

761 dataId: DataId | None = None, 

762 *, 

763 parameters: dict | None = None, 

764 collections: Any = None, 

765 storageClass: str | StorageClass | None = None, 

766 timespan: Timespan | None = None, 

767 **kwargs: Any, 

768 ) -> DeferredDatasetHandle: 

769 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

770 after an immediate registry lookup. 

771 

772 Parameters 

773 ---------- 

774 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

775 When `DatasetRef` the `dataId` should be `None`. 

776 Otherwise the `DatasetType` or name thereof. 

777 dataId : `dict` or `DataCoordinate`, optional 

778 A `dict` of `Dimension` link name, value pairs that label the 

779 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

780 should be provided as the first argument. 

781 parameters : `dict` 

782 Additional StorageClass-defined options to control reading, 

783 typically used to efficiently read only a subset of the dataset. 

784 collections : Any, optional 

785 Collections to be searched, overriding ``self.collections``. 

786 Can be any of the types supported by the ``collections`` argument 

787 to butler construction. 

788 storageClass : `StorageClass` or `str`, optional 

789 The storage class to be used to override the Python type 

790 returned by this method. By default the returned type matches 

791 the dataset type definition for this dataset. Specifying a 

792 read `StorageClass` can force a different type to be returned. 

793 This type must be compatible with the original type. 

794 timespan : `Timespan` or `None`, optional 

795 A timespan that the validity range of the dataset must overlap. 

796 If not provided and this is a calibration dataset type, an attempt 

797 will be made to find the timespan from any temporal coordinate 

798 in the data ID. 

799 **kwargs 

800 Additional keyword arguments used to augment or construct a 

801 `DataId`. See `DataId` parameters. 

802 

803 Returns 

804 ------- 

805 obj : `DeferredDatasetHandle` 

806 A handle which can be used to retrieve a dataset at a later time. 

807 

808 Raises 

809 ------ 

810 LookupError 

811 Raised if no matching dataset exists in the `Registry` or 

812 datastore. 

813 ValueError 

814 Raised if a resolved `DatasetRef` was passed as an input, but it 

815 differs from the one found in the registry. 

816 TypeError 

817 Raised if no collections were provided. 

818 """ 

819 raise NotImplementedError() 
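
# Illustrative sketch (not part of this module); ``butler`` is assumed to be an
# existing Butler with default collections set. Dataset type and data ID values
# are placeholders.
handle = butler.getDeferred("my_dataset_type", instrument="HSC", visit=903334, detector=42)
data = handle.get()  # the read happens here, not at getDeferred() time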

820 

821 @abstractmethod 

822 def get( 

823 self, 

824 datasetRefOrType: DatasetRef | DatasetType | str, 

825 /, 

826 dataId: DataId | None = None, 

827 *, 

828 parameters: dict[str, Any] | None = None, 

829 collections: Any = None, 

830 storageClass: StorageClass | str | None = None, 

831 timespan: Timespan | None = None, 

832 **kwargs: Any, 

833 ) -> Any: 

834 """Retrieve a stored dataset. 

835 

836 Parameters 

837 ---------- 

838 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

839 When `DatasetRef` the `dataId` should be `None`. 

840 Otherwise the `DatasetType` or name thereof. 

841 If a resolved `DatasetRef`, the associated dataset 

842 is returned directly without additional querying. 

843 dataId : `dict` or `DataCoordinate` 

844 A `dict` of `Dimension` link name, value pairs that label the 

845 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

846 should be provided as the first argument. 

847 parameters : `dict` 

848 Additional StorageClass-defined options to control reading, 

849 typically used to efficiently read only a subset of the dataset. 

850 collections : Any, optional 

851 Collections to be searched, overriding ``self.collections``. 

852 Can be any of the types supported by the ``collections`` argument 

853 to butler construction. 

854 storageClass : `StorageClass` or `str`, optional 

855 The storage class to be used to override the Python type 

856 returned by this method. By default the returned type matches 

857 the dataset type definition for this dataset. Specifying a 

858 read `StorageClass` can force a different type to be returned. 

859 This type must be compatible with the original type. 

860 timespan : `Timespan` or `None`, optional 

861 A timespan that the validity range of the dataset must overlap. 

862 If not provided and this is a calibration dataset type, an attempt 

863 will be made to find the timespan from any temporal coordinate 

864 in the data ID. 

865 **kwargs 

866 Additional keyword arguments used to augment or construct a 

867 `DataCoordinate`. See `DataCoordinate.standardize` 

868 parameters. 

869 

870 Returns 

871 ------- 

872 obj : `object` 

873 The dataset. 

874 

875 Raises 

876 ------ 

877 LookupError 

878 Raised if no matching dataset exists in the `Registry`. 

879 TypeError 

880 Raised if no collections were provided. 

881 

882 Notes 

883 ----- 

884 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

885 this method requires that the given data ID include temporal dimensions 

886 beyond the dimensions of the dataset type itself, in order to find the 

887 dataset with the appropriate validity range. For example, a "bias" 

888 dataset with native dimensions ``{instrument, detector}`` could be 

889 fetched with a ``{instrument, detector, exposure}`` data ID, because 

890 ``exposure`` is a temporal dimension. 

891 """ 

892 raise NotImplementedError() 
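
# Illustrative sketch (not part of this module): reading a calibration from a
# CALIBRATION collection needs a temporal dimension (here ``exposure``) in the
# data ID so the matching validity range can be found. Names are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", collections=["HSC/calib"])
bias = butler.get("bias", instrument="HSC", detector=42, exposure=903334)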

893 

894 @abstractmethod 

895 def getURIs( 

896 self, 

897 datasetRefOrType: DatasetRef | DatasetType | str, 

898 /, 

899 dataId: DataId | None = None, 

900 *, 

901 predict: bool = False, 

902 collections: Any = None, 

903 run: str | None = None, 

904 **kwargs: Any, 

905 ) -> DatasetRefURIs: 

906 """Return the URIs associated with the dataset. 

907 

908 Parameters 

909 ---------- 

910 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

911 When `DatasetRef` the `dataId` should be `None`. 

912 Otherwise the `DatasetType` or name thereof. 

913 dataId : `dict` or `DataCoordinate` 

914 A `dict` of `Dimension` link name, value pairs that label the 

915 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

916 should be provided as the first argument. 

917 predict : `bool` 

918 If `True`, allow URIs to be returned of datasets that have not 

919 been written. 

920 collections : Any, optional 

921 Collections to be searched, overriding ``self.collections``. 

922 Can be any of the types supported by the ``collections`` argument 

923 to butler construction. 

924 run : `str`, optional 

925 Run to use for predictions, overriding ``self.run``. 

926 **kwargs 

927 Additional keyword arguments used to augment or construct a 

928 `DataCoordinate`. See `DataCoordinate.standardize` 

929 parameters. 

930 

931 Returns 

932 ------- 

933 uris : `DatasetRefURIs` 

934 The URI to the primary artifact associated with this dataset (if 

935 the dataset was disassembled within the datastore this may be 

936 `None`), and the URIs to any components associated with the dataset 

937 artifact (can be empty if there are no components).

938 """ 

939 raise NotImplementedError() 

940 

941 def getURI( 

942 self, 

943 datasetRefOrType: DatasetRef | DatasetType | str, 

944 /, 

945 dataId: DataId | None = None, 

946 *, 

947 predict: bool = False, 

948 collections: Any = None, 

949 run: str | None = None, 

950 **kwargs: Any, 

951 ) -> ResourcePath: 

952 """Return the URI to the Dataset. 

953 

954 Parameters 

955 ---------- 

956 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

957 When `DatasetRef` the `dataId` should be `None`. 

958 Otherwise the `DatasetType` or name thereof. 

959 dataId : `dict` or `DataCoordinate` 

960 A `dict` of `Dimension` link name, value pairs that label the 

961 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

962 should be provided as the first argument. 

963 predict : `bool` 

964 If `True`, allow URIs to be returned of datasets that have not 

965 been written. 

966 collections : Any, optional 

967 Collections to be searched, overriding ``self.collections``. 

968 Can be any of the types supported by the ``collections`` argument 

969 to butler construction. 

970 run : `str`, optional 

971 Run to use for predictions, overriding ``self.run``. 

972 **kwargs 

973 Additional keyword arguments used to augment or construct a 

974 `DataCoordinate`. See `DataCoordinate.standardize` 

975 parameters. 

976 

977 Returns 

978 ------- 

979 uri : `lsst.resources.ResourcePath` 

980 URI pointing to the Dataset within the datastore. If the 

981 Dataset does not exist in the datastore, and if ``predict`` is 

982 `True`, the URI will be a prediction and will include a URI 

983 fragment "#predicted". 

984 If the datastore does not have entities that relate well 

985 to the concept of a URI the returned URI string will be 

986 descriptive. The returned URI is not guaranteed to be obtainable. 

987 

988 Raises 

989 ------ 

990 LookupError 

991 A URI has been requested for a dataset that does not exist and 

992 guessing is not allowed. 

993 ValueError 

994 Raised if a resolved `DatasetRef` was passed as an input, but it 

995 differs from the one found in the registry. 

996 TypeError 

997 Raised if no collections were provided. 

998 RuntimeError 

999 Raised if a URI is requested for a dataset that consists of 

1000 multiple artifacts. 

1001 """ 

1002 primary, components = self.getURIs( 

1003 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

1004 ) 

1005 

1006 if primary is None or components: 

1007 raise RuntimeError( 

1008 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1009 "Use Butler.getURIs() instead." 

1010 ) 

1011 return primary 
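
# Illustrative sketch (not part of this module): predicting the URI of a dataset
# that has not been written yet; the result carries a "#predicted" fragment.
# Repo path, run, dataset type, and data ID values are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
uri = butler.getURI(
    "my_dataset_type",
    instrument="HSC", visit=903334, detector=42,
    predict=True, run="u/alice/DM-50000/a",
)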

1012 

1013 @abstractmethod 

1014 def get_dataset_type(self, name: str) -> DatasetType: 

1015 """Get the `DatasetType`. 

1016 

1017 Parameters 

1018 ---------- 

1019 name : `str` 

1020 Name of the type. 

1021 

1022 Returns 

1023 ------- 

1024 type : `DatasetType` 

1025 The `DatasetType` associated with the given name. 

1026 

1027 Raises 

1028 ------ 

1029 lsst.daf.butler.MissingDatasetTypeError 

1030 Raised if the requested dataset type has not been registered. 

1031 

1032 Notes 

1033 ----- 

1034 This method handles component dataset types automatically, though most 

1035 other operations do not. 

1036 """ 

1037 raise NotImplementedError() 

1038 

1039 @abstractmethod 

1040 def get_dataset( 

1041 self, 

1042 id: DatasetId | str, 

1043 *, 

1044 storage_class: str | StorageClass | None = None, 

1045 dimension_records: bool = False, 

1046 datastore_records: bool = False, 

1047 ) -> DatasetRef | None: 

1048 """Retrieve a Dataset entry. 

1049 

1050 Parameters 

1051 ---------- 

1052 id : `DatasetId` 

1053 The unique identifier for the dataset, as an instance of 

1054 `uuid.UUID` or a string containing a hexadecimal number. 

1055 storage_class : `str` or `StorageClass` or `None` 

1056 A storage class to use when creating the returned entry. If given 

1057 it must be compatible with the default storage class. 

1058 dimension_records : `bool`, optional 

1059 If `True` the ref will be expanded and contain dimension records. 

1060 datastore_records : `bool`, optional 

1061 If `True` the ref will contain associated datastore records. 

1062 

1063 Returns 

1064 ------- 

1065 ref : `DatasetRef` or `None` 

1066 A ref to the Dataset, or `None` if no matching Dataset 

1067 was found. 

1068 """ 

1069 raise NotImplementedError() 
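
# Illustrative sketch (not part of this module): looking up a ref by UUID;
# ``butler`` is assumed to be an existing Butler and the UUID is a placeholder.
import uuid

ref = butler.get_dataset(uuid.UUID("26e27952-4e4e-4d8a-8cd5-078f96966b83"),
                         dimension_records=True)
if ref is None:
    print("no dataset with that ID in this repository")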

1070 

1071 @abstractmethod 

1072 def get_many_datasets(self, ids: Iterable[DatasetId | str]) -> list[DatasetRef]: 

1073 """Retrieve a list of dataset entries. 

1074 

1075 Parameters 

1076 ---------- 

1077 ids : `~collections.abc.Iterable` [ `DatasetId` or `str` ] 

1078 The unique identifiers for the datasets, as instances of 

1079 `uuid.UUID` or strings containing a hexadecimal number. 

1080 

1081 Returns 

1082 ------- 

1083 refs : `list` [ `DatasetRef` ] 

1084 A list containing a `DatasetRef` for each of the given dataset IDs. 

1085 If a dataset was not found, no error is thrown -- it is just not 

1086 included in the list. The returned datasets are in no particular 

1087 order. 

1088 """ 

1089 raise NotImplementedError() 

1090 

1091 @abstractmethod 

1092 def find_dataset( 

1093 self, 

1094 dataset_type: DatasetType | str, 

1095 data_id: DataId | None = None, 

1096 *, 

1097 collections: str | Sequence[str] | None = None, 

1098 timespan: Timespan | None = None, 

1099 storage_class: str | StorageClass | None = None, 

1100 dimension_records: bool = False, 

1101 datastore_records: bool = False, 

1102 **kwargs: Any, 

1103 ) -> DatasetRef | None: 

1104 """Find a dataset given its `DatasetType` and data ID. 

1105 

1106 This can be used to obtain a `DatasetRef` that permits the dataset to 

1107 be read from a `Datastore`. If the dataset is a component and can not 

1108 be found using the provided dataset type, a dataset ref for the parent 

1109 will be returned instead but with the correct dataset type. 

1110 

1111 Parameters 

1112 ---------- 

1113 dataset_type : `DatasetType` or `str` 

1114 A `DatasetType` or the name of one. If this is a `DatasetType` 

1115 instance, its storage class will be respected and propagated to 

1116 the output, even if it differs from the dataset type definition 

1117 in the registry, as long as the storage classes are convertible. 

1118 data_id : `dict` or `DataCoordinate`, optional 

1119 A `dict`-like object containing the `Dimension` links that identify 

1120 the dataset within a collection. If it is a `dict` the dataId 

1121 can include dimension record values such as ``day_obs`` and 

1122 ``seq_num`` or ``full_name`` that can be used to derive the 

1123 primary dimension. 

1124 collections : `str` or `list` [`str`], optional 

1125 An ordered list of collections to search for the dataset.

1126 Defaults to ``self.defaults.collections``. 

1127 timespan : `Timespan`, optional 

1128 A timespan that the validity range of the dataset must overlap. 

1129 If not provided, any `~CollectionType.CALIBRATION` collections 

1130 matched by the ``collections`` argument will not be searched. 

1131 storage_class : `str` or `StorageClass` or `None` 

1132 A storage class to use when creating the returned entry. If given 

1133 it must be compatible with the default storage class. 

1134 dimension_records : `bool`, optional 

1135 If `True` the ref will be expanded and contain dimension records. 

1136 datastore_records : `bool`, optional 

1137 If `True` the ref will contain associated datastore records. 

1138 **kwargs 

1139 Additional keyword arguments passed to 

1140 `DataCoordinate.standardize` to convert ``dataId`` to a true 

1141 `DataCoordinate` or augment an existing one. This can also include 

1142 dimension record metadata that can be used to derive a primary 

1143 dimension value. 

1144 

1145 Returns 

1146 ------- 

1147 ref : `DatasetRef` 

1148 A reference to the dataset, or `None` if no matching Dataset 

1149 was found. 

1150 

1151 Raises 

1152 ------ 

1153 lsst.daf.butler.NoDefaultCollectionError 

1154 Raised if ``collections`` is `None` and 

1155 ``self.collections`` is `None`. 

1156 LookupError 

1157 Raised if one or more data ID keys are missing. 

1158 lsst.daf.butler.MissingDatasetTypeError 

1159 Raised if the dataset type does not exist. 

1160 lsst.daf.butler.MissingCollectionError 

1161 Raised if any of ``collections`` does not exist in the registry. 

1162 

1163 Notes 

1164 ----- 

1165 This method simply returns `None` and does not raise an exception even 

1166 when the set of collections searched is intrinsically incompatible with 

1167 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

1168 only `~CollectionType.CALIBRATION` collections are being searched. 

1169 This may make it harder to debug some lookup failures, but the behavior 

1170 is intentional; we consider it more important that failed searches are 

1171 reported consistently, regardless of the reason, and that adding 

1172 additional collections that do not contain a match to the search path 

1173 never changes the behavior. 

1174 

1175 This method handles component dataset types automatically, though most 

1176 other query operations do not. 

1177 """ 

1178 raise NotImplementedError() 
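
# Illustrative sketch (not part of this module): resolving a dataset type and data
# ID to a DatasetRef without reading it. Repo path, collection, dataset type, and
# dimension values are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo")
ref = butler.find_dataset(
    "my_dataset_type",
    instrument="HSC", visit=903334, detector=42,
    collections=["u/alice/DM-50000"],
)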

1179 

1180 @abstractmethod 

1181 def retrieve_artifacts_zip( 

1182 self, 

1183 refs: Iterable[DatasetRef], 

1184 destination: ResourcePathExpression, 

1185 overwrite: bool = True, 

1186 ) -> ResourcePath: 

1187 """Retrieve artifacts from a Butler and place in ZIP file. 

1188 

1189 Parameters 

1190 ---------- 

1191 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1192 The datasets to be included in the zip file. 

1193 destination : `lsst.resources.ResourcePathExpression` 

1194 Directory to write the new ZIP file. This directory will 

1195 also be used as a staging area for the datasets being downloaded 

1196 from the datastore. 

1197 overwrite : `bool`, optional 

1198 If `False` the output Zip will not be written if a file of the 

1199 same name is already present in ``destination``. 

1200 

1201 Returns 

1202 ------- 

1203 zip_file : `lsst.resources.ResourcePath` 

1204 The path to the new ZIP file. 

1205 

1206 Raises 

1207 ------ 

1208 ValueError 

1209 Raised if there are no refs to retrieve. 

1210 """ 

1211 raise NotImplementedError() 

1212 

1213 @abstractmethod 

1214 def retrieveArtifacts( 

1215 self, 

1216 refs: Iterable[DatasetRef], 

1217 destination: ResourcePathExpression, 

1218 transfer: str = "auto", 

1219 preserve_path: bool = True, 

1220 overwrite: bool = False, 

1221 ) -> list[ResourcePath]: 

1222 """Retrieve the artifacts associated with the supplied refs. 

1223 

1224 Parameters 

1225 ---------- 

1226 refs : `~collections.abc.Iterable` of `DatasetRef` 

1227 The datasets for which artifacts are to be retrieved. 

1228 A single ref can result in multiple artifacts. The refs must 

1229 be resolved. 

1230 destination : `lsst.resources.ResourcePath` or `str` 

1231 Location to write the artifacts. 

1232 transfer : `str`, optional 

1233 Method to use to transfer the artifacts. Must be one of the options 

1234 supported by `~lsst.resources.ResourcePath.transfer_from`. 

1235 "move" is not allowed. 

1236 preserve_path : `bool`, optional 

1237 If `True` the full path of the artifact within the datastore 

1238 is preserved. If `False` the final file component of the path 

1239 is used. 

1240 overwrite : `bool`, optional 

1241 If `True` allow transfers to overwrite existing files at the 

1242 destination. 

1243 

1244 Returns 

1245 ------- 

1246 targets : `list` of `lsst.resources.ResourcePath` 

1247 URIs of file artifacts in destination location. Order is not 

1248 preserved. 

1249 

1250 Notes 

1251 ----- 

1252 For non-file datastores the artifacts written to the destination 

1253 may not match the representation inside the datastore. For example 

1254 a hierarchical data structure in a NoSQL database may well be stored 

1255 as a JSON file. 

1256 """ 

1257 raise NotImplementedError() 
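
# Illustrative sketch (not part of this module): copying the file artifacts behind
# some refs to a local directory, flattening the datastore paths. ``refs`` is
# assumed to be a list of resolved DatasetRef objects; the destination is a
# placeholder.
paths = butler.retrieveArtifacts(refs, "/tmp/extracted", preserve_path=False)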

1258 

1259 @abstractmethod 

1260 def exists( 

1261 self, 

1262 dataset_ref_or_type: DatasetRef | DatasetType | str, 

1263 /, 

1264 data_id: DataId | None = None, 

1265 *, 

1266 full_check: bool = True, 

1267 collections: Any = None, 

1268 **kwargs: Any, 

1269 ) -> DatasetExistence: 

1270 """Indicate whether a dataset is known to Butler registry and 

1271 datastore. 

1272 

1273 Parameters 

1274 ---------- 

1275 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` 

1276 When `DatasetRef` the `dataId` should be `None`. 

1277 Otherwise the `DatasetType` or name thereof. 

1278 data_id : `dict` or `DataCoordinate` 

1279 A `dict` of `Dimension` link name, value pairs that label the 

1280 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1281 should be provided as the first argument. 

1282 full_check : `bool`, optional 

1283 If `True`, a check will be made for the actual existence of a 

1284 dataset artifact. This will involve additional overhead due to 

1285 the need to query an external system. If `False`, this check will 

1286 be omitted, and the registry and datastore will solely be asked 

1287 if they know about the dataset but no direct check for the 

1288 artifact will be performed. 

1289 collections : Any, optional 

1290 Collections to be searched, overriding ``self.collections``. 

1291 Can be any of the types supported by the ``collections`` argument 

1292 to butler construction. 

1293 **kwargs 

1294 Additional keyword arguments used to augment or construct a 

1295 `DataCoordinate`. See `DataCoordinate.standardize` 

1296 parameters. 

1297 

1298 Returns 

1299 ------- 

1300 existence : `DatasetExistence` 

1301 Object indicating whether the dataset is known to registry and 

1302 datastore. Evaluates to `True` if the dataset is present and known 

1303 to both. 

1304 """ 

1305 raise NotImplementedError() 
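
# Illustrative sketch (not part of this module): a cheaper existence check that
# skips probing the storage system itself. ``butler`` is assumed to be an existing
# Butler; dataset type and data ID values are placeholders.
existence = butler.exists(
    "my_dataset_type", instrument="HSC", visit=903334, detector=42, full_check=False
)
if existence:
    print("known to both registry and datastore")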

1306 

1307 @abstractmethod 

1308 def _exists_many( 

1309 self, 

1310 refs: Iterable[DatasetRef], 

1311 /, 

1312 *, 

1313 full_check: bool = True, 

1314 ) -> dict[DatasetRef, DatasetExistence]: 

1315 """Indicate whether multiple datasets are known to Butler registry and 

1316 datastore. 

1317 

1318 This is an experimental API that may change at any moment. 

1319 

1320 Parameters 

1321 ---------- 

1322 refs : `~collections.abc.Iterable` of `DatasetRef` 

1323 The datasets to be checked. 

1324 full_check : `bool`, optional 

1325 If `True`, a check will be made for the actual existence of each 

1326 dataset artifact. This will involve additional overhead due to 

1327 the need to query an external system. If `False`, this check will 

1328 be omitted, and the registry and datastore will solely be asked 

1329 if they know about the dataset(s) but no direct check for the 

1330 artifact(s) will be performed. 

1331 

1332 Returns 

1333 ------- 

1334 existence : `dict` [`DatasetRef`, `DatasetExistence`] 

1335 Mapping from the given dataset refs to an enum indicating the 

1336 status of the dataset in registry and datastore. 

1337 Each value evaluates to `True` if the dataset is present and known 

1338 to both. 

1339 """ 

1340 raise NotImplementedError() 

1341 

1342 @abstractmethod 

1343 def removeRuns( 

1344 self, 

1345 names: Iterable[str], 

1346 unstore: bool | type[_DeprecatedDefault] = _DeprecatedDefault, 

1347 *, 

1348 unlink_from_chains: bool = False, 

1349 ) -> None: 

1350 """Remove one or more `~CollectionType.RUN` collections and the 

1351 datasets within them. 

1352 

1353 Parameters 

1354 ---------- 

1355 names : `~collections.abc.Iterable` [ `str` ] 

1356 The names of the collections to remove. 

1357 unstore : `bool`, optional 

1358 If `True` (default), delete datasets from all datastores in which 

1359 they are present, and attempt to rollback the registry deletions if 

1360 datastore deletions fail (which may not always be possible). If 

1361 `False`, datastore records for these datasets are still removed, 

1362 but any artifacts (e.g. files) will not be. This parameter is now 

1363 deprecated and no longer has any effect. Files are always deleted 

1364 from datastores unless they were ingested using full URIs. 

1365 unlink_from_chains : `bool`, optional 

1366 If `True` remove the RUN collection from any chains prior to 

1367 removing the RUN. If `False` the removal will fail if any chains 

1368 still refer to the RUN. 

1369 

1370 Raises 

1371 ------ 

1372 TypeError 

1373 Raised if one or more collections are not of type 

1374 `~CollectionType.RUN`. 

1375 """ 

1376 raise NotImplementedError() 
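
# Illustrative sketch (not part of this module): deleting a scratch RUN collection
# and its datasets, detaching it from any CHAINED collections first. The repo path
# and run name are placeholders.
from lsst.daf.butler import Butler

butler = Butler.from_config("/path/to/repo", writeable=True)
butler.removeRuns(["u/alice/scratch"], unlink_from_chains=True)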

1377 

1378 @abstractmethod 

1379 def ingest( 

1380 self, 

1381 *datasets: FileDataset, 

1382 transfer: str | None = "auto", 

1383 record_validation_info: bool = True, 

1384 skip_existing: bool = False, 

1385 ) -> None: 

1386 """Store and register one or more datasets that already exist on disk. 

1387 

1388 Parameters 

1389 ---------- 

1390 *datasets : `FileDataset` 

1391 Each positional argument is a struct containing information about 

1392 a file to be ingested, including its URI (either absolute or 

1393 relative to the datastore root, if applicable), a resolved 

1394 `DatasetRef`, and optionally a formatter class or its 

1395 fully-qualified string name. If a formatter is not provided, the 

1396 formatter that would be used for `put` is assumed. On successful 

1397 ingest all `FileDataset.formatter` attributes will be set to the 

1398 formatter class used. `FileDataset.path` attributes may be modified 

1399 to put paths in whatever the datastore considers a standardized 

1400 form. 

1401 transfer : `str`, optional 

1402 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1403 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1404 transfer the file. 

1405 record_validation_info : `bool`, optional 

1406 If `True`, the default, the datastore can record validation 

1407 information associated with the file. If `False` the datastore 

1408 will not attempt to track any information such as checksums 

1409 or file sizes. This can be useful if such information is tracked 

1410 in an external system or if the file is to be compressed in place. 

1411 It is up to the datastore whether this parameter is relevant. 

1412 skip_existing : `bool`, optional 

1413 If `True`, a dataset will not be ingested if a dataset with the 

1414 same dataset ID already exists in the datastore. 

1415 If `False` (the default), a `ConflictingDefinitionError` will be 

1416 raised if any datasets with the same dataset ID already exist 

1417 in the datastore. 

1418 

1419 Returns 

1420 ------- 

1421 None 

1422 

1423 Raises 

1424 ------ 

1425 TypeError 

1426 Raised if the butler is read-only or if no run was provided. 

1427 NotImplementedError 

1428 Raised if the `Datastore` does not support the given transfer mode. 

1429 DatasetTypeNotSupportedError 

1430 Raised if one or more files to be ingested have a dataset type that 

1431 is not supported by the `Datastore`. 

1432 FileNotFoundError 

1433 Raised if one of the given files does not exist. 

1434 FileExistsError 

1435 Raised if transfer is not `None` but the (internal) location the 

1436 file would be moved to is already occupied. 

1437 ConflictingDefinitionError 

1438 Raised if a dataset already exists in the repository and 

1439 ``skip_existing`` is `False`. 

1440 

1441 Notes 

1442 ----- 

1443 This operation is not fully exception safe: if a database operation 

1444 fails, the given `FileDataset` instances may be only partially updated. 

1445 

1446 It is atomic in terms of database operations (they will either all 

1447 succeed or all fail) providing the database engine implements 

1448 transactions correctly. It will attempt to be atomic in terms of 

1449 filesystem operations as well, but this cannot be implemented 

1450 rigorously for most datastores. 
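
Examples
--------
A minimal usage sketch, assuming ``ref`` is a resolved `DatasetRef`
already constructed for this repository; the file path is
illustrative::

    butler.ingest(
        FileDataset(path="/data/staging/exposure_0001.fits", refs=[ref]),
        transfer="copy",
    )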

1451 """ 

1452 raise NotImplementedError() 

1453 

1454 @abstractmethod 

1455 def ingest_zip( 

1456 self, 

1457 zip_file: ResourcePathExpression, 

1458 transfer: str = "auto", 

1459 *, 

1460 transfer_dimensions: bool = False, 

1461 dry_run: bool = False, 

1462 skip_existing: bool = False, 

1463 ) -> None: 

1464 """Ingest a Zip file into this butler. 

1465 

1466 The Zip file must have been created by `retrieve_artifacts_zip`. 

1467 

1468 Parameters 

1469 ---------- 

1470 zip_file : `~lsst.resources.ResourcePathExpression` 

1471 Path to the Zip file. 

1472 transfer : `str`, optional 

1473 Method to use to transfer the Zip into the datastore. 

1474 transfer_dimensions : `bool`, optional 

1475 If `True`, dimension record data associated with the new datasets 

1476 will be transferred from the Zip file, if present. 

1477 dry_run : `bool`, optional 

1478 If `True` the ingest will be processed without any modifications 

1479 made to the target butler and as if the target butler did not 

1480 have any of the datasets. 

1481 skip_existing : `bool`, optional 

1482 If `True`, the Zip file will not be ingested if the dataset entries 

1483 listed in its index already exist in the butler (matched by dataset ID). 

1484 If `False` (the default), a `ConflictingDefinitionError` will be 

1485 raised if any datasets with the same dataset ID already exist 

1486 in the repository. If, somehow, some datasets are known to the 

1487 butler and some are not, this is currently treated as an error 

1488 rather than attempting to do a partial ingest. 

1489 

1490 Notes 

1491 ----- 

1492 Run collections and dataset types are created as needed. 
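
Examples
--------
A minimal usage sketch; the file name is illustrative and is assumed
to have been produced by `retrieve_artifacts_zip`::

    butler.ingest_zip("retrieved_artifacts.zip", transfer="copy")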

1493 """ 

1494 raise NotImplementedError() 

1495 

1496 @abstractmethod 

1497 def export( 

1498 self, 

1499 *, 

1500 directory: str | None = None, 

1501 filename: str | None = None, 

1502 format: str | None = None, 

1503 transfer: str | None = None, 

1504 ) -> AbstractContextManager[RepoExportContext]: 

1505 """Export datasets from the repository represented by this `Butler`. 

1506 

1507 This method is a context manager that returns a helper object 

1508 (`RepoExportContext`) that is used to indicate what information from 

1509 the repository should be exported. 

1510 

1511 Parameters 

1512 ---------- 

1513 directory : `str`, optional 

1514 Directory dataset files should be written to if ``transfer`` is not 

1515 `None`. 

1516 filename : `str`, optional 

1517 Name for the file that will include database information associated 

1518 with the exported datasets. If this is not an absolute path and 

1519 ``directory`` is not `None`, it will be written to ``directory`` 

1520 instead of the current working directory. Defaults to 

1521 "export.{format}". 

1522 format : `str`, optional 

1523 File format for the database information file. If `None`, the 

1524 extension of ``filename`` will be used. 

1525 transfer : `str`, optional 

1526 Transfer mode passed to `Datastore.export`. 

1527 

1528 Raises 

1529 ------ 

1530 TypeError 

1531 Raised if the set of arguments passed is inconsistent. 

1532 

1533 Examples 

1534 -------- 

1535 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1536 methods are used to provide the iterables over data IDs and/or datasets 

1537 to be exported:: 

1538 

1539 with butler.export(filename="exports.yaml") as export: 

1540 # Export all flats, but none of the dimension element rows 

1541 # (i.e. data ID information) associated with them. 

1542 export.saveDatasets( 

1543 butler.registry.queryDatasets("flat"), elements=() 

1544 ) 

1545 # Export all datasets that start with "deepCoadd_" and all of 

1546 # their associated data ID information. 

1547 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1548 """ 

1549 raise NotImplementedError() 

1550 

1551 @abstractmethod 

1552 def import_( 

1553 self, 

1554 *, 

1555 directory: ResourcePathExpression | None = None, 

1556 filename: ResourcePathExpression | TextIO | None = None, 

1557 format: str | None = None, 

1558 transfer: str | None = None, 

1559 skip_dimensions: set | None = None, 

1560 record_validation_info: bool = True, 

1561 without_datastore: bool = False, 

1562 ) -> None: 

1563 """Import datasets into this repository that were exported from a 

1564 different butler repository via `~lsst.daf.butler.Butler.export`. 

1565 

1566 Parameters 

1567 ---------- 

1568 directory : `~lsst.resources.ResourcePathExpression`, optional 

1569 Directory containing dataset files to import from. If `None`, 

1570 ``filename`` and all dataset file paths specified therein must 

1571 be absolute. 

1572 filename : `~lsst.resources.ResourcePathExpression` or `typing.TextIO` 

1573 A stream or name of file that contains database information 

1574 associated with the exported datasets, typically generated by 

1575 `~lsst.daf.butler.Butler.export`. If this is a string (name) or 

1576 `~lsst.resources.ResourcePath` and is not an absolute path, 

1577 it will first be looked for relative to ``directory`` and if not 

1578 found there it will be looked for in the current working 

1579 directory. Defaults to "export.{format}". 

1580 format : `str`, optional 

1581 File format for ``filename``. If `None`, the extension of 

1582 ``filename`` will be used. 

1583 transfer : `str`, optional 

1584 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1585 skip_dimensions : `set`, optional 

1586 Names of dimensions that should be skipped and not imported. 

1587 record_validation_info : `bool`, optional 

1588 If `True`, the default, the datastore can record validation 

1589 information associated with the file. If `False` the datastore 

1590 will not attempt to track any information such as checksums 

1591 or file sizes. This can be useful if such information is tracked 

1592 in an external system or if the file is to be compressed in place. 

1593 It is up to the datastore whether this parameter is relevant. 

1594 without_datastore : `bool`, optional 

1595 If `True` only registry records will be imported and the datastore 

1596 will be ignored. 

1597 

1598 Raises 

1599 ------ 

1600 TypeError 

1601 Raised if the set of arguments passed is inconsistent, or if the 

1602 butler is read-only. 
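
Examples
--------
A minimal usage sketch; the directory and file names are illustrative
and are assumed to have been produced by
`~lsst.daf.butler.Butler.export`::

    butler.import_(directory="exports", filename="export.yaml", transfer="symlink")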

1603 """ 

1604 raise NotImplementedError() 

1605 

1606 @abstractmethod 

1607 def transfer_dimension_records_from( 

1608 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef | DataCoordinate] 

1609 ) -> None: 

1610 """Transfer dimension records to this Butler from another Butler. 

1611 

1612 Parameters 

1613 ---------- 

1614 source_butler : `LimitedButler` or `Butler` 

1615 Butler from which the records are to be transferred. If data IDs 

1616 in ``source_refs`` are not expanded then this has to be a full 

1617 `Butler` whose registry will be used to expand data IDs. If the 

1618 source refs contain coordinates that are used to populate other 

1619 records then this will also need to be a full `Butler`. 

1620 source_refs : `~collections.abc.Iterable` [`DatasetRef` |\ 

1621 `DataCoordinate`] 

1622 Datasets or data IDs defined in the source butler whose dimension 

1623 records should be transferred to this butler. 
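
Examples
--------
A minimal usage sketch, assuming ``source_butler`` is another `Butler`
and ``refs`` is an iterable of `DatasetRef` defined in it::

    butler.transfer_dimension_records_from(source_butler, refs)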

1624 """ 

1625 raise NotImplementedError() 

1626 

1627 @abstractmethod 

1628 def transfer_from( 

1629 self, 

1630 source_butler: LimitedButler, 

1631 source_refs: Iterable[DatasetRef], 

1632 transfer: str = "auto", 

1633 skip_missing: bool = True, 

1634 register_dataset_types: bool = False, 

1635 transfer_dimensions: bool = False, 

1636 dry_run: bool = False, 

1637 ) -> Collection[DatasetRef]: 

1638 """Transfer datasets to this Butler from a run in another Butler. 

1639 

1640 Parameters 

1641 ---------- 

1642 source_butler : `LimitedButler` 

1643 Butler from which the datasets are to be transferred. If data IDs 

1644 in ``source_refs`` are not expanded then this has to be a full 

1645 `Butler` whose registry will be used to expand data IDs. 

1646 source_refs : `~collections.abc.Iterable` of `DatasetRef` 

1647 Datasets defined in the source butler that should be transferred to 

1648 this butler. In most circumstances, ``transfer_from`` is faster if 

1649 the dataset refs are expanded. 

1650 transfer : `str`, optional 

1651 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

1652 skip_missing : `bool` 

1653 If `True`, datasets with no datastore artifact associated with 

1654 them are not transferred. If `False` a registry entry will be 

1655 created even if no datastore record is created (and so will 

1656 look equivalent to the dataset being unstored). 

1657 register_dataset_types : `bool` 

1658 If `True` any missing dataset types are registered. Otherwise 

1659 an exception is raised. 

1660 transfer_dimensions : `bool`, optional 

1661 If `True`, dimension record data associated with the new datasets 

1662 will be transferred. 

1663 dry_run : `bool`, optional 

1664 If `True` the transfer will be processed without any modifications 

1665 made to the target butler and as if the target butler did not 

1666 have any of the datasets. 

1667 

1668 Returns 

1669 ------- 

1670 refs : `~collections.abc.Collection` of `DatasetRef` 

1671 The refs added to this Butler. 

1672 

1673 Notes 

1674 ----- 

1675 The datastore artifact has to exist for a transfer 

1676 to be made but non-existence is not an error. 

1677 

1678 Datasets that already exist in this run will be skipped. 

1679 

1680 The datasets are imported as part of a transaction, although 

1681 dataset types are registered before the transaction is started. 

1682 This means that it is possible for a dataset type to be registered 

1683 even though transfer has failed. 
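
Examples
--------
A minimal usage sketch, assuming ``source_butler`` is another butler
and ``refs`` were obtained from it (e.g. via its ``query_datasets``
method)::

    transferred = butler.transfer_from(
        source_butler,
        refs,
        transfer="copy",
        register_dataset_types=True,
    )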

1684 """ 

1685 raise NotImplementedError() 

1686 

1687 @abstractmethod 

1688 def validateConfiguration( 

1689 self, 

1690 logFailures: bool = False, 

1691 datasetTypeNames: Iterable[str] | None = None, 

1692 ignore: Iterable[str] | None = None, 

1693 ) -> None: 

1694 """Validate butler configuration. 

1695 

1696 Checks that each `DatasetType` can be stored in the `Datastore`. 

1697 

1698 Parameters 

1699 ---------- 

1700 logFailures : `bool`, optional 

1701 If `True`, output a log message for every validation error 

1702 detected. 

1703 datasetTypeNames : `~collections.abc.Iterable` of `str`, optional 

1704 The `DatasetType` names that should be checked. This allows 

1705 only a subset to be selected. 

1706 ignore : `~collections.abc.Iterable` of `str`, optional 

1707 Names of DatasetTypes to skip over. This can be used to skip 

1708 known problems. If a named `DatasetType` corresponds to a 

1709 composite, all components of that `DatasetType` will also be 

1710 ignored. 

1711 

1712 Raises 

1713 ------ 

1714 ButlerValidationError 

1715 Raised if there is some inconsistency with how this Butler 

1716 is configured. 
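
Examples
--------
A minimal usage sketch; the ignored dataset type name is
illustrative::

    butler.validateConfiguration(logFailures=True, ignore=["raw"])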

1717 """ 

1718 raise NotImplementedError() 

1719 

1720 @property 

1721 @abstractmethod 

1722 def collection_chains(self) -> ButlerCollections: 

1723 """Object with methods for modifying collection chains 

1724 (`~lsst.daf.butler.ButlerCollections`). 

1725 

1726 Deprecated. Replaced with ``collections`` property. 

1727 """ 

1728 raise NotImplementedError() 

1729 

1730 @property 

1731 @abstractmethod 

1732 def collections(self) -> ButlerCollections: 

1733 """Object with methods for modifying and querying collections 

1734 (`~lsst.daf.butler.ButlerCollections`). 

1735 

1736 Use of this object is preferred over `registry` wherever possible. 

1737 """ 

1738 raise NotImplementedError() 

1739 

1740 @property 

1741 @abstractmethod 

1742 def run(self) -> str | None: 

1743 """Name of the run this butler writes outputs to by default (`str` or 

1744 `None`). 

1745 """ 

1746 raise NotImplementedError() 

1747 

1748 @property 

1749 @abstractmethod 

1750 def registry(self) -> Registry: 

1751 """The object that manages dataset metadata and relationships 

1752 (`Registry`). 

1753 

1754 Many operations that don't involve reading or writing butler datasets 

1755 are accessible only via `Registry` methods. Eventually these methods 

1756 will be replaced by equivalent `Butler` methods. 

1757 """ 

1758 raise NotImplementedError() 

1759 

1760 @abstractmethod 

1761 def query(self) -> AbstractContextManager[Query]: 

1762 """Context manager returning a `.queries.Query` object used for 

1763 construction and execution of complex queries. 
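
Examples
--------
A minimal usage sketch; the dimension name and instrument value are
illustrative::

    with butler.query() as query:
        data_ids = list(query.data_ids(["detector"]).where(instrument="LSSTCam"))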

1764 """ 

1765 raise NotImplementedError() 

1766 

1767 def query_data_ids( 

1768 self, 

1769 dimensions: DimensionGroup | Iterable[str] | str, 

1770 *, 

1771 data_id: DataId | None = None, 

1772 where: str = "", 

1773 bind: Mapping[str, Any] | None = None, 

1774 with_dimension_records: bool = False, 

1775 order_by: Iterable[str] | str | None = None, 

1776 limit: int | None = -20_000, 

1777 explain: bool = True, 

1778 **kwargs: Any, 

1779 ) -> list[DataCoordinate]: 

1780 """Query for data IDs matching user-provided criteria. 

1781 

1782 Parameters 

1783 ---------- 

1784 dimensions : `DimensionGroup`, `str`, or \ 

1785 `~collections.abc.Iterable` [`str`] 

1786 The dimensions of the data IDs to yield, as either `DimensionGroup` 

1787 instances or `str`. Will be automatically expanded to a complete 

1788 `DimensionGroup`. 

1789 data_id : `dict` or `DataCoordinate`, optional 

1790 A data ID whose key-value pairs are used as equality constraints 

1791 in the query. 

1792 where : `str`, optional 

1793 A string expression similar to a SQL WHERE clause. May involve 

1794 any column of a dimension table or (as a shortcut for the primary 

1795 key column of a dimension table) dimension name. See 

1796 :ref:`daf_butler_dimension_expressions` for more information. 

1797 bind : `~collections.abc.Mapping`, optional 

1798 Mapping containing literal values that should be injected into the 

1799 ``where`` expression, keyed by the identifiers they replace. 

1800 Values of collection type can be expanded in some cases; see 

1801 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1802 information. 

1803 with_dimension_records : `bool`, optional 

1804 If `True` (default is `False`) then returned data IDs will have 

1805 dimension records. 

1806 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1807 Names of the columns/dimensions to use for ordering returned data 

1808 IDs. Column name can be prefixed with minus (``-``) to use 

1809 descending ordering. 

1810 limit : `int` or `None`, optional 

1811 Upper limit on the number of returned records. `None` can be used 

1812 if no limit is wanted. A limit of ``0`` means that the query will 

1813 be executed and validated but no results will be returned. In this 

1814 case there will be no exception even if ``explain`` is `True`. 

1815 If a negative value is given a warning will be issued if the number 

1816 of results is capped by that limit. 

1817 explain : `bool`, optional 

1818 If `True` (default) then `EmptyQueryResultError` exception is 

1819 raised when resulting list is empty. The exception contains 

1820 non-empty list of strings explaining possible causes for empty 

1821 result. 

1822 **kwargs 

1823 Additional keyword arguments are forwarded to 

1824 `DataCoordinate.standardize` when processing the ``data_id`` 

1825 argument (and may be used to provide a constraining data ID even 

1826 when the ``data_id`` argument is `None`). 

1827 

1828 Returns 

1829 ------- 

1830 dataIds : `list` [`DataCoordinate`] 

1831 Data IDs matching the given query parameters. These are always 

1832 guaranteed to identify all dimensions (`DataCoordinate.hasFull` 

1833 returns `True`). 

1834 

1835 Raises 

1836 ------ 

1837 lsst.daf.butler.registry.DataIdError 

1838 Raised when ``data_id`` or keyword arguments specify unknown 

1839 dimensions or values, or when they contain inconsistent values. 

1840 lsst.daf.butler.registry.UserExpressionError 

1841 Raised when ``where`` expression is invalid. 

1842 lsst.daf.butler.EmptyQueryResultError 

1843 Raised when query generates empty result and ``explain`` is set to 

1844 `True`. 

1845 TypeError 

1846 Raised when the arguments are incompatible. 
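
Examples
--------
A minimal usage sketch; the dimension names and ``where`` constraint
are illustrative::

    data_ids = butler.query_data_ids(
        ["visit", "detector"],
        where="instrument = 'LSSTCam' AND visit > 900",
        limit=100,
    )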

1847 """ 

1848 if data_id is None: 

1849 data_id = DataCoordinate.make_empty(self.dimensions) 

1850 if order_by is None: 

1851 order_by = [] 

1852 query_limit = limit 

1853 warn_limit = False 

1854 if limit is not None and limit < 0: 

1855 query_limit = abs(limit) + 1 

1856 warn_limit = True 

1857 with self.query() as query: 

1858 result = ( 

1859 query.data_ids(dimensions) 

1860 .where(data_id, where, bind=bind, **kwargs) 

1861 .order_by(*ensure_iterable(order_by)) 

1862 .limit(query_limit) 

1863 ) 

1864 if with_dimension_records: 

1865 result = result.with_dimension_records() 

1866 data_ids = list(result) 

1867 if warn_limit and len(data_ids) == query_limit: 

1868 # We asked for one too many so must remove that from the list. 

1869 data_ids.pop(-1) 

1870 assert limit is not None # For mypy. 

1871 _LOG.warning("More data IDs are available than the requested limit of %d.", abs(limit)) 

1872 if explain and (limit is None or limit != 0) and not data_ids: 

1873 raise EmptyQueryResultError(list(result.explain_no_results())) 

1874 return data_ids 

1875 

1876 def query_datasets( 

1877 self, 

1878 dataset_type: str | DatasetType, 

1879 collections: str | Iterable[str] | None = None, 

1880 *, 

1881 find_first: bool = True, 

1882 data_id: DataId | None = None, 

1883 where: str = "", 

1884 bind: Mapping[str, Any] | None = None, 

1885 with_dimension_records: bool = False, 

1886 order_by: Iterable[str] | str | None = None, 

1887 limit: int | None = -20_000, 

1888 explain: bool = True, 

1889 **kwargs: Any, 

1890 ) -> list[DatasetRef]: 

1891 """Query for dataset references matching user-provided criteria. 

1892 

1893 Parameters 

1894 ---------- 

1895 dataset_type : `str` or `DatasetType` 

1896 Dataset type object or name to search for. 

1897 collections : collection expression, optional 

1898 A collection name or iterable of collection names to search. If not 

1899 provided, the default collections are used. Can be a wildcard if 

1900 ``find_first`` is `False` (if find first is requested the order 

1901 of collections matters and wildcards make the order indeterminate). 

1902 See :ref:`daf_butler_collection_expressions` for more information. 

1903 find_first : `bool`, optional 

1904 If `True` (default), for each result data ID, only yield one 

1905 `DatasetRef` of each `DatasetType`, from the first collection in 

1906 which a dataset of that dataset type appears (according to the 

1907 order of ``collections`` passed in). If `True`, ``collections`` 

1908 must not contain wildcards. 

1909 data_id : `dict` or `DataCoordinate`, optional 

1910 A data ID whose key-value pairs are used as equality constraints in 

1911 the query. 

1912 where : `str`, optional 

1913 A string expression similar to a SQL WHERE clause. May involve any 

1914 column of a dimension table or (as a shortcut for the primary key 

1915 column of a dimension table) dimension name. See 

1916 :ref:`daf_butler_dimension_expressions` for more information. 

1917 bind : `~collections.abc.Mapping`, optional 

1918 Mapping containing literal values that should be injected into the 

1919 ``where`` expression, keyed by the identifiers they replace. Values 

1920 of collection type can be expanded in some cases; see 

1921 :ref:`daf_butler_dimension_expressions_identifiers` for more 

1922 information. 

1923 with_dimension_records : `bool`, optional 

1924 If `True` (default is `False`) then returned data IDs will have 

1925 dimension records. 

1926 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

1927 Names of the columns/dimensions to use for ordering returned data 

1928 IDs. Column name can be prefixed with minus (``-``) to use 

1929 descending ordering. 

1930 limit : `int` or `None`, optional 

1931 Upper limit on the number of returned records. `None` can be used 

1932 if no limit is wanted. A limit of ``0`` means that the query will 

1933 be executed and validated but no results will be returned. In this 

1934 case there will be no exception even if ``explain`` is `True`. 

1935 If a negative value is given a warning will be issued if the number 

1936 of results is capped by that limit. 

1937 explain : `bool`, optional 

1938 If `True` (default) then `EmptyQueryResultError` exception is 

1939 raised when resulting list is empty. The exception contains 

1940 non-empty list of strings explaining possible causes for empty 

1941 result. 

1942 **kwargs 

1943 Additional keyword arguments are forwarded to 

1944 `DataCoordinate.standardize` when processing the ``data_id`` 

1945 argument (and may be used to provide a constraining data ID even 

1946 when the ``data_id`` argument is `None`). 

1947 

1948 Returns 

1949 ------- 

1950 refs : `list` [ `DatasetRef` ] 

1951 Dataset references matching the given query criteria. Nested data 

1952 IDs are guaranteed to include values for all implied dimensions 

1953 (i.e. `DataCoordinate.hasFull` will return `True`). 

1954 

1955 Raises 

1956 ------ 

1957 lsst.daf.butler.registry.DatasetTypeExpressionError 

1958 Raised when ``dataset_type`` expression is invalid. 

1959 lsst.daf.butler.registry.DataIdError 

1960 Raised when ``data_id`` or keyword arguments specify unknown 

1961 dimensions or values, or when they contain inconsistent values. 

1962 lsst.daf.butler.registry.UserExpressionError 

1963 Raised when ``where`` expression is invalid. 

1964 lsst.daf.butler.EmptyQueryResultError 

1965 Raised when query generates empty result and ``explain`` is set to 

1966 `True`. 

1967 TypeError 

1968 Raised when the arguments are incompatible, such as when a 

1969 collection wildcard is passed when ``find_first`` is `True`, or 

1970 when ``collections`` is `None` and default butler collections are 

1971 not defined. 
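
Examples
--------
A minimal usage sketch; the dataset type name, collection name, and
constraint are illustrative::

    refs = butler.query_datasets(
        "flat",
        collections="LSSTCam/calib",
        where="detector = 10",
    )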

1972 """ 

1973 if data_id is None: 

1974 data_id = DataCoordinate.make_empty(self.dimensions) 

1975 if order_by is None: 

1976 order_by = [] 

1977 if collections and has_globs(collections): 

1978 # Wild cards need to be expanded but can only be allowed if 

1979 # find_first=False because expanding wildcards does not return 

1980 # a guaranteed ordering. Querying collection registry to expand 

1981 # collections when we do not have wildcards is expensive so only 

1982 # do it if we need it. 

1983 if find_first: 

1984 raise InvalidQueryError( 

1985 f"Can not use wildcards in collections when find_first=True (given {collections})" 

1986 ) 

1987 collections = self.collections.query(collections) 

1988 query_limit = limit 

1989 warn_limit = False 

1990 if limit is not None and limit < 0: 

1991 query_limit = abs(limit) + 1 

1992 warn_limit = True 

1993 with self.query() as query: 

1994 result = ( 

1995 query.datasets(dataset_type, collections=collections, find_first=find_first) 

1996 .where(data_id, where, bind=bind, **kwargs) 

1997 .order_by(*ensure_iterable(order_by)) 

1998 .limit(query_limit) 

1999 ) 

2000 if with_dimension_records: 

2001 result = result.with_dimension_records() 

2002 refs = list(result) 

2003 if warn_limit and len(refs) == query_limit: 

2004 # We asked for one too many so must remove that from the list. 

2005 refs.pop(-1) 

2006 assert limit is not None # For mypy. 

2007 _LOG.warning("More datasets are available than the requested limit of %d.", abs(limit)) 

2008 if explain and (limit is None or limit != 0) and not refs: 

2009 raise EmptyQueryResultError(list(result.explain_no_results())) 

2010 return refs 

2011 

2012 def query_dimension_records( 

2013 self, 

2014 element: str, 

2015 *, 

2016 data_id: DataId | None = None, 

2017 where: str = "", 

2018 bind: Mapping[str, Any] | None = None, 

2019 order_by: Iterable[str] | str | None = None, 

2020 limit: int | None = -20_000, 

2021 explain: bool = True, 

2022 **kwargs: Any, 

2023 ) -> list[DimensionRecord]: 

2024 """Query for dimension information matching user-provided criteria. 

2025 

2026 Parameters 

2027 ---------- 

2028 element : `str` 

2029 The name of a dimension element to obtain records for. 

2030 data_id : `dict` or `DataCoordinate`, optional 

2031 A data ID whose key-value pairs are used as equality constraints 

2032 in the query. 

2033 where : `str`, optional 

2034 A string expression similar to a SQL WHERE clause. See 

2035 `Registry.queryDataIds` and :ref:`daf_butler_dimension_expressions` 

2036 for more information. 

2037 bind : `~collections.abc.Mapping`, optional 

2038 Mapping containing literal values that should be injected into the 

2039 ``where`` expression, keyed by the identifiers they replace. 

2040 Values of collection type can be expanded in some cases; see 

2041 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2042 information. 

2043 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional 

2044 Names of the columns/dimensions to use for ordering returned data 

2045 IDs. Column name can be prefixed with minus (``-``) to use 

2046 descending ordering. 

2047 limit : `int` or `None`, optional 

2048 Upper limit on the number of returned records. `None` can be used 

2049 if no limit is wanted. A limit of ``0`` means that the query will 

2050 be executed and validated but no results will be returned. In this 

2051 case there will be no exception even if ``explain`` is `True`. 

2052 If a negative value is given a warning will be issued if the number 

2053 of results is capped by that limit. 

2054 explain : `bool`, optional 

2055 If `True` (default) then `EmptyQueryResultError` exception is 

2056 raised when resulting list is empty. The exception contains 

2057 non-empty list of strings explaining possible causes for empty 

2058 result. 

2059 **kwargs 

2060 Additional keyword arguments are forwarded to 

2061 `DataCoordinate.standardize` when processing the ``data_id`` 

2062 argument (and may be used to provide a constraining data ID even 

2063 when the ``data_id`` argument is `None`). 

2064 

2065 Returns 

2066 ------- 

2067 records : `list` [`DimensionRecord`] 

2068 Dimension records matching the given query parameters. 

2069 

2070 Raises 

2071 ------ 

2072 lsst.daf.butler.registry.DataIdError 

2073 Raised when ``data_id`` or keyword arguments specify unknown 

2074 dimensions or values, or when they contain inconsistent values. 

2075 lsst.daf.butler.registry.UserExpressionError 

2076 Raised when ``where`` expression is invalid. 

2077 lsst.daf.butler.EmptyQueryResultError 

2078 Raised when query generates empty result and ``explain`` is set to 

2079 `True`. 

2080 TypeError 

2081 Raised when the arguments are incompatible. 
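
Examples
--------
A minimal usage sketch; the element name and instrument value are
illustrative::

    records = butler.query_dimension_records("detector", instrument="LSSTCam")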

2085 """ 

2086 if data_id is None: 

2087 data_id = DataCoordinate.make_empty(self.dimensions) 

2088 if order_by is None: 

2089 order_by = [] 

2090 query_limit = limit 

2091 warn_limit = False 

2092 if limit is not None and limit < 0: 

2093 query_limit = abs(limit) + 1 

2094 warn_limit = True 

2095 with self.query() as query: 

2096 result = ( 

2097 query.dimension_records(element) 

2098 .where(data_id, where, bind=bind, **kwargs) 

2099 .order_by(*ensure_iterable(order_by)) 

2100 .limit(query_limit) 

2101 ) 

2102 dimension_records = list(result) 

2103 if warn_limit and len(dimension_records) == query_limit: 

2104 # We asked for one too many so must remove that from the list. 

2105 dimension_records.pop(-1) 

2106 assert limit is not None # For mypy. 

2107 _LOG.warning( 

2108 "More dimension records are available than the requested limit of %d.", abs(limit) 

2109 ) 

2110 if explain and (limit is None or limit != 0) and not dimension_records: 

2111 raise EmptyQueryResultError(list(result.explain_no_results())) 

2112 return dimension_records 

2113 

2114 def query_all_datasets( 

2115 self, 

2116 collections: str | Iterable[str] | None = None, 

2117 *, 

2118 name: str | Iterable[str] = "*", 

2119 find_first: bool = True, 

2120 data_id: DataId | None = None, 

2121 where: str = "", 

2122 bind: Mapping[str, Any] | None = None, 

2123 limit: int | None = -20_000, 

2124 **kwargs: Any, 

2125 ) -> list[DatasetRef]: 

2126 """Query for datasets of potentially multiple types. 

2127 

2128 Parameters 

2129 ---------- 

2130 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional 

2131 The collection or collections to search, in order. If not provided 

2132 or `None`, the default collection search path for this butler is 

2133 used. 

2134 name : `str` or `~collections.abc.Iterable` [ `str` ], optional 

2135 Names or name patterns (glob-style) that returned dataset type 

2136 names must match. If an iterable, items are OR'd together. The 

2137 default is to include all dataset types in the given collections. 

2138 find_first : `bool`, optional 

2139 If `True` (default), for each result data ID, only yield one 

2140 `DatasetRef` of each `DatasetType`, from the first collection in 

2141 which a dataset of that dataset type appears (according to the 

2142 order of ``collections`` passed in). 

2143 data_id : `dict` or `DataCoordinate`, optional 

2144 A data ID whose key-value pairs are used as equality constraints in 

2145 the query. 

2146 where : `str`, optional 

2147 A string expression similar to a SQL WHERE clause. May involve any 

2148 column of a dimension table or (as a shortcut for the primary key 

2149 column of a dimension table) dimension name. See 

2150 :ref:`daf_butler_dimension_expressions` for more information. 

2151 bind : `~collections.abc.Mapping`, optional 

2152 Mapping containing literal values that should be injected into the 

2153 ``where`` expression, keyed by the identifiers they replace. Values 

2154 of collection type can be expanded in some cases; see 

2155 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2156 information. 

2157 limit : `int` or `None`, optional 

2158 Upper limit on the number of returned records. `None` can be used 

2159 if no limit is wanted. A limit of ``0`` means that the query will 

2160 be executed and validated but no results will be returned. 

2161 If a negative value is given a warning will be issued if the number 

2162 of results is capped by that limit. If no limit is provided, by 

2163 default a maximum of 20,000 records will be returned. 

2164 **kwargs 

2165 Additional keyword arguments are forwarded to 

2166 `DataCoordinate.standardize` when processing the ``data_id`` 

2167 argument (and may be used to provide a constraining data ID even 

2168 when the ``data_id`` argument is `None`). 

2169 

2170 Raises 

2171 ------ 

2172 MissingDatasetTypeError 

2173 When no dataset types match ``name``, or an explicit (non-glob) 

2174 dataset type in ``name`` does not exist. 

2175 InvalidQueryError 

2176 If the parameters to the query are inconsistent or malformed. 

2177 MissingCollectionError 

2178 If a given collection is not found. 

2179 

2180 Returns 

2181 ------- 

2182 refs : `list` [ `DatasetRef` ] 

2183 Dataset references matching the given query criteria. Nested data 

2184 IDs are guaranteed to include values for all implied dimensions 

2185 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2186 include dimension records (`DataCoordinate.hasRecords` will be 

2187 `False`). 
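
Examples
--------
A minimal usage sketch; the collection name and dataset type patterns
are illustrative::

    refs = butler.query_all_datasets(
        "LSSTCam/defaults",
        name=["*_metadata", "calexp"],
        where="instrument = 'LSSTCam' AND visit = 1234",
    )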

2188 """ 

2189 if collections is None: 

2190 collections = list(self.collections.defaults) 

2191 else: 

2192 collections = list(ensure_iterable(collections)) 

2193 

2194 if bind is None: 

2195 bind = {} 

2196 if data_id is None: 

2197 data_id = {} 

2198 

2199 warn_limit = False 

2200 if limit is not None and limit < 0: 

2201 # Add one to the limit so we can detect if we have exceeded it. 

2202 limit = abs(limit) + 1 

2203 warn_limit = True 

2204 

2205 args = QueryAllDatasetsParameters( 

2206 collections=collections, 

2207 name=list(ensure_iterable(name)), 

2208 find_first=find_first, 

2209 data_id=data_id, 

2210 where=where, 

2211 limit=limit, 

2212 bind=bind, 

2213 kwargs=kwargs, 

2214 with_dimension_records=False, 

2215 ) 

2216 with self._query_all_datasets_by_page(args) as pages: 

2217 result = [] 

2218 for page in pages: 

2219 result.extend(page) 

2220 

2221 if warn_limit and limit is not None and len(result) >= limit: 

2222 # Remove the extra dataset we added for the limit check. 

2223 result.pop() 

2224 _LOG.warning("More datasets are available than the requested limit of %d.", limit - 1) 

2225 

2226 return result 

2227 

2228 @abstractmethod 

2229 def _query_all_datasets_by_page( 

2230 self, args: QueryAllDatasetsParameters 

2231 ) -> AbstractContextManager[Iterator[list[DatasetRef]]]: 

2232 raise NotImplementedError() 

2233 

2234 def clone( 

2235 self, 

2236 *, 

2237 collections: CollectionArgType | None | EllipsisType = ..., 

2238 run: str | None | EllipsisType = ..., 

2239 inferDefaults: bool | EllipsisType = ..., 

2240 dataId: dict[str, str] | EllipsisType = ..., 

2241 metrics: ButlerMetrics | None = None, 

2242 ) -> Butler: 

2243 """Return a new Butler instance connected to the same repository 

2244 as this one, optionally overriding ``collections``, ``run``, 

2245 ``inferDefaults``, and default data ID. 

2246 

2247 Parameters 

2248 ---------- 

2249 collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ 

2250 optional 

2251 Same as constructor. If omitted, uses value from original object. 

2252 run : `str` or `None`, optional 

2253 Same as constructor. If `None`, no default run is used. If 

2254 omitted, copies value from original object. 

2255 inferDefaults : `bool`, optional 

2256 Same as constructor. If omitted, copies value from original 

2257 object. 

2258 dataId : `dict` [ `str`, `str` ], optional 

2259 Same as ``kwargs`` passed to the constructor. If omitted, copies 

2260 values from original object. 

2261 metrics : `ButlerMetrics` or `None`, optional 

2262 Metrics object to record butler statistics. 
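
Examples
--------
A minimal usage sketch; the run and collection names are
illustrative::

    writeable = butler.clone(run="u/someone/processing", collections=["LSSTCam/defaults"])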

2263 """ 

2264 raise NotImplementedError() 

2265 

2266 @abstractmethod 

2267 def close(self) -> None: 

2268 raise NotImplementedError() 

2269 

2270 @abstractmethod 

2271 def _expand_data_ids(self, data_ids: Iterable[DataCoordinate]) -> list[DataCoordinate]: 

2272 raise NotImplementedError()