Coverage for python/lsst/daf/butler/_butler.py: 51%
182 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-05 11:36 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-05 11:36 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["Butler"]
32from abc import abstractmethod
33from collections.abc import Collection, Iterable, Mapping, Sequence
34from contextlib import AbstractContextManager
35from types import EllipsisType
36from typing import TYPE_CHECKING, Any, TextIO
38from lsst.resources import ResourcePath, ResourcePathExpression
39from lsst.utils import doImportType
40from lsst.utils.iteration import ensure_iterable
41from lsst.utils.logging import getLogger
43from ._butler_config import ButlerConfig, ButlerType
44from ._butler_instance_options import ButlerInstanceOptions
45from ._butler_repo_index import ButlerRepoIndex
46from ._config import Config, ConfigSubset
47from ._exceptions import EmptyQueryResultError
48from ._limited_butler import LimitedButler
49from .datastore import Datastore
50from .dimensions import DimensionConfig
51from .registry import RegistryConfig, _RegistryFactory
52from .repo_relocation import BUTLER_ROOT_TAG
54if TYPE_CHECKING:
55 from ._dataset_existence import DatasetExistence
56 from ._dataset_ref import DatasetId, DatasetRef
57 from ._dataset_type import DatasetType
58 from ._deferredDatasetHandle import DeferredDatasetHandle
59 from ._file_dataset import FileDataset
60 from ._storage_class import StorageClass
61 from ._timespan import Timespan
62 from .datastore import DatasetRefURIs
63 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
64 from .queries import Query
65 from .registry import Registry
66 from .transfers import RepoExportContext
68_LOG = getLogger(__name__)
71class Butler(LimitedButler): # numpydoc ignore=PR02
72 """Interface for data butler and factory for Butler instances.
74 Parameters
75 ----------
76 config : `ButlerConfig`, `Config` or `str`, optional
77 Configuration. Anything acceptable to the `ButlerConfig` constructor.
78 If a directory path is given the configuration will be read from a
79 ``butler.yaml`` file in that location. If `None` is given default
80 values will be used. If ``config`` contains "cls" key then its value is
81 used as a name of butler class and it must be a sub-class of this
82 class, otherwise `DirectButler` is instantiated.
83 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
84 An expression specifying the collections to be searched (in order) when
85 reading datasets.
86 This may be a `str` collection name or an iterable thereof.
87 See :ref:`daf_butler_collection_expressions` for more information.
88 These collections are not registered automatically and must be
89 manually registered before they are used by any method, but they may be
90 manually registered after the `Butler` is initialized.
91 run : `str`, optional
92 Name of the `~CollectionType.RUN` collection new datasets should be
93 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
94 ``collections`` will be set to ``[run]``. If not `None`, this
95 collection will automatically be registered. If this is not set (and
96 ``writeable`` is not set either), a read-only butler will be created.
97 searchPaths : `list` of `str`, optional
98 Directory paths to search when calculating the full Butler
99 configuration. Not used if the supplied config is already a
100 `ButlerConfig`.
101 writeable : `bool`, optional
102 Explicitly sets whether the butler supports write operations. If not
103 provided, a read-write butler is created if any of ``run``, ``tags``,
104 or ``chains`` is non-empty.
105 inferDefaults : `bool`, optional
106 If `True` (default) infer default data ID values from the values
107 present in the datasets in ``collections``: if all collections have the
108 same value (or no value) for a governor dimension, that value will be
109 the default for that dimension. Nonexistent collections are ignored.
110 If a default value is provided explicitly for a governor dimension via
111 ``**kwargs``, no default will be inferred for that dimension.
112 without_datastore : `bool`, optional
113 If `True` do not attach a datastore to this butler. Any attempts
114 to use a datastore will fail.
115 **kwargs : `Any`
116 Additional keyword arguments passed to a constructor of actual butler
117 class.
119 Notes
120 -----
121 The preferred way to instantiate Butler is via the `from_config` method.
122 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
123 but ``mypy`` will complain about the former.
124 """
126 def __new__(
127 cls,
128 config: Config | ResourcePathExpression | None = None,
129 *,
130 collections: Any = None,
131 run: str | None = None,
132 searchPaths: Sequence[ResourcePathExpression] | None = None,
133 writeable: bool | None = None,
134 inferDefaults: bool = True,
135 without_datastore: bool = False,
136 **kwargs: Any,
137 ) -> Butler:
138 if cls is Butler:
139 return Butler.from_config(
140 config=config,
141 collections=collections,
142 run=run,
143 searchPaths=searchPaths,
144 writeable=writeable,
145 inferDefaults=inferDefaults,
146 without_datastore=without_datastore,
147 **kwargs,
148 )
150 # Note: we do not pass any parameters to __new__, Python will pass them
151 # to __init__ after __new__ returns sub-class instance.
152 return super().__new__(cls)
154 @classmethod
155 def from_config(
156 cls,
157 config: Config | ResourcePathExpression | None = None,
158 *,
159 collections: Any = None,
160 run: str | None = None,
161 searchPaths: Sequence[ResourcePathExpression] | None = None,
162 writeable: bool | None = None,
163 inferDefaults: bool = True,
164 without_datastore: bool = False,
165 **kwargs: Any,
166 ) -> Butler:
167 """Create butler instance from configuration.
169 Parameters
170 ----------
171 config : `ButlerConfig`, `Config` or `str`, optional
172 Configuration. Anything acceptable to the `ButlerConfig`
173 constructor. If a directory path is given the configuration will be
174 read from a ``butler.yaml`` file in that location. If `None` is
175 given default values will be used. If ``config`` contains "cls" key
176 then its value is used as a name of butler class and it must be a
177 sub-class of this class, otherwise `DirectButler` is instantiated.
178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
179 An expression specifying the collections to be searched (in order)
180 when reading datasets.
181 This may be a `str` collection name or an iterable thereof.
182 See :ref:`daf_butler_collection_expressions` for more information.
183 These collections are not registered automatically and must be
184 manually registered before they are used by any method, but they
185 may be manually registered after the `Butler` is initialized.
186 run : `str`, optional
187 Name of the `~CollectionType.RUN` collection new datasets should be
188 inserted into. If ``collections`` is `None` and ``run`` is not
189 `None`, ``collections`` will be set to ``[run]``. If not `None`,
190 this collection will automatically be registered. If this is not
191 set (and ``writeable`` is not set either), a read-only butler will
192 be created.
193 searchPaths : `list` of `str`, optional
194 Directory paths to search when calculating the full Butler
195 configuration. Not used if the supplied config is already a
196 `ButlerConfig`.
197 writeable : `bool`, optional
198 Explicitly sets whether the butler supports write operations. If
199 not provided, a read-write butler is created if any of ``run``,
200 ``tags``, or ``chains`` is non-empty.
201 inferDefaults : `bool`, optional
202 If `True` (default) infer default data ID values from the values
203 present in the datasets in ``collections``: if all collections have
204 the same value (or no value) for a governor dimension, that value
205 will be the default for that dimension. Nonexistent collections
206 are ignored. If a default value is provided explicitly for a
207 governor dimension via ``**kwargs``, no default will be inferred
208 for that dimension.
209 without_datastore : `bool`, optional
210 If `True` do not attach a datastore to this butler. Any attempts
211 to use a datastore will fail.
212 **kwargs : `Any`
213 Default data ID key-value pairs. These may only identify
214 "governor" dimensions like ``instrument`` and ``skymap``.
216 Returns
217 -------
218 butler : `Butler`
219 A `Butler` constructed from the given configuration.
221 Notes
222 -----
223 Calling this factory method is identical to calling
224 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
225 complains about ``Butler()`` call.
227 Examples
228 --------
229 While there are many ways to control exactly how a `Butler` interacts
230 with the collections in its `Registry`, the most common cases are still
231 simple.
233 For a read-only `Butler` that searches one collection, do::
235 butler = Butler.from_config(
236 "/path/to/repo", collections=["u/alice/DM-50000"]
237 )
239 For a read-write `Butler` that writes to and reads from a
240 `~CollectionType.RUN` collection::
242 butler = Butler.from_config(
243 "/path/to/repo", run="u/alice/DM-50000/a"
244 )
246 The `Butler` passed to a ``PipelineTask`` is often much more complex,
247 because we want to write to one `~CollectionType.RUN` collection but
248 read from several others (as well)::
250 butler = Butler.from_config(
251 "/path/to/repo",
252 run="u/alice/DM-50000/a",
253 collections=[
254 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
255 ]
256 )
258 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
259 Datasets will be read first from that run (since it appears first in
260 the chain), and then from ``u/bob/DM-49998`` and finally
261 ``HSC/defaults``.
263 Finally, one can always create a `Butler` with no collections::
265 butler = Butler.from_config("/path/to/repo", writeable=True)
267 This can be extremely useful when you just want to use
268 ``butler.registry``, e.g. for inserting dimension data or managing
269 collections, or when the collections you want to use with the butler
270 are not consistent. Passing ``writeable`` explicitly here is only
271 necessary if you want to be able to make changes to the repo - usually
272 the value for ``writeable`` can be guessed from the collection
273 arguments provided, but it defaults to `False` when there are not
274 collection arguments.
275 """
276 # DirectButler used to have a way to specify a "copy constructor" by
277 # passing the "butler" parameter to its constructor. This
278 # functionality has been moved out of the constructor into
279 # Butler._clone(), but the new interface is not public yet.
280 butler = kwargs.pop("butler", None)
281 if butler is not None:
282 if not isinstance(butler, Butler):
283 raise TypeError("'butler' parameter must be a Butler instance")
284 if config is not None or searchPaths is not None or writeable is not None:
285 raise TypeError(
286 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
287 )
288 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
290 options = ButlerInstanceOptions(
291 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
292 )
294 # Load the Butler configuration. This may involve searching the
295 # environment to locate a configuration file.
296 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
297 butler_type = butler_config.get_butler_type()
299 # Make DirectButler if class is not specified.
300 match butler_type:
301 case ButlerType.DIRECT:
302 from .direct_butler import DirectButler
304 return DirectButler.create_from_config(
305 butler_config,
306 options=options,
307 without_datastore=without_datastore,
308 )
309 case ButlerType.REMOTE:
310 from .remote_butler import RemoteButlerFactory
312 factory = RemoteButlerFactory.create_factory_from_config(butler_config)
313 return factory.create_butler_with_credentials_from_environment(butler_options=options)
314 case _:
315 raise TypeError(f"Unknown Butler type '{butler_type}'")
317 @staticmethod
318 def makeRepo(
319 root: ResourcePathExpression,
320 config: Config | str | None = None,
321 dimensionConfig: Config | str | None = None,
322 standalone: bool = False,
323 searchPaths: list[str] | None = None,
324 forceConfigRoot: bool = True,
325 outfile: ResourcePathExpression | None = None,
326 overwrite: bool = False,
327 ) -> Config:
328 """Create an empty data repository by adding a butler.yaml config
329 to a repository root directory.
331 Parameters
332 ----------
333 root : `lsst.resources.ResourcePathExpression`
334 Path or URI to the root location of the new repository. Will be
335 created if it does not exist.
336 config : `Config` or `str`, optional
337 Configuration to write to the repository, after setting any
338 root-dependent Registry or Datastore config options. Can not
339 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
340 configuration will be used. Root-dependent config options
341 specified in this config are overwritten if ``forceConfigRoot``
342 is `True`.
343 dimensionConfig : `Config` or `str`, optional
344 Configuration for dimensions, will be used to initialize registry
345 database.
346 standalone : `bool`
347 If True, write all expanded defaults, not just customized or
348 repository-specific settings.
349 This (mostly) decouples the repository from the default
350 configuration, insulating it from changes to the defaults (which
351 may be good or bad, depending on the nature of the changes).
352 Future *additions* to the defaults will still be picked up when
353 initializing `Butlers` to repos created with ``standalone=True``.
354 searchPaths : `list` of `str`, optional
355 Directory paths to search when calculating the full butler
356 configuration.
357 forceConfigRoot : `bool`, optional
358 If `False`, any values present in the supplied ``config`` that
359 would normally be reset are not overridden and will appear
360 directly in the output config. This allows non-standard overrides
361 of the root directory for a datastore or registry to be given.
362 If this parameter is `True` the values for ``root`` will be
363 forced into the resulting config if appropriate.
364 outfile : `lss.resources.ResourcePathExpression`, optional
365 If not-`None`, the output configuration will be written to this
366 location rather than into the repository itself. Can be a URI
367 string. Can refer to a directory that will be used to write
368 ``butler.yaml``.
369 overwrite : `bool`, optional
370 Create a new configuration file even if one already exists
371 in the specified output location. Default is to raise
372 an exception.
374 Returns
375 -------
376 config : `Config`
377 The updated `Config` instance written to the repo.
379 Raises
380 ------
381 ValueError
382 Raised if a ButlerConfig or ConfigSubset is passed instead of a
383 regular Config (as these subclasses would make it impossible to
384 support ``standalone=False``).
385 FileExistsError
386 Raised if the output config file already exists.
387 os.error
388 Raised if the directory does not exist, exists but is not a
389 directory, or cannot be created.
391 Notes
392 -----
393 Note that when ``standalone=False`` (the default), the configuration
394 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
395 construct the repository should also be used to construct any Butlers
396 to avoid configuration inconsistencies.
397 """
398 if isinstance(config, ButlerConfig | ConfigSubset):
399 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
401 # Ensure that the root of the repository exists or can be made
402 root_uri = ResourcePath(root, forceDirectory=True)
403 root_uri.mkdir()
405 config = Config(config)
407 # If we are creating a new repo from scratch with relative roots,
408 # do not propagate an explicit root from the config file
409 if "root" in config:
410 del config["root"]
412 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
413 imported_class = doImportType(full["datastore", "cls"])
414 if not issubclass(imported_class, Datastore):
415 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
416 datastoreClass: type[Datastore] = imported_class
417 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
419 # if key exists in given config, parse it, otherwise parse the defaults
420 # in the expanded config
421 if config.get(("registry", "db")):
422 registryConfig = RegistryConfig(config)
423 else:
424 registryConfig = RegistryConfig(full)
425 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
426 if defaultDatabaseUri is not None:
427 Config.updateParameters(
428 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
429 )
430 else:
431 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
433 if standalone:
434 config.merge(full)
435 else:
436 # Always expand the registry.managers section into the per-repo
437 # config, because after the database schema is created, it's not
438 # allowed to change anymore. Note that in the standalone=True
439 # branch, _everything_ in the config is expanded, so there's no
440 # need to special case this.
441 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
442 configURI: ResourcePathExpression
443 if outfile is not None:
444 # When writing to a separate location we must include
445 # the root of the butler repo in the config else it won't know
446 # where to look.
447 config["root"] = root_uri.geturl()
448 configURI = outfile
449 else:
450 configURI = root_uri
451 # Strip obscore configuration, if it is present, before writing config
452 # to a file, obscore config will be stored in registry.
453 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
454 config_to_write = config.copy()
455 del config_to_write[obscore_config_key]
456 config_to_write.dumpToUri(configURI, overwrite=overwrite)
457 # configFile attribute is updated, need to copy it to original.
458 config.configFile = config_to_write.configFile
459 else:
460 config.dumpToUri(configURI, overwrite=overwrite)
462 # Create Registry and populate tables
463 registryConfig = RegistryConfig(config.get("registry"))
464 dimensionConfig = DimensionConfig(dimensionConfig)
465 _RegistryFactory(registryConfig).create_from_config(
466 dimensionConfig=dimensionConfig, butlerRoot=root_uri
467 )
469 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
471 return config
473 @classmethod
474 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
475 """Look up the label in a butler repository index.
477 Parameters
478 ----------
479 label : `str`
480 Label of the Butler repository to look up.
481 return_label : `bool`, optional
482 If ``label`` cannot be found in the repository index (either
483 because index is not defined or ``label`` is not in the index) and
484 ``return_label`` is `True` then return ``ResourcePath(label)``.
485 If ``return_label`` is `False` (default) then an exception will be
486 raised instead.
488 Returns
489 -------
490 uri : `lsst.resources.ResourcePath`
491 URI to the Butler repository associated with the given label or
492 default value if it is provided.
494 Raises
495 ------
496 KeyError
497 Raised if the label is not found in the index, or if an index
498 is not defined, and ``return_label`` is `False`.
500 Notes
501 -----
502 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
503 information is discovered.
504 """
505 return ButlerRepoIndex.get_repo_uri(label, return_label)
507 @classmethod
508 def get_known_repos(cls) -> set[str]:
509 """Retrieve the list of known repository labels.
511 Returns
512 -------
513 repos : `set` of `str`
514 All the known labels. Can be empty if no index can be found.
516 Notes
517 -----
518 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
519 information is discovered.
520 """
521 return ButlerRepoIndex.get_known_repos()
523 @abstractmethod
524 def _caching_context(self) -> AbstractContextManager[None]:
525 """Context manager that enables caching."""
526 raise NotImplementedError()
528 @abstractmethod
529 def transaction(self) -> AbstractContextManager[None]:
530 """Context manager supporting `Butler` transactions.
532 Transactions can be nested.
533 """
534 raise NotImplementedError()
536 @abstractmethod
537 def put(
538 self,
539 obj: Any,
540 datasetRefOrType: DatasetRef | DatasetType | str,
541 /,
542 dataId: DataId | None = None,
543 *,
544 run: str | None = None,
545 **kwargs: Any,
546 ) -> DatasetRef:
547 """Store and register a dataset.
549 Parameters
550 ----------
551 obj : `object`
552 The dataset.
553 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
554 When `DatasetRef` is provided, ``dataId`` should be `None`.
555 Otherwise the `DatasetType` or name thereof. If a fully resolved
556 `DatasetRef` is given the run and ID are used directly.
557 dataId : `dict` or `DataCoordinate`
558 A `dict` of `Dimension` link name, value pairs that label the
559 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
560 should be provided as the second argument.
561 run : `str`, optional
562 The name of the run the dataset should be added to, overriding
563 ``self.run``. Not used if a resolved `DatasetRef` is provided.
564 **kwargs
565 Additional keyword arguments used to augment or construct a
566 `DataCoordinate`. See `DataCoordinate.standardize`
567 parameters. Not used if a resolve `DatasetRef` is provided.
569 Returns
570 -------
571 ref : `DatasetRef`
572 A reference to the stored dataset, updated with the correct id if
573 given.
575 Raises
576 ------
577 TypeError
578 Raised if the butler is read-only or if no run has been provided.
579 """
580 raise NotImplementedError()
582 @abstractmethod
583 def getDeferred(
584 self,
585 datasetRefOrType: DatasetRef | DatasetType | str,
586 /,
587 dataId: DataId | None = None,
588 *,
589 parameters: dict | None = None,
590 collections: Any = None,
591 storageClass: str | StorageClass | None = None,
592 **kwargs: Any,
593 ) -> DeferredDatasetHandle:
594 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
595 after an immediate registry lookup.
597 Parameters
598 ----------
599 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
600 When `DatasetRef` the `dataId` should be `None`.
601 Otherwise the `DatasetType` or name thereof.
602 dataId : `dict` or `DataCoordinate`, optional
603 A `dict` of `Dimension` link name, value pairs that label the
604 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
605 should be provided as the first argument.
606 parameters : `dict`
607 Additional StorageClass-defined options to control reading,
608 typically used to efficiently read only a subset of the dataset.
609 collections : Any, optional
610 Collections to be searched, overriding ``self.collections``.
611 Can be any of the types supported by the ``collections`` argument
612 to butler construction.
613 storageClass : `StorageClass` or `str`, optional
614 The storage class to be used to override the Python type
615 returned by this method. By default the returned type matches
616 the dataset type definition for this dataset. Specifying a
617 read `StorageClass` can force a different type to be returned.
618 This type must be compatible with the original type.
619 **kwargs
620 Additional keyword arguments used to augment or construct a
621 `DataId`. See `DataId` parameters.
623 Returns
624 -------
625 obj : `DeferredDatasetHandle`
626 A handle which can be used to retrieve a dataset at a later time.
628 Raises
629 ------
630 LookupError
631 Raised if no matching dataset exists in the `Registry` or
632 datastore.
633 ValueError
634 Raised if a resolved `DatasetRef` was passed as an input, but it
635 differs from the one found in the registry.
636 TypeError
637 Raised if no collections were provided.
638 """
639 raise NotImplementedError()
641 @abstractmethod
642 def get(
643 self,
644 datasetRefOrType: DatasetRef | DatasetType | str,
645 /,
646 dataId: DataId | None = None,
647 *,
648 parameters: dict[str, Any] | None = None,
649 collections: Any = None,
650 storageClass: StorageClass | str | None = None,
651 **kwargs: Any,
652 ) -> Any:
653 """Retrieve a stored dataset.
655 Parameters
656 ----------
657 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
658 When `DatasetRef` the `dataId` should be `None`.
659 Otherwise the `DatasetType` or name thereof.
660 If a resolved `DatasetRef`, the associated dataset
661 is returned directly without additional querying.
662 dataId : `dict` or `DataCoordinate`
663 A `dict` of `Dimension` link name, value pairs that label the
664 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
665 should be provided as the first argument.
666 parameters : `dict`
667 Additional StorageClass-defined options to control reading,
668 typically used to efficiently read only a subset of the dataset.
669 collections : Any, optional
670 Collections to be searched, overriding ``self.collections``.
671 Can be any of the types supported by the ``collections`` argument
672 to butler construction.
673 storageClass : `StorageClass` or `str`, optional
674 The storage class to be used to override the Python type
675 returned by this method. By default the returned type matches
676 the dataset type definition for this dataset. Specifying a
677 read `StorageClass` can force a different type to be returned.
678 This type must be compatible with the original type.
679 **kwargs
680 Additional keyword arguments used to augment or construct a
681 `DataCoordinate`. See `DataCoordinate.standardize`
682 parameters.
684 Returns
685 -------
686 obj : `object`
687 The dataset.
689 Raises
690 ------
691 LookupError
692 Raised if no matching dataset exists in the `Registry`.
693 TypeError
694 Raised if no collections were provided.
696 Notes
697 -----
698 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
699 this method requires that the given data ID include temporal dimensions
700 beyond the dimensions of the dataset type itself, in order to find the
701 dataset with the appropriate validity range. For example, a "bias"
702 dataset with native dimensions ``{instrument, detector}`` could be
703 fetched with a ``{instrument, detector, exposure}`` data ID, because
704 ``exposure`` is a temporal dimension.
705 """
706 raise NotImplementedError()
708 @abstractmethod
709 def getURIs(
710 self,
711 datasetRefOrType: DatasetRef | DatasetType | str,
712 /,
713 dataId: DataId | None = None,
714 *,
715 predict: bool = False,
716 collections: Any = None,
717 run: str | None = None,
718 **kwargs: Any,
719 ) -> DatasetRefURIs:
720 """Return the URIs associated with the dataset.
722 Parameters
723 ----------
724 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
725 When `DatasetRef` the `dataId` should be `None`.
726 Otherwise the `DatasetType` or name thereof.
727 dataId : `dict` or `DataCoordinate`
728 A `dict` of `Dimension` link name, value pairs that label the
729 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
730 should be provided as the first argument.
731 predict : `bool`
732 If `True`, allow URIs to be returned of datasets that have not
733 been written.
734 collections : Any, optional
735 Collections to be searched, overriding ``self.collections``.
736 Can be any of the types supported by the ``collections`` argument
737 to butler construction.
738 run : `str`, optional
739 Run to use for predictions, overriding ``self.run``.
740 **kwargs
741 Additional keyword arguments used to augment or construct a
742 `DataCoordinate`. See `DataCoordinate.standardize`
743 parameters.
745 Returns
746 -------
747 uris : `DatasetRefURIs`
748 The URI to the primary artifact associated with this dataset (if
749 the dataset was disassembled within the datastore this may be
750 `None`), and the URIs to any components associated with the dataset
751 artifact. (can be empty if there are no components).
752 """
753 raise NotImplementedError()
755 def getURI(
756 self,
757 datasetRefOrType: DatasetRef | DatasetType | str,
758 /,
759 dataId: DataId | None = None,
760 *,
761 predict: bool = False,
762 collections: Any = None,
763 run: str | None = None,
764 **kwargs: Any,
765 ) -> ResourcePath:
766 """Return the URI to the Dataset.
768 Parameters
769 ----------
770 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
771 When `DatasetRef` the `dataId` should be `None`.
772 Otherwise the `DatasetType` or name thereof.
773 dataId : `dict` or `DataCoordinate`
774 A `dict` of `Dimension` link name, value pairs that label the
775 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
776 should be provided as the first argument.
777 predict : `bool`
778 If `True`, allow URIs to be returned of datasets that have not
779 been written.
780 collections : Any, optional
781 Collections to be searched, overriding ``self.collections``.
782 Can be any of the types supported by the ``collections`` argument
783 to butler construction.
784 run : `str`, optional
785 Run to use for predictions, overriding ``self.run``.
786 **kwargs
787 Additional keyword arguments used to augment or construct a
788 `DataCoordinate`. See `DataCoordinate.standardize`
789 parameters.
791 Returns
792 -------
793 uri : `lsst.resources.ResourcePath`
794 URI pointing to the Dataset within the datastore. If the
795 Dataset does not exist in the datastore, and if ``predict`` is
796 `True`, the URI will be a prediction and will include a URI
797 fragment "#predicted".
798 If the datastore does not have entities that relate well
799 to the concept of a URI the returned URI string will be
800 descriptive. The returned URI is not guaranteed to be obtainable.
802 Raises
803 ------
804 LookupError
805 A URI has been requested for a dataset that does not exist and
806 guessing is not allowed.
807 ValueError
808 Raised if a resolved `DatasetRef` was passed as an input, but it
809 differs from the one found in the registry.
810 TypeError
811 Raised if no collections were provided.
812 RuntimeError
813 Raised if a URI is requested for a dataset that consists of
814 multiple artifacts.
815 """
816 primary, components = self.getURIs(
817 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
818 )
820 if primary is None or components:
821 raise RuntimeError(
822 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
823 "Use Butler.getURIs() instead."
824 )
825 return primary
827 @abstractmethod
828 def get_dataset_type(self, name: str) -> DatasetType:
829 """Get the `DatasetType`.
831 Parameters
832 ----------
833 name : `str`
834 Name of the type.
836 Returns
837 -------
838 type : `DatasetType`
839 The `DatasetType` associated with the given name.
841 Raises
842 ------
843 lsst.daf.butler.MissingDatasetTypeError
844 Raised if the requested dataset type has not been registered.
846 Notes
847 -----
848 This method handles component dataset types automatically, though most
849 other operations do not.
850 """
851 raise NotImplementedError()
853 @abstractmethod
854 def get_dataset(
855 self,
856 id: DatasetId,
857 *,
858 storage_class: str | StorageClass | None = None,
859 dimension_records: bool = False,
860 datastore_records: bool = False,
861 ) -> DatasetRef | None:
862 """Retrieve a Dataset entry.
864 Parameters
865 ----------
866 id : `DatasetId`
867 The unique identifier for the dataset.
868 storage_class : `str` or `StorageClass` or `None`
869 A storage class to use when creating the returned entry. If given
870 it must be compatible with the default storage class.
871 dimension_records : `bool`, optional
872 If `True` the ref will be expanded and contain dimension records.
873 datastore_records : `bool`, optional
874 If `True` the ref will contain associated datastore records.
876 Returns
877 -------
878 ref : `DatasetRef` or `None`
879 A ref to the Dataset, or `None` if no matching Dataset
880 was found.
881 """
882 raise NotImplementedError()
884 @abstractmethod
885 def find_dataset(
886 self,
887 dataset_type: DatasetType | str,
888 data_id: DataId | None = None,
889 *,
890 collections: str | Sequence[str] | None = None,
891 timespan: Timespan | None = None,
892 storage_class: str | StorageClass | None = None,
893 dimension_records: bool = False,
894 datastore_records: bool = False,
895 **kwargs: Any,
896 ) -> DatasetRef | None:
897 """Find a dataset given its `DatasetType` and data ID.
899 This can be used to obtain a `DatasetRef` that permits the dataset to
900 be read from a `Datastore`. If the dataset is a component and can not
901 be found using the provided dataset type, a dataset ref for the parent
902 will be returned instead but with the correct dataset type.
904 Parameters
905 ----------
906 dataset_type : `DatasetType` or `str`
907 A `DatasetType` or the name of one. If this is a `DatasetType`
908 instance, its storage class will be respected and propagated to
909 the output, even if it differs from the dataset type definition
910 in the registry, as long as the storage classes are convertible.
911 data_id : `dict` or `DataCoordinate`, optional
912 A `dict`-like object containing the `Dimension` links that identify
913 the dataset within a collection. If it is a `dict` the dataId
914 can include dimension record values such as ``day_obs`` and
915 ``seq_num`` or ``full_name`` that can be used to derive the
916 primary dimension.
917 collections : `str` or `list` [`str`], optional
918 A an ordered list of collections to search for the dataset.
919 Defaults to ``self.defaults.collections``.
920 timespan : `Timespan`, optional
921 A timespan that the validity range of the dataset must overlap.
922 If not provided, any `~CollectionType.CALIBRATION` collections
923 matched by the ``collections`` argument will not be searched.
924 storage_class : `str` or `StorageClass` or `None`
925 A storage class to use when creating the returned entry. If given
926 it must be compatible with the default storage class.
927 dimension_records : `bool`, optional
928 If `True` the ref will be expanded and contain dimension records.
929 datastore_records : `bool`, optional
930 If `True` the ref will contain associated datastore records.
931 **kwargs
932 Additional keyword arguments passed to
933 `DataCoordinate.standardize` to convert ``dataId`` to a true
934 `DataCoordinate` or augment an existing one. This can also include
935 dimension record metadata that can be used to derive a primary
936 dimension value.
938 Returns
939 -------
940 ref : `DatasetRef`
941 A reference to the dataset, or `None` if no matching Dataset
942 was found.
944 Raises
945 ------
946 lsst.daf.butler.NoDefaultCollectionError
947 Raised if ``collections`` is `None` and
948 ``self.collections`` is `None`.
949 LookupError
950 Raised if one or more data ID keys are missing.
951 lsst.daf.butler.MissingDatasetTypeError
952 Raised if the dataset type does not exist.
953 lsst.daf.butler.MissingCollectionError
954 Raised if any of ``collections`` does not exist in the registry.
956 Notes
957 -----
958 This method simply returns `None` and does not raise an exception even
959 when the set of collections searched is intrinsically incompatible with
960 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
961 only `~CollectionType.CALIBRATION` collections are being searched.
962 This may make it harder to debug some lookup failures, but the behavior
963 is intentional; we consider it more important that failed searches are
964 reported consistently, regardless of the reason, and that adding
965 additional collections that do not contain a match to the search path
966 never changes the behavior.
968 This method handles component dataset types automatically, though most
969 other query operations do not.
970 """
971 raise NotImplementedError()
973 @abstractmethod
974 def retrieveArtifacts(
975 self,
976 refs: Iterable[DatasetRef],
977 destination: ResourcePathExpression,
978 transfer: str = "auto",
979 preserve_path: bool = True,
980 overwrite: bool = False,
981 ) -> list[ResourcePath]:
982 """Retrieve the artifacts associated with the supplied refs.
984 Parameters
985 ----------
986 refs : iterable of `DatasetRef`
987 The datasets for which artifacts are to be retrieved.
988 A single ref can result in multiple artifacts. The refs must
989 be resolved.
990 destination : `lsst.resources.ResourcePath` or `str`
991 Location to write the artifacts.
992 transfer : `str`, optional
993 Method to use to transfer the artifacts. Must be one of the options
994 supported by `~lsst.resources.ResourcePath.transfer_from()`.
995 "move" is not allowed.
996 preserve_path : `bool`, optional
997 If `True` the full path of the artifact within the datastore
998 is preserved. If `False` the final file component of the path
999 is used.
1000 overwrite : `bool`, optional
1001 If `True` allow transfers to overwrite existing files at the
1002 destination.
1004 Returns
1005 -------
1006 targets : `list` of `lsst.resources.ResourcePath`
1007 URIs of file artifacts in destination location. Order is not
1008 preserved.
1010 Notes
1011 -----
1012 For non-file datastores the artifacts written to the destination
1013 may not match the representation inside the datastore. For example
1014 a hierarchical data structure in a NoSQL database may well be stored
1015 as a JSON file.
1016 """
1017 raise NotImplementedError()
1019 @abstractmethod
1020 def exists(
1021 self,
1022 dataset_ref_or_type: DatasetRef | DatasetType | str,
1023 /,
1024 data_id: DataId | None = None,
1025 *,
1026 full_check: bool = True,
1027 collections: Any = None,
1028 **kwargs: Any,
1029 ) -> DatasetExistence:
1030 """Indicate whether a dataset is known to Butler registry and
1031 datastore.
1033 Parameters
1034 ----------
1035 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1036 When `DatasetRef` the `dataId` should be `None`.
1037 Otherwise the `DatasetType` or name thereof.
1038 data_id : `dict` or `DataCoordinate`
1039 A `dict` of `Dimension` link name, value pairs that label the
1040 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1041 should be provided as the first argument.
1042 full_check : `bool`, optional
1043 If `True`, a check will be made for the actual existence of a
1044 dataset artifact. This will involve additional overhead due to
1045 the need to query an external system. If `False`, this check will
1046 be omitted, and the registry and datastore will solely be asked
1047 if they know about the dataset but no direct check for the
1048 artifact will be performed.
1049 collections : Any, optional
1050 Collections to be searched, overriding ``self.collections``.
1051 Can be any of the types supported by the ``collections`` argument
1052 to butler construction.
1053 **kwargs
1054 Additional keyword arguments used to augment or construct a
1055 `DataCoordinate`. See `DataCoordinate.standardize`
1056 parameters.
1058 Returns
1059 -------
1060 existence : `DatasetExistence`
1061 Object indicating whether the dataset is known to registry and
1062 datastore. Evaluates to `True` if the dataset is present and known
1063 to both.
1064 """
1065 raise NotImplementedError()
1067 @abstractmethod
1068 def _exists_many(
1069 self,
1070 refs: Iterable[DatasetRef],
1071 /,
1072 *,
1073 full_check: bool = True,
1074 ) -> dict[DatasetRef, DatasetExistence]:
1075 """Indicate whether multiple datasets are known to Butler registry and
1076 datastore.
1078 This is an experimental API that may change at any moment.
1080 Parameters
1081 ----------
1082 refs : iterable of `DatasetRef`
1083 The datasets to be checked.
1084 full_check : `bool`, optional
1085 If `True`, a check will be made for the actual existence of each
1086 dataset artifact. This will involve additional overhead due to
1087 the need to query an external system. If `False`, this check will
1088 be omitted, and the registry and datastore will solely be asked
1089 if they know about the dataset(s) but no direct check for the
1090 artifact(s) will be performed.
1092 Returns
1093 -------
1094 existence : dict of [`DatasetRef`, `DatasetExistence`]
1095 Mapping from the given dataset refs to an enum indicating the
1096 status of the dataset in registry and datastore.
1097 Each value evaluates to `True` if the dataset is present and known
1098 to both.
1099 """
1100 raise NotImplementedError()
1102 @abstractmethod
1103 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1104 """Remove one or more `~CollectionType.RUN` collections and the
1105 datasets within them.
1107 Parameters
1108 ----------
1109 names : `~collections.abc.Iterable` [ `str` ]
1110 The names of the collections to remove.
1111 unstore : `bool`, optional
1112 If `True` (default), delete datasets from all datastores in which
1113 they are present, and attempt to rollback the registry deletions if
1114 datastore deletions fail (which may not always be possible). If
1115 `False`, datastore records for these datasets are still removed,
1116 but any artifacts (e.g. files) will not be.
1118 Raises
1119 ------
1120 TypeError
1121 Raised if one or more collections are not of type
1122 `~CollectionType.RUN`.
1123 """
1124 raise NotImplementedError()
1126 @abstractmethod
1127 def ingest(
1128 self,
1129 *datasets: FileDataset,
1130 transfer: str | None = "auto",
1131 record_validation_info: bool = True,
1132 ) -> None:
1133 """Store and register one or more datasets that already exist on disk.
1135 Parameters
1136 ----------
1137 *datasets : `FileDataset`
1138 Each positional argument is a struct containing information about
1139 a file to be ingested, including its URI (either absolute or
1140 relative to the datastore root, if applicable), a resolved
1141 `DatasetRef`, and optionally a formatter class or its
1142 fully-qualified string name. If a formatter is not provided, the
1143 formatter that would be used for `put` is assumed. On successful
1144 ingest all `FileDataset.formatter` attributes will be set to the
1145 formatter class used. `FileDataset.path` attributes may be modified
1146 to put paths in whatever the datastore considers a standardized
1147 form.
1148 transfer : `str`, optional
1149 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1150 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1151 transfer the file.
1152 record_validation_info : `bool`, optional
1153 If `True`, the default, the datastore can record validation
1154 information associated with the file. If `False` the datastore
1155 will not attempt to track any information such as checksums
1156 or file sizes. This can be useful if such information is tracked
1157 in an external system or if the file is to be compressed in place.
1158 It is up to the datastore whether this parameter is relevant.
1160 Raises
1161 ------
1162 TypeError
1163 Raised if the butler is read-only or if no run was provided.
1164 NotImplementedError
1165 Raised if the `Datastore` does not support the given transfer mode.
1166 DatasetTypeNotSupportedError
1167 Raised if one or more files to be ingested have a dataset type that
1168 is not supported by the `Datastore`..
1169 FileNotFoundError
1170 Raised if one of the given files does not exist.
1171 FileExistsError
1172 Raised if transfer is not `None` but the (internal) location the
1173 file would be moved to is already occupied.
1175 Notes
1176 -----
1177 This operation is not fully exception safe: if a database operation
1178 fails, the given `FileDataset` instances may be only partially updated.
1180 It is atomic in terms of database operations (they will either all
1181 succeed or all fail) providing the database engine implements
1182 transactions correctly. It will attempt to be atomic in terms of
1183 filesystem operations as well, but this cannot be implemented
1184 rigorously for most datastores.
1185 """
1186 raise NotImplementedError()
1188 @abstractmethod
1189 def export(
1190 self,
1191 *,
1192 directory: str | None = None,
1193 filename: str | None = None,
1194 format: str | None = None,
1195 transfer: str | None = None,
1196 ) -> AbstractContextManager[RepoExportContext]:
1197 """Export datasets from the repository represented by this `Butler`.
1199 This method is a context manager that returns a helper object
1200 (`RepoExportContext`) that is used to indicate what information from
1201 the repository should be exported.
1203 Parameters
1204 ----------
1205 directory : `str`, optional
1206 Directory dataset files should be written to if ``transfer`` is not
1207 `None`.
1208 filename : `str`, optional
1209 Name for the file that will include database information associated
1210 with the exported datasets. If this is not an absolute path and
1211 ``directory`` is not `None`, it will be written to ``directory``
1212 instead of the current working directory. Defaults to
1213 "export.{format}".
1214 format : `str`, optional
1215 File format for the database information file. If `None`, the
1216 extension of ``filename`` will be used.
1217 transfer : `str`, optional
1218 Transfer mode passed to `Datastore.export`.
1220 Raises
1221 ------
1222 TypeError
1223 Raised if the set of arguments passed is inconsistent.
1225 Examples
1226 --------
1227 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1228 methods are used to provide the iterables over data IDs and/or datasets
1229 to be exported::
1231 with butler.export("exports.yaml") as export:
1232 # Export all flats, but none of the dimension element rows
1233 # (i.e. data ID information) associated with them.
1234 export.saveDatasets(butler.registry.queryDatasets("flat"),
1235 elements=())
1236 # Export all datasets that start with "deepCoadd_" and all of
1237 # their associated data ID information.
1238 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1239 """
1240 raise NotImplementedError()
1242 @abstractmethod
1243 def import_(
1244 self,
1245 *,
1246 directory: ResourcePathExpression | None = None,
1247 filename: ResourcePathExpression | TextIO | None = None,
1248 format: str | None = None,
1249 transfer: str | None = None,
1250 skip_dimensions: set | None = None,
1251 ) -> None:
1252 """Import datasets into this repository that were exported from a
1253 different butler repository via `~lsst.daf.butler.Butler.export`.
1255 Parameters
1256 ----------
1257 directory : `~lsst.resources.ResourcePathExpression`, optional
1258 Directory containing dataset files to import from. If `None`,
1259 ``filename`` and all dataset file paths specified therein must
1260 be absolute.
1261 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1262 A stream or name of file that contains database information
1263 associated with the exported datasets, typically generated by
1264 `~lsst.daf.butler.Butler.export`. If this a string (name) or
1265 `~lsst.resources.ResourcePath` and is not an absolute path,
1266 it will first be looked for relative to ``directory`` and if not
1267 found there it will be looked for in the current working
1268 directory. Defaults to "export.{format}".
1269 format : `str`, optional
1270 File format for ``filename``. If `None`, the extension of
1271 ``filename`` will be used.
1272 transfer : `str`, optional
1273 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1274 skip_dimensions : `set`, optional
1275 Names of dimensions that should be skipped and not imported.
1277 Raises
1278 ------
1279 TypeError
1280 Raised if the set of arguments passed is inconsistent, or if the
1281 butler is read-only.
1282 """
1283 raise NotImplementedError()
1285 @abstractmethod
1286 def transfer_dimension_records_from(
1287 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1288 ) -> None:
1289 """Transfer dimension records to this Butler from another Butler.
1291 Parameters
1292 ----------
1293 source_butler : `LimitedButler` or `Butler`
1294 Butler from which the records are to be transferred. If data IDs
1295 in ``source_refs`` are not expanded then this has to be a full
1296 `Butler` whose registry will be used to expand data IDs. If the
1297 source refs contain coordinates that are used to populate other
1298 records then this will also need to be a full `Butler`.
1299 source_refs : iterable of `DatasetRef`
1300 Datasets defined in the source butler whose dimension records
1301 should be transferred to this butler. In most circumstances.
1302 transfer is faster if the dataset refs are expanded.
1303 """
1304 raise NotImplementedError()
1306 @abstractmethod
1307 def transfer_from(
1308 self,
1309 source_butler: LimitedButler,
1310 source_refs: Iterable[DatasetRef],
1311 transfer: str = "auto",
1312 skip_missing: bool = True,
1313 register_dataset_types: bool = False,
1314 transfer_dimensions: bool = False,
1315 dry_run: bool = False,
1316 ) -> Collection[DatasetRef]:
1317 """Transfer datasets to this Butler from a run in another Butler.
1319 Parameters
1320 ----------
1321 source_butler : `LimitedButler`
1322 Butler from which the datasets are to be transferred. If data IDs
1323 in ``source_refs`` are not expanded then this has to be a full
1324 `Butler` whose registry will be used to expand data IDs.
1325 source_refs : iterable of `DatasetRef`
1326 Datasets defined in the source butler that should be transferred to
1327 this butler. In most circumstances, ``transfer_from`` is faster if
1328 the dataset refs are expanded.
1329 transfer : `str`, optional
1330 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1331 skip_missing : `bool`
1332 If `True`, datasets with no datastore artifact associated with
1333 them are not transferred. If `False` a registry entry will be
1334 created even if no datastore record is created (and so will
1335 look equivalent to the dataset being unstored).
1336 register_dataset_types : `bool`
1337 If `True` any missing dataset types are registered. Otherwise
1338 an exception is raised.
1339 transfer_dimensions : `bool`, optional
1340 If `True`, dimension record data associated with the new datasets
1341 will be transferred.
1342 dry_run : `bool`, optional
1343 If `True` the transfer will be processed without any modifications
1344 made to the target butler and as if the target butler did not
1345 have any of the datasets.
1347 Returns
1348 -------
1349 refs : `list` of `DatasetRef`
1350 The refs added to this Butler.
1352 Notes
1353 -----
1354 The datastore artifact has to exist for a transfer
1355 to be made but non-existence is not an error.
1357 Datasets that already exist in this run will be skipped.
1359 The datasets are imported as part of a transaction, although
1360 dataset types are registered before the transaction is started.
1361 This means that it is possible for a dataset type to be registered
1362 even though transfer has failed.
1363 """
1364 raise NotImplementedError()
1366 @abstractmethod
1367 def validateConfiguration(
1368 self,
1369 logFailures: bool = False,
1370 datasetTypeNames: Iterable[str] | None = None,
1371 ignore: Iterable[str] | None = None,
1372 ) -> None:
1373 """Validate butler configuration.
1375 Checks that each `DatasetType` can be stored in the `Datastore`.
1377 Parameters
1378 ----------
1379 logFailures : `bool`, optional
1380 If `True`, output a log message for every validation error
1381 detected.
1382 datasetTypeNames : iterable of `str`, optional
1383 The `DatasetType` names that should be checked. This allows
1384 only a subset to be selected.
1385 ignore : iterable of `str`, optional
1386 Names of DatasetTypes to skip over. This can be used to skip
1387 known problems. If a named `DatasetType` corresponds to a
1388 composite, all components of that `DatasetType` will also be
1389 ignored.
1391 Raises
1392 ------
1393 ButlerValidationError
1394 Raised if there is some inconsistency with how this Butler
1395 is configured.
1396 """
1397 raise NotImplementedError()
1399 @property
1400 @abstractmethod
1401 def collections(self) -> Sequence[str]:
1402 """The collections to search by default, in order
1403 (`~collections.abc.Sequence` [ `str` ]).
1404 """
1405 raise NotImplementedError()
1407 @property
1408 @abstractmethod
1409 def run(self) -> str | None:
1410 """Name of the run this butler writes outputs to by default (`str` or
1411 `None`).
1412 """
1413 raise NotImplementedError()
1415 @property
1416 @abstractmethod
1417 def registry(self) -> Registry:
1418 """The object that manages dataset metadata and relationships
1419 (`Registry`).
1421 Many operations that don't involve reading or writing butler datasets
1422 are accessible only via `Registry` methods. Eventually these methods
1423 will be replaced by equivalent `Butler` methods.
1424 """
1425 raise NotImplementedError()
1427 @abstractmethod
1428 def _query(self) -> AbstractContextManager[Query]:
1429 """Context manager returning a `Query` object used for construction
1430 and execution of complex queries.
1431 """
1432 raise NotImplementedError()
1434 def _query_data_ids(
1435 self,
1436 dimensions: DimensionGroup | Iterable[str] | str,
1437 *,
1438 data_id: DataId | None = None,
1439 where: str = "",
1440 bind: Mapping[str, Any] | None = None,
1441 with_dimension_records: bool = False,
1442 order_by: Iterable[str] | str | None = None,
1443 limit: int | None = None,
1444 offset: int = 0,
1445 explain: bool = True,
1446 **kwargs: Any,
1447 ) -> list[DataCoordinate]:
1448 """Query for data IDs matching user-provided criteria.
1450 Parameters
1451 ----------
1452 dimensions : `DimensionGroup`, `str`, or \
1453 `~collections.abc.Iterable` [`str`]
1454 The dimensions of the data IDs to yield, as either `DimensionGroup`
1455 instances or `str`. Will be automatically expanded to a complete
1456 `DimensionGroup`.
1457 data_id : `dict` or `DataCoordinate`, optional
1458 A data ID whose key-value pairs are used as equality constraints
1459 in the query.
1460 where : `str`, optional
1461 A string expression similar to a SQL WHERE clause. May involve
1462 any column of a dimension table or (as a shortcut for the primary
1463 key column of a dimension table) dimension name. See
1464 :ref:`daf_butler_dimension_expressions` for more information.
1465 bind : `~collections.abc.Mapping`, optional
1466 Mapping containing literal values that should be injected into the
1467 ``where`` expression, keyed by the identifiers they replace.
1468 Values of collection type can be expanded in some cases; see
1469 :ref:`daf_butler_dimension_expressions_identifiers` for more
1470 information.
1471 with_dimension_records : `bool`, optional
1472 If `True` (default is `False`) then returned data IDs will have
1473 dimension records.
1474 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1475 Names of the columns/dimensions to use for ordering returned data
1476 IDs. Column name can be prefixed with minus (``-``) to use
1477 descending ordering.
1478 limit : `int`, optional
1479 Upper limit on the number of returned records.
1480 offset : `int`, optional
1481 The number of records to skip before returning at most ``limit``
1482 records. If ``offset`` is specified then ``limit`` must be
1483 specified as well.
1484 explain : `bool`, optional
1485 If `True` (default) then `EmptyQueryResultError` exception is
1486 raised when resulting list is empty. The exception contains
1487 non-empty list of strings explaining possible causes for empty
1488 result.
1489 **kwargs
1490 Additional keyword arguments are forwarded to
1491 `DataCoordinate.standardize` when processing the ``data_id``
1492 argument (and may be used to provide a constraining data ID even
1493 when the ``data_id`` argument is `None`).
1495 Returns
1496 -------
1497 dataIds : `list` [`DataCoordinate`]
1498 Data IDs matching the given query parameters. These are always
1499 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1500 returns `True`).
1502 Raises
1503 ------
1504 lsst.daf.butler.registry.DataIdError
1505 Raised when ``data_id`` or keyword arguments specify unknown
1506 dimensions or values, or when they contain inconsistent values.
1507 lsst.daf.butler.registry.UserExpressionError
1508 Raised when ``where`` expression is invalid.
1509 lsst.daf.butler.EmptyQueryResultError
1510 Raised when query generates empty result and ``explain`` is set to
1511 `True`.
1512 TypeError
1513 Raised when the arguments are incompatible, e.g. ``offset`` is
1514 specified, but ``limit`` is not.
1515 """
1516 if data_id is None:
1517 data_id = DataCoordinate.make_empty(self.dimensions)
1518 with self._query() as query:
1519 result = (
1520 query.where(data_id, where, bind=bind, **kwargs)
1521 .data_ids(dimensions)
1522 .order_by(*ensure_iterable(order_by))
1523 .limit(limit, offset)
1524 )
1525 if with_dimension_records:
1526 result = result.with_dimension_records()
1527 data_ids = list(result)
1528 if explain and not data_ids:
1529 raise EmptyQueryResultError(list(result.explain_no_results()))
1530 return data_ids
1532 def _query_datasets(
1533 self,
1534 dataset_type: str | Iterable[str] | DatasetType | Iterable[DatasetType] | EllipsisType,
1535 collections: str | Iterable[str] | None = None,
1536 *,
1537 find_first: bool = True,
1538 data_id: DataId | None = None,
1539 where: str = "",
1540 bind: Mapping[str, Any] | None = None,
1541 with_dimension_records: bool = False,
1542 explain: bool = True,
1543 **kwargs: Any,
1544 ) -> list[DatasetRef]:
1545 """Query for dataset references matching user-provided criteria.
1547 Parameters
1548 ----------
1549 dataset_type : dataset type expression
1550 An expression that fully or partially identifies the dataset types
1551 to be queried. Allowed types include `DatasetType`, `str`, and
1552 iterables thereof. The special value ``...`` can be used to query
1553 all dataset types. See :ref:`daf_butler_dataset_type_expressions`
1554 for more information.
1555 collections : collection expression, optional
1556 A collection name or iterable of collection names to search. If not
1557 provided, the default collections are used. See
1558 :ref:`daf_butler_collection_expressions` for more information.
1559 find_first : `bool`, optional
1560 If `True` (default), for each result data ID, only yield one
1561 `DatasetRef` of each `DatasetType`, from the first collection in
1562 which a dataset of that dataset type appears (according to the
1563 order of ``collections`` passed in). If `True`, ``collections``
1564 must not contain regular expressions and may not be ``...``.
1565 data_id : `dict` or `DataCoordinate`, optional
1566 A data ID whose key-value pairs are used as equality constraints in
1567 the query.
1568 where : `str`, optional
1569 A string expression similar to a SQL WHERE clause. May involve any
1570 column of a dimension table or (as a shortcut for the primary key
1571 column of a dimension table) dimension name. See
1572 :ref:`daf_butler_dimension_expressions` for more information.
1573 bind : `~collections.abc.Mapping`, optional
1574 Mapping containing literal values that should be injected into the
1575 ``where`` expression, keyed by the identifiers they replace. Values
1576 of collection type can be expanded in some cases; see
1577 :ref:`daf_butler_dimension_expressions_identifiers` for more
1578 information.
1579 with_dimension_records : `bool`, optional
1580 If `True` (default is `False`) then returned data IDs will have
1581 dimension records.
1582 explain : `bool`, optional
1583 If `True` (default) then `EmptyQueryResultError` exception is
1584 raised when resulting list is empty. The exception contains
1585 non-empty list of strings explaining possible causes for empty
1586 result.
1587 **kwargs
1588 Additional keyword arguments are forwarded to
1589 `DataCoordinate.standardize` when processing the ``data_id``
1590 argument (and may be used to provide a constraining data ID even
1591 when the ``data_id`` argument is `None`).
1593 Returns
1594 -------
1595 refs : `.queries.DatasetQueryResults`
1596 Dataset references matching the given query criteria. Nested data
1597 IDs are guaranteed to include values for all implied dimensions
1598 (i.e. `DataCoordinate.hasFull` will return `True`).
1600 Raises
1601 ------
1602 lsst.daf.butler.registry.DatasetTypeExpressionError
1603 Raised when ``dataset_type`` expression is invalid.
1604 lsst.daf.butler.registry.DataIdError
1605 Raised when ``data_id`` or keyword arguments specify unknown
1606 dimensions or values, or when they contain inconsistent values.
1607 lsst.daf.butler.registry.UserExpressionError
1608 Raised when ``where`` expression is invalid.
1609 lsst.daf.butler.EmptyQueryResultError
1610 Raised when query generates empty result and ``explain`` is set to
1611 `True`.
1612 TypeError
1613 Raised when the arguments are incompatible, such as when a
1614 collection wildcard is passed when ``find_first`` is `True`, or
1615 when ``collections`` is `None` and default butler collections are
1616 not defined.
1618 Notes
1619 -----
1620 When multiple dataset types are queried in a single call, the results
1621 of this operation are equivalent to querying for each dataset type
1622 separately in turn, and no information about the relationships between
1623 datasets of different types is included.
1624 """
1625 if data_id is None:
1626 data_id = DataCoordinate.make_empty(self.dimensions)
1627 with self._query() as query:
1628 result = query.where(data_id, where, bind=bind, **kwargs).datasets(
1629 dataset_type,
1630 collections=collections,
1631 find_first=find_first,
1632 )
1633 if with_dimension_records:
1634 result = result.with_dimension_records()
1635 refs = list(result)
1636 if explain and not refs:
1637 raise EmptyQueryResultError(list(result.explain_no_results()))
1638 return refs
1640 def _query_dimension_records(
1641 self,
1642 element: str,
1643 *,
1644 data_id: DataId | None = None,
1645 where: str = "",
1646 bind: Mapping[str, Any] | None = None,
1647 order_by: Iterable[str] | str | None = None,
1648 limit: int | None = None,
1649 offset: int = 0,
1650 explain: bool = True,
1651 **kwargs: Any,
1652 ) -> list[DimensionRecord]:
1653 """Query for dimension information matching user-provided criteria.
1655 Parameters
1656 ----------
1657 element : `str`
1658 The name of a dimension element to obtain records for.
1659 data_id : `dict` or `DataCoordinate`, optional
1660 A data ID whose key-value pairs are used as equality constraints
1661 in the query.
1662 where : `str`, optional
1663 A string expression similar to a SQL WHERE clause. See
1664 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1665 information.
1666 bind : `~collections.abc.Mapping`, optional
1667 Mapping containing literal values that should be injected into the
1668 ``where`` expression, keyed by the identifiers they replace.
1669 Values of collection type can be expanded in some cases; see
1670 :ref:`daf_butler_dimension_expressions_identifiers` for more
1671 information.
1672 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1673 Names of the columns/dimensions to use for ordering returned data
1674 IDs. Column name can be prefixed with minus (``-``) to use
1675 descending ordering.
1676 limit : `int`, optional
1677 Upper limit on the number of returned records.
1678 offset : `int`, optional
1679 The number of records to skip before returning at most ``limit``
1680 records. If ``offset`` is specified then ``limit`` must be
1681 specified as well.
1682 explain : `bool`, optional
1683 If `True` (default) then `EmptyQueryResultError` exception is
1684 raised when resulting list is empty. The exception contains
1685 non-empty list of strings explaining possible causes for empty
1686 result.
1687 **kwargs
1688 Additional keyword arguments are forwarded to
1689 `DataCoordinate.standardize` when processing the ``data_id``
1690 argument (and may be used to provide a constraining data ID even
1691 when the ``data_id`` argument is `None`).
1693 Returns
1694 -------
1695 records : `list`[`DimensionRecord`]
1696 Dimension records matching the given query parameters.
1698 Raises
1699 ------
1700 lsst.daf.butler.registry.DataIdError
1701 Raised when ``data_id`` or keyword arguments specify unknown
1702 dimensions or values, or when they contain inconsistent values.
1703 lsst.daf.butler.registry.UserExpressionError
1704 Raised when ``where`` expression is invalid.
1705 lsst.daf.butler.EmptyQueryResultError
1706 Raised when query generates empty result and ``explain`` is set to
1707 `True`.
1708 TypeError
1709 Raised when the arguments are incompatible, such as when a
1710 collection wildcard is passed when ``find_first`` is `True`, or
1711 when ``collections`` is `None` and default butler collections are
1712 not defined.
1713 """
1714 if data_id is None:
1715 data_id = DataCoordinate.make_empty(self.dimensions)
1716 with self._query() as query:
1717 result = (
1718 query.where(data_id, where, bind=bind, **kwargs)
1719 .dimension_records(element)
1720 .order_by(*ensure_iterable(order_by))
1721 .limit(limit, offset)
1722 )
1723 dimension_records = list(result)
1724 if explain and not dimension_records:
1725 raise EmptyQueryResultError(list(result.explain_no_results()))
1726 return dimension_records
1728 @abstractmethod
1729 def _clone(
1730 self,
1731 *,
1732 collections: Any = None,
1733 run: str | None = None,
1734 inferDefaults: bool = True,
1735 **kwargs: Any,
1736 ) -> Butler:
1737 """Return a new Butler instance connected to the same repository
1738 as this one, but overriding ``collections``, ``run``,
1739 ``inferDefaults``, and default data ID.
1740 """
1741 raise NotImplementedError()