Coverage for python/lsst/daf/butler/_butler.py: 51%
181 statements
coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["Butler"]
32from abc import abstractmethod
33from collections.abc import Collection, Iterable, Mapping, Sequence
34from contextlib import AbstractContextManager
35from typing import TYPE_CHECKING, Any, TextIO
37from lsst.resources import ResourcePath, ResourcePathExpression
38from lsst.utils import doImportType
39from lsst.utils.iteration import ensure_iterable
40from lsst.utils.logging import getLogger
42from ._butler_config import ButlerConfig, ButlerType
43from ._butler_instance_options import ButlerInstanceOptions
44from ._butler_repo_index import ButlerRepoIndex
45from ._config import Config, ConfigSubset
46from ._exceptions import EmptyQueryResultError
47from ._limited_butler import LimitedButler
48from .datastore import Datastore
49from .dimensions import DimensionConfig
50from .registry import RegistryConfig, _RegistryFactory
51from .repo_relocation import BUTLER_ROOT_TAG
53if TYPE_CHECKING:
54 from ._dataset_existence import DatasetExistence
55 from ._dataset_ref import DatasetId, DatasetRef
56 from ._dataset_type import DatasetType
57 from ._deferredDatasetHandle import DeferredDatasetHandle
58 from ._file_dataset import FileDataset
59 from ._storage_class import StorageClass
60 from ._timespan import Timespan
61 from .datastore import DatasetRefURIs
62 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
63 from .queries import Query
64 from .registry import Registry
65 from .transfers import RepoExportContext
67_LOG = getLogger(__name__)
70class Butler(LimitedButler): # numpydoc ignore=PR02
71 """Interface for data butler and factory for Butler instances.
73 Parameters
74 ----------
75 config : `ButlerConfig`, `Config` or `str`, optional
76 Configuration. Anything acceptable to the `ButlerConfig` constructor.
77 If a directory path is given the configuration will be read from a
78 ``butler.yaml`` file in that location. If `None` is given default
79 values will be used. If ``config`` contains a "cls" key then its value is
80 used as the name of the butler class, which must be a sub-class of this
81 class; otherwise `DirectButler` is instantiated.
82 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
83 An expression specifying the collections to be searched (in order) when
84 reading datasets.
85 This may be a `str` collection name or an iterable thereof.
86 See :ref:`daf_butler_collection_expressions` for more information.
87 These collections are not registered automatically and must be
88 manually registered before they are used by any method, but they may be
89 manually registered after the `Butler` is initialized.
90 run : `str`, optional
91 Name of the `~CollectionType.RUN` collection new datasets should be
92 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
93 ``collections`` will be set to ``[run]``. If not `None`, this
94 collection will automatically be registered. If this is not set (and
95 ``writeable`` is not set either), a read-only butler will be created.
96 searchPaths : `list` of `str`, optional
97 Directory paths to search when calculating the full Butler
98 configuration. Not used if the supplied config is already a
99 `ButlerConfig`.
100 writeable : `bool`, optional
101 Explicitly sets whether the butler supports write operations. If not
102 provided, a read-write butler is created if any of ``run``, ``tags``,
103 or ``chains`` is non-empty.
104 inferDefaults : `bool`, optional
105 If `True` (default) infer default data ID values from the values
106 present in the datasets in ``collections``: if all collections have the
107 same value (or no value) for a governor dimension, that value will be
108 the default for that dimension. Nonexistent collections are ignored.
109 If a default value is provided explicitly for a governor dimension via
110 ``**kwargs``, no default will be inferred for that dimension.
111 without_datastore : `bool`, optional
112 If `True` do not attach a datastore to this butler. Any attempts
113 to use a datastore will fail.
114 **kwargs : `Any`
115 Additional keyword arguments passed to the constructor of the actual
116 butler class.
118 Notes
119 -----
120 The preferred way to instantiate Butler is via the `from_config` method.
121 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
122 but ``mypy`` will complain about the former.
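Examples
--------
A minimal sketch illustrating the equivalence described above; the
repository path and collection name are illustrative::

    # These two calls construct the same butler; the second form keeps
    # mypy happy.
    butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"])
    butler = Butler.from_config("/path/to/repo", collections=["u/alice/DM-50000"])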
123 """
125 def __new__(
126 cls,
127 config: Config | ResourcePathExpression | None = None,
128 *,
129 collections: Any = None,
130 run: str | None = None,
131 searchPaths: Sequence[ResourcePathExpression] | None = None,
132 writeable: bool | None = None,
133 inferDefaults: bool = True,
134 without_datastore: bool = False,
135 **kwargs: Any,
136 ) -> Butler:
137 if cls is Butler:
138 return Butler.from_config(
139 config=config,
140 collections=collections,
141 run=run,
142 searchPaths=searchPaths,
143 writeable=writeable,
144 inferDefaults=inferDefaults,
145 without_datastore=without_datastore,
146 **kwargs,
147 )
149 # Note: we do not pass any parameters to __new__; Python will pass them
150 # to __init__ after __new__ returns the sub-class instance.
151 return super().__new__(cls)
153 @classmethod
154 def from_config(
155 cls,
156 config: Config | ResourcePathExpression | None = None,
157 *,
158 collections: Any = None,
159 run: str | None = None,
160 searchPaths: Sequence[ResourcePathExpression] | None = None,
161 writeable: bool | None = None,
162 inferDefaults: bool = True,
163 without_datastore: bool = False,
164 **kwargs: Any,
165 ) -> Butler:
166 """Create butler instance from configuration.
168 Parameters
169 ----------
170 config : `ButlerConfig`, `Config` or `str`, optional
171 Configuration. Anything acceptable to the `ButlerConfig`
172 constructor. If a directory path is given the configuration will be
173 read from a ``butler.yaml`` file in that location. If `None` is
174 given default values will be used. If ``config`` contains a "cls" key
175 then its value is used as the name of the butler class, which must be a
176 sub-class of this class; otherwise `DirectButler` is instantiated.
177 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
178 An expression specifying the collections to be searched (in order)
179 when reading datasets.
180 This may be a `str` collection name or an iterable thereof.
181 See :ref:`daf_butler_collection_expressions` for more information.
182 These collections are not registered automatically and must be
183 manually registered before they are used by any method, but they
184 may be manually registered after the `Butler` is initialized.
185 run : `str`, optional
186 Name of the `~CollectionType.RUN` collection new datasets should be
187 inserted into. If ``collections`` is `None` and ``run`` is not
188 `None`, ``collections`` will be set to ``[run]``. If not `None`,
189 this collection will automatically be registered. If this is not
190 set (and ``writeable`` is not set either), a read-only butler will
191 be created.
192 searchPaths : `list` of `str`, optional
193 Directory paths to search when calculating the full Butler
194 configuration. Not used if the supplied config is already a
195 `ButlerConfig`.
196 writeable : `bool`, optional
197 Explicitly sets whether the butler supports write operations. If
198 not provided, a read-write butler is created if any of ``run``,
199 ``tags``, or ``chains`` is non-empty.
200 inferDefaults : `bool`, optional
201 If `True` (default) infer default data ID values from the values
202 present in the datasets in ``collections``: if all collections have
203 the same value (or no value) for a governor dimension, that value
204 will be the default for that dimension. Nonexistent collections
205 are ignored. If a default value is provided explicitly for a
206 governor dimension via ``**kwargs``, no default will be inferred
207 for that dimension.
208 without_datastore : `bool`, optional
209 If `True` do not attach a datastore to this butler. Any attempts
210 to use a datastore will fail.
211 **kwargs : `Any`
212 Default data ID key-value pairs. These may only identify
213 "governor" dimensions like ``instrument`` and ``skymap``.
215 Returns
216 -------
217 butler : `Butler`
218 A `Butler` constructed from the given configuration.
220 Notes
221 -----
222 Calling this factory method is identical to calling
223 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
224 complains about a direct ``Butler()`` call.
226 Examples
227 --------
228 While there are many ways to control exactly how a `Butler` interacts
229 with the collections in its `Registry`, the most common cases are still
230 simple.
232 For a read-only `Butler` that searches one collection, do::
234 butler = Butler.from_config(
235 "/path/to/repo", collections=["u/alice/DM-50000"]
236 )
238 For a read-write `Butler` that writes to and reads from a
239 `~CollectionType.RUN` collection::
241 butler = Butler.from_config(
242 "/path/to/repo", run="u/alice/DM-50000/a"
243 )
245 The `Butler` passed to a ``PipelineTask`` is often much more complex,
246 because we want to write to one `~CollectionType.RUN` collection but
247 read from several others (as well)::
249 butler = Butler.from_config(
250 "/path/to/repo",
251 run="u/alice/DM-50000/a",
252 collections=[
253 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
254 ]
255 )
257 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
258 Datasets will be read first from that run (since it appears first in
259 the chain), and then from ``u/bob/DM-49998`` and finally
260 ``HSC/defaults``.
262 Finally, one can always create a `Butler` with no collections::
264 butler = Butler.from_config("/path/to/repo", writeable=True)
266 This can be extremely useful when you just want to use
267 ``butler.registry``, e.g. for inserting dimension data or managing
268 collections, or when the collections you want to use with the butler
269 are not consistent. Passing ``writeable`` explicitly here is only
270 necessary if you want to be able to make changes to the repo; usually
271 the value for ``writeable`` can be guessed from the collection
272 arguments provided, but it defaults to `False` when there are no
273 collection arguments.
274 """
275 # DirectButler used to have a way to specify a "copy constructor" by
276 # passing the "butler" parameter to its constructor. This
277 # functionality has been moved out of the constructor into
278 # Butler._clone(), but the new interface is not public yet.
279 butler = kwargs.pop("butler", None)
280 if butler is not None:
281 if not isinstance(butler, Butler):
282 raise TypeError("'butler' parameter must be a Butler instance")
283 if config is not None or searchPaths is not None or writeable is not None:
284 raise TypeError(
285 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
286 )
287 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
289 options = ButlerInstanceOptions(
290 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
291 )
293 # Load the Butler configuration. This may involve searching the
294 # environment to locate a configuration file.
295 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
296 butler_type = butler_config.get_butler_type()
298 # Make DirectButler if class is not specified.
299 match butler_type:
300 case ButlerType.DIRECT:
301 from .direct_butler import DirectButler
303 return DirectButler.create_from_config(
304 butler_config,
305 options=options,
306 without_datastore=without_datastore,
307 )
308 case ButlerType.REMOTE:
309 from .remote_butler import RemoteButlerFactory
311 factory = RemoteButlerFactory.create_factory_from_config(butler_config)
312 return factory.create_butler_with_credentials_from_environment(butler_options=options)
313 case _:
314 raise TypeError(f"Unknown Butler type '{butler_type}'")
316 @staticmethod
317 def makeRepo(
318 root: ResourcePathExpression,
319 config: Config | str | None = None,
320 dimensionConfig: Config | str | None = None,
321 standalone: bool = False,
322 searchPaths: list[str] | None = None,
323 forceConfigRoot: bool = True,
324 outfile: ResourcePathExpression | None = None,
325 overwrite: bool = False,
326 ) -> Config:
327 """Create an empty data repository by adding a butler.yaml config
328 to a repository root directory.
330 Parameters
331 ----------
332 root : `lsst.resources.ResourcePathExpression`
333 Path or URI to the root location of the new repository. Will be
334 created if it does not exist.
335 config : `Config` or `str`, optional
336 Configuration to write to the repository, after setting any
337 root-dependent Registry or Datastore config options. Can not
338 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
339 configuration will be used. Root-dependent config options
340 specified in this config are overwritten if ``forceConfigRoot``
341 is `True`.
342 dimensionConfig : `Config` or `str`, optional
343 Configuration for dimensions, will be used to initialize registry
344 database.
345 standalone : `bool`
346 If `True`, write all expanded defaults, not just customized or
347 repository-specific settings.
348 This (mostly) decouples the repository from the default
349 configuration, insulating it from changes to the defaults (which
350 may be good or bad, depending on the nature of the changes).
351 Future *additions* to the defaults will still be picked up when
352 initializing `Butlers` to repos created with ``standalone=True``.
353 searchPaths : `list` of `str`, optional
354 Directory paths to search when calculating the full butler
355 configuration.
356 forceConfigRoot : `bool`, optional
357 If `False`, any values present in the supplied ``config`` that
358 would normally be reset are not overridden and will appear
359 directly in the output config. This allows non-standard overrides
360 of the root directory for a datastore or registry to be given.
361 If this parameter is `True` the values for ``root`` will be
362 forced into the resulting config if appropriate.
363 outfile : `lsst.resources.ResourcePathExpression`, optional
364 If not-`None`, the output configuration will be written to this
365 location rather than into the repository itself. Can be a URI
366 string. Can refer to a directory that will be used to write
367 ``butler.yaml``.
368 overwrite : `bool`, optional
369 Create a new configuration file even if one already exists
370 in the specified output location. Default is to raise
371 an exception.
373 Returns
374 -------
375 config : `Config`
376 The updated `Config` instance written to the repo.
378 Raises
379 ------
380 ValueError
381 Raised if a ButlerConfig or ConfigSubset is passed instead of a
382 regular Config (as these subclasses would make it impossible to
383 support ``standalone=False``).
384 FileExistsError
385 Raised if the output config file already exists.
386 os.error
387 Raised if the directory does not exist, exists but is not a
388 directory, or cannot be created.
390 Notes
391 -----
392 Note that when ``standalone=False`` (the default), the configuration
393 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
394 construct the repository should also be used to construct any Butlers
395 to avoid configuration inconsistencies.
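Examples
--------
A minimal sketch creating a new repository and then constructing a butler
against it; the path is illustrative::

    config = Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)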
396 """
397 if isinstance(config, ButlerConfig | ConfigSubset):
398 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
400 # Ensure that the root of the repository exists or can be made
401 root_uri = ResourcePath(root, forceDirectory=True)
402 root_uri.mkdir()
404 config = Config(config)
406 # If we are creating a new repo from scratch with relative roots,
407 # do not propagate an explicit root from the config file
408 if "root" in config:
409 del config["root"]
411 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
412 imported_class = doImportType(full["datastore", "cls"])
413 if not issubclass(imported_class, Datastore):
414 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
415 datastoreClass: type[Datastore] = imported_class
416 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
418 # if key exists in given config, parse it, otherwise parse the defaults
419 # in the expanded config
420 if config.get(("registry", "db")):
421 registryConfig = RegistryConfig(config)
422 else:
423 registryConfig = RegistryConfig(full)
424 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
425 if defaultDatabaseUri is not None:
426 Config.updateParameters(
427 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
428 )
429 else:
430 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
432 if standalone:
433 config.merge(full)
434 else:
435 # Always expand the registry.managers section into the per-repo
436 # config, because after the database schema is created, it's not
437 # allowed to change anymore. Note that in the standalone=True
438 # branch, _everything_ in the config is expanded, so there's no
439 # need to special case this.
440 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
441 configURI: ResourcePathExpression
442 if outfile is not None:
443 # When writing to a separate location we must include
444 # the root of the butler repo in the config else it won't know
445 # where to look.
446 config["root"] = root_uri.geturl()
447 configURI = outfile
448 else:
449 configURI = root_uri
450 # Strip obscore configuration, if it is present, before writing config
451 # to a file, obscore config will be stored in registry.
452 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
453 config_to_write = config.copy()
454 del config_to_write[obscore_config_key]
455 config_to_write.dumpToUri(configURI, overwrite=overwrite)
456 # configFile attribute is updated, need to copy it to original.
457 config.configFile = config_to_write.configFile
458 else:
459 config.dumpToUri(configURI, overwrite=overwrite)
461 # Create Registry and populate tables
462 registryConfig = RegistryConfig(config.get("registry"))
463 dimensionConfig = DimensionConfig(dimensionConfig)
464 _RegistryFactory(registryConfig).create_from_config(
465 dimensionConfig=dimensionConfig, butlerRoot=root_uri
466 )
468 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
470 return config
472 @classmethod
473 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
474 """Look up the label in a butler repository index.
476 Parameters
477 ----------
478 label : `str`
479 Label of the Butler repository to look up.
480 return_label : `bool`, optional
481 If ``label`` cannot be found in the repository index (either
482 because the index is not defined or ``label`` is not in the index) and
483 ``return_label`` is `True` then return ``ResourcePath(label)``.
484 If ``return_label`` is `False` (default) then an exception will be
485 raised instead.
487 Returns
488 -------
489 uri : `lsst.resources.ResourcePath`
490 URI to the Butler repository associated with the given label or
491 default value if it is provided.
493 Raises
494 ------
495 KeyError
496 Raised if the label is not found in the index, or if an index
497 is not defined, and ``return_label`` is `False`.
499 Notes
500 -----
501 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
502 information is discovered.
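Examples
--------
A sketch assuming a repository index that defines a label ``"main"``; the
label and path are illustrative::

    uri = Butler.get_repo_uri("main")
    # Fall back to treating the argument as a plain path or URI when it
    # is not present in the index.
    uri = Butler.get_repo_uri("/path/to/repo", return_label=True)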
503 """
504 return ButlerRepoIndex.get_repo_uri(label, return_label)
506 @classmethod
507 def get_known_repos(cls) -> set[str]:
508 """Retrieve the list of known repository labels.
510 Returns
511 -------
512 repos : `set` of `str`
513 All the known labels. Can be empty if no index can be found.
515 Notes
516 -----
517 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
518 information is discovered.
519 """
520 return ButlerRepoIndex.get_known_repos()
522 @abstractmethod
523 def _caching_context(self) -> AbstractContextManager[None]:
524 """Context manager that enables caching."""
525 raise NotImplementedError()
527 @abstractmethod
528 def transaction(self) -> AbstractContextManager[None]:
529 """Context manager supporting `Butler` transactions.
531 Transactions can be nested.
532 """
533 raise NotImplementedError()
535 @abstractmethod
536 def put(
537 self,
538 obj: Any,
539 datasetRefOrType: DatasetRef | DatasetType | str,
540 /,
541 dataId: DataId | None = None,
542 *,
543 run: str | None = None,
544 **kwargs: Any,
545 ) -> DatasetRef:
546 """Store and register a dataset.
548 Parameters
549 ----------
550 obj : `object`
551 The dataset.
552 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
553 When `DatasetRef` is provided, ``dataId`` should be `None`.
554 Otherwise the `DatasetType` or name thereof. If a fully resolved
555 `DatasetRef` is given, the run and ID are used directly.
556 dataId : `dict` or `DataCoordinate`
557 A `dict` of `Dimension` link name, value pairs that label the
558 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
559 should be provided as the second argument.
560 run : `str`, optional
561 The name of the run the dataset should be added to, overriding
562 ``self.run``. Not used if a resolved `DatasetRef` is provided.
563 **kwargs
564 Additional keyword arguments used to augment or construct a
565 `DataCoordinate`. See `DataCoordinate.standardize`
566 parameters. Not used if a resolved `DatasetRef` is provided.
568 Returns
569 -------
570 ref : `DatasetRef`
571 A reference to the stored dataset, updated with the correct id if
572 given.
574 Raises
575 ------
576 TypeError
577 Raised if the butler is read-only or if no run has been provided.
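Examples
--------
A minimal sketch; the dataset type name, data ID values, and run name are
illustrative::

    ref = butler.put(
        exposure,
        "calexp",
        instrument="HSC",
        visit=903334,
        detector=42,
        run="u/alice/DM-50000/a",
    )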
578 """
579 raise NotImplementedError()
581 @abstractmethod
582 def getDeferred(
583 self,
584 datasetRefOrType: DatasetRef | DatasetType | str,
585 /,
586 dataId: DataId | None = None,
587 *,
588 parameters: dict | None = None,
589 collections: Any = None,
590 storageClass: str | StorageClass | None = None,
591 **kwargs: Any,
592 ) -> DeferredDatasetHandle:
593 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
594 after an immediate registry lookup.
596 Parameters
597 ----------
598 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
599 When `DatasetRef` the `dataId` should be `None`.
600 Otherwise the `DatasetType` or name thereof.
601 dataId : `dict` or `DataCoordinate`, optional
602 A `dict` of `Dimension` link name, value pairs that label the
603 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
604 should be provided as the first argument.
605 parameters : `dict`
606 Additional StorageClass-defined options to control reading,
607 typically used to efficiently read only a subset of the dataset.
608 collections : Any, optional
609 Collections to be searched, overriding ``self.collections``.
610 Can be any of the types supported by the ``collections`` argument
611 to butler construction.
612 storageClass : `StorageClass` or `str`, optional
613 The storage class to be used to override the Python type
614 returned by this method. By default the returned type matches
615 the dataset type definition for this dataset. Specifying a
616 read `StorageClass` can force a different type to be returned.
617 This type must be compatible with the original type.
618 **kwargs
619 Additional keyword arguments used to augment or construct a
620 `DataId`. See `DataId` parameters.
622 Returns
623 -------
624 obj : `DeferredDatasetHandle`
625 A handle which can be used to retrieve a dataset at a later time.
627 Raises
628 ------
629 LookupError
630 Raised if no matching dataset exists in the `Registry` or
631 datastore.
632 ValueError
633 Raised if a resolved `DatasetRef` was passed as an input, but it
634 differs from the one found in the registry.
635 TypeError
636 Raised if no collections were provided.
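Examples
--------
A sketch deferring the actual read until the data are needed; the dataset
type and data ID values are illustrative::

    handle = butler.getDeferred(
        "calexp", instrument="HSC", visit=903334, detector=42
    )
    # ... later, when the dataset is actually required:
    exposure = handle.get()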
637 """
638 raise NotImplementedError()
640 @abstractmethod
641 def get(
642 self,
643 datasetRefOrType: DatasetRef | DatasetType | str,
644 /,
645 dataId: DataId | None = None,
646 *,
647 parameters: dict[str, Any] | None = None,
648 collections: Any = None,
649 storageClass: StorageClass | str | None = None,
650 **kwargs: Any,
651 ) -> Any:
652 """Retrieve a stored dataset.
654 Parameters
655 ----------
656 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
657 When `DatasetRef` the `dataId` should be `None`.
658 Otherwise the `DatasetType` or name thereof.
659 If a resolved `DatasetRef`, the associated dataset
660 is returned directly without additional querying.
661 dataId : `dict` or `DataCoordinate`
662 A `dict` of `Dimension` link name, value pairs that label the
663 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
664 should be provided as the first argument.
665 parameters : `dict`
666 Additional StorageClass-defined options to control reading,
667 typically used to efficiently read only a subset of the dataset.
668 collections : Any, optional
669 Collections to be searched, overriding ``self.collections``.
670 Can be any of the types supported by the ``collections`` argument
671 to butler construction.
672 storageClass : `StorageClass` or `str`, optional
673 The storage class to be used to override the Python type
674 returned by this method. By default the returned type matches
675 the dataset type definition for this dataset. Specifying a
676 read `StorageClass` can force a different type to be returned.
677 This type must be compatible with the original type.
678 **kwargs
679 Additional keyword arguments used to augment or construct a
680 `DataCoordinate`. See `DataCoordinate.standardize`
681 parameters.
683 Returns
684 -------
685 obj : `object`
686 The dataset.
688 Raises
689 ------
690 LookupError
691 Raised if no matching dataset exists in the `Registry`.
692 TypeError
693 Raised if no collections were provided.
695 Notes
696 -----
697 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
698 this method requires that the given data ID include temporal dimensions
699 beyond the dimensions of the dataset type itself, in order to find the
700 dataset with the appropriate validity range. For example, a "bias"
701 dataset with native dimensions ``{instrument, detector}`` could be
702 fetched with a ``{instrument, detector, exposure}`` data ID, because
703 ``exposure`` is a temporal dimension.
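Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative::

    bias = butler.get("bias", instrument="HSC", detector=42, exposure=903334)

Here the ``exposure`` value is only needed to select the matching validity
range when reading from a `~CollectionType.CALIBRATION` collection, as
described above.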
704 """
705 raise NotImplementedError()
707 @abstractmethod
708 def getURIs(
709 self,
710 datasetRefOrType: DatasetRef | DatasetType | str,
711 /,
712 dataId: DataId | None = None,
713 *,
714 predict: bool = False,
715 collections: Any = None,
716 run: str | None = None,
717 **kwargs: Any,
718 ) -> DatasetRefURIs:
719 """Return the URIs associated with the dataset.
721 Parameters
722 ----------
723 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
724 When `DatasetRef` the `dataId` should be `None`.
725 Otherwise the `DatasetType` or name thereof.
726 dataId : `dict` or `DataCoordinate`
727 A `dict` of `Dimension` link name, value pairs that label the
728 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
729 should be provided as the first argument.
730 predict : `bool`
731 If `True`, allow URIs to be returned of datasets that have not
732 been written.
733 collections : Any, optional
734 Collections to be searched, overriding ``self.collections``.
735 Can be any of the types supported by the ``collections`` argument
736 to butler construction.
737 run : `str`, optional
738 Run to use for predictions, overriding ``self.run``.
739 **kwargs
740 Additional keyword arguments used to augment or construct a
741 `DataCoordinate`. See `DataCoordinate.standardize`
742 parameters.
744 Returns
745 -------
746 uris : `DatasetRefURIs`
747 The URI to the primary artifact associated with this dataset (if
748 the dataset was disassembled within the datastore this may be
749 `None`), and the URIs to any components associated with the dataset
750 artifact (can be empty if there are no components).
751 """
752 raise NotImplementedError()
754 def getURI(
755 self,
756 datasetRefOrType: DatasetRef | DatasetType | str,
757 /,
758 dataId: DataId | None = None,
759 *,
760 predict: bool = False,
761 collections: Any = None,
762 run: str | None = None,
763 **kwargs: Any,
764 ) -> ResourcePath:
765 """Return the URI to the Dataset.
767 Parameters
768 ----------
769 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
770 When `DatasetRef` the `dataId` should be `None`.
771 Otherwise the `DatasetType` or name thereof.
772 dataId : `dict` or `DataCoordinate`
773 A `dict` of `Dimension` link name, value pairs that label the
774 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
775 should be provided as the first argument.
776 predict : `bool`
777 If `True`, allow URIs to be returned of datasets that have not
778 been written.
779 collections : Any, optional
780 Collections to be searched, overriding ``self.collections``.
781 Can be any of the types supported by the ``collections`` argument
782 to butler construction.
783 run : `str`, optional
784 Run to use for predictions, overriding ``self.run``.
785 **kwargs
786 Additional keyword arguments used to augment or construct a
787 `DataCoordinate`. See `DataCoordinate.standardize`
788 parameters.
790 Returns
791 -------
792 uri : `lsst.resources.ResourcePath`
793 URI pointing to the Dataset within the datastore. If the
794 Dataset does not exist in the datastore, and if ``predict`` is
795 `True`, the URI will be a prediction and will include a URI
796 fragment "#predicted".
797 If the datastore does not have entities that relate well
798 to the concept of a URI the returned URI string will be
799 descriptive. The returned URI is not guaranteed to be obtainable.
801 Raises
802 ------
803 LookupError
804 Raised if a URI has been requested for a dataset that does not
805 exist and guessing is not allowed.
806 ValueError
807 Raised if a resolved `DatasetRef` was passed as an input, but it
808 differs from the one found in the registry.
809 TypeError
810 Raised if no collections were provided.
811 RuntimeError
812 Raised if a URI is requested for a dataset that consists of
813 multiple artifacts.
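Examples
--------
A sketch; the dataset type, data ID values, and the use of
`~lsst.resources.ResourcePath.read` are illustrative::

    uri = butler.getURI("calexp", instrument="HSC", visit=903334, detector=42)
    contents = uri.read()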
814 """
815 primary, components = self.getURIs(
816 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
817 )
819 if primary is None or components:
820 raise RuntimeError(
821 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
822 "Use Butler.getURIs() instead."
823 )
824 return primary
826 @abstractmethod
827 def get_dataset_type(self, name: str) -> DatasetType:
828 """Get the `DatasetType`.
830 Parameters
831 ----------
832 name : `str`
833 Name of the type.
835 Returns
836 -------
837 type : `DatasetType`
838 The `DatasetType` associated with the given name.
840 Raises
841 ------
842 lsst.daf.butler.MissingDatasetTypeError
843 Raised if the requested dataset type has not been registered.
845 Notes
846 -----
847 This method handles component dataset types automatically, though most
848 other operations do not.
849 """
850 raise NotImplementedError()
852 @abstractmethod
853 def get_dataset(
854 self,
855 id: DatasetId,
856 *,
857 storage_class: str | StorageClass | None = None,
858 dimension_records: bool = False,
859 datastore_records: bool = False,
860 ) -> DatasetRef | None:
861 """Retrieve a Dataset entry.
863 Parameters
864 ----------
865 id : `DatasetId`
866 The unique identifier for the dataset.
867 storage_class : `str` or `StorageClass` or `None`
868 A storage class to use when creating the returned entry. If given
869 it must be compatible with the default storage class.
870 dimension_records : `bool`, optional
871 If `True` the ref will be expanded and contain dimension records.
872 datastore_records : `bool`, optional
873 If `True` the ref will contain associated datastore records.
875 Returns
876 -------
877 ref : `DatasetRef` or `None`
878 A ref to the Dataset, or `None` if no matching Dataset
879 was found.
880 """
881 raise NotImplementedError()
883 @abstractmethod
884 def find_dataset(
885 self,
886 dataset_type: DatasetType | str,
887 data_id: DataId | None = None,
888 *,
889 collections: str | Sequence[str] | None = None,
890 timespan: Timespan | None = None,
891 storage_class: str | StorageClass | None = None,
892 dimension_records: bool = False,
893 datastore_records: bool = False,
894 **kwargs: Any,
895 ) -> DatasetRef | None:
896 """Find a dataset given its `DatasetType` and data ID.
898 This can be used to obtain a `DatasetRef` that permits the dataset to
899 be read from a `Datastore`. If the dataset is a component and can not
900 be found using the provided dataset type, a dataset ref for the parent
901 will be returned instead but with the correct dataset type.
903 Parameters
904 ----------
905 dataset_type : `DatasetType` or `str`
906 A `DatasetType` or the name of one. If this is a `DatasetType`
907 instance, its storage class will be respected and propagated to
908 the output, even if it differs from the dataset type definition
909 in the registry, as long as the storage classes are convertible.
910 data_id : `dict` or `DataCoordinate`, optional
911 A `dict`-like object containing the `Dimension` links that identify
912 the dataset within a collection. If it is a `dict` the dataId
913 can include dimension record values such as ``day_obs`` and
914 ``seq_num`` or ``full_name`` that can be used to derive the
915 primary dimension.
916 collections : `str` or `list` [`str`], optional
917 An ordered list of collections to search for the dataset.
918 Defaults to ``self.defaults.collections``.
919 timespan : `Timespan`, optional
920 A timespan that the validity range of the dataset must overlap.
921 If not provided, any `~CollectionType.CALIBRATION` collections
922 matched by the ``collections`` argument will not be searched.
923 storage_class : `str` or `StorageClass` or `None`
924 A storage class to use when creating the returned entry. If given
925 it must be compatible with the default storage class.
926 dimension_records : `bool`, optional
927 If `True` the ref will be expanded and contain dimension records.
928 datastore_records : `bool`, optional
929 If `True` the ref will contain associated datastore records.
930 **kwargs
931 Additional keyword arguments passed to
932 `DataCoordinate.standardize` to convert ``dataId`` to a true
933 `DataCoordinate` or augment an existing one. This can also include
934 dimension record metadata that can be used to derive a primary
935 dimension value.
937 Returns
938 -------
939 ref : `DatasetRef`
940 A reference to the dataset, or `None` if no matching Dataset
941 was found.
943 Raises
944 ------
945 lsst.daf.butler.NoDefaultCollectionError
946 Raised if ``collections`` is `None` and
947 ``self.collections`` is `None`.
948 LookupError
949 Raised if one or more data ID keys are missing.
950 lsst.daf.butler.MissingDatasetTypeError
951 Raised if the dataset type does not exist.
952 lsst.daf.butler.MissingCollectionError
953 Raised if any of ``collections`` does not exist in the registry.
955 Notes
956 -----
957 This method simply returns `None` and does not raise an exception even
958 when the set of collections searched is intrinsically incompatible with
959 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
960 only `~CollectionType.CALIBRATION` collections are being searched.
961 This may make it harder to debug some lookup failures, but the behavior
962 is intentional; we consider it more important that failed searches are
963 reported consistently, regardless of the reason, and that adding
964 additional collections that do not contain a match to the search path
965 never changes the behavior.
967 This method handles component dataset types automatically, though most
968 other query operations do not.
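Examples
--------
A sketch looking up a single calibration dataset; the dataset type, data
ID values, and collection name are illustrative::

    ref = butler.find_dataset(
        "bias",
        instrument="HSC",
        detector=42,
        collections="HSC/calib",
    )
    if ref is not None:
        bias = butler.get(ref)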
969 """
970 raise NotImplementedError()
972 @abstractmethod
973 def retrieveArtifacts(
974 self,
975 refs: Iterable[DatasetRef],
976 destination: ResourcePathExpression,
977 transfer: str = "auto",
978 preserve_path: bool = True,
979 overwrite: bool = False,
980 ) -> list[ResourcePath]:
981 """Retrieve the artifacts associated with the supplied refs.
983 Parameters
984 ----------
985 refs : iterable of `DatasetRef`
986 The datasets for which artifacts are to be retrieved.
987 A single ref can result in multiple artifacts. The refs must
988 be resolved.
989 destination : `lsst.resources.ResourcePath` or `str`
990 Location to write the artifacts.
991 transfer : `str`, optional
992 Method to use to transfer the artifacts. Must be one of the options
993 supported by `~lsst.resources.ResourcePath.transfer_from()`.
994 "move" is not allowed.
995 preserve_path : `bool`, optional
996 If `True` the full path of the artifact within the datastore
997 is preserved. If `False` the final file component of the path
998 is used.
999 overwrite : `bool`, optional
1000 If `True` allow transfers to overwrite existing files at the
1001 destination.
1003 Returns
1004 -------
1005 targets : `list` of `lsst.resources.ResourcePath`
1006 URIs of file artifacts in destination location. Order is not
1007 preserved.
1009 Notes
1010 -----
1011 For non-file datastores the artifacts written to the destination
1012 may not match the representation inside the datastore. For example
1013 a hierarchical data structure in a NoSQL database may well be stored
1014 as a JSON file.
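Examples
--------
A sketch copying the file artifacts behind a set of refs to a local
directory; the query, collection, and destination are illustrative::

    refs = butler.registry.queryDatasets("calexp", collections="u/alice/DM-50000")
    paths = butler.retrieveArtifacts(refs, "/tmp/exports", transfer="copy")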
1015 """
1016 raise NotImplementedError()
1018 @abstractmethod
1019 def exists(
1020 self,
1021 dataset_ref_or_type: DatasetRef | DatasetType | str,
1022 /,
1023 data_id: DataId | None = None,
1024 *,
1025 full_check: bool = True,
1026 collections: Any = None,
1027 **kwargs: Any,
1028 ) -> DatasetExistence:
1029 """Indicate whether a dataset is known to Butler registry and
1030 datastore.
1032 Parameters
1033 ----------
1034 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1035 When `DatasetRef` the `dataId` should be `None`.
1036 Otherwise the `DatasetType` or name thereof.
1037 data_id : `dict` or `DataCoordinate`
1038 A `dict` of `Dimension` link name, value pairs that label the
1039 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1040 should be provided as the first argument.
1041 full_check : `bool`, optional
1042 If `True`, a check will be made for the actual existence of a
1043 dataset artifact. This will involve additional overhead due to
1044 the need to query an external system. If `False`, this check will
1045 be omitted, and the registry and datastore will solely be asked
1046 if they know about the dataset but no direct check for the
1047 artifact will be performed.
1048 collections : Any, optional
1049 Collections to be searched, overriding ``self.collections``.
1050 Can be any of the types supported by the ``collections`` argument
1051 to butler construction.
1052 **kwargs
1053 Additional keyword arguments used to augment or construct a
1054 `DataCoordinate`. See `DataCoordinate.standardize`
1055 parameters.
1057 Returns
1058 -------
1059 existence : `DatasetExistence`
1060 Object indicating whether the dataset is known to registry and
1061 datastore. Evaluates to `True` if the dataset is present and known
1062 to both.
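Examples
--------
A sketch; the dataset type and data ID values are illustrative::

    existence = butler.exists(
        "calexp", instrument="HSC", visit=903334, detector=42
    )
    if existence:
        # Known to both registry and datastore, and the artifact exists.
        ...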
1063 """
1064 raise NotImplementedError()
1066 @abstractmethod
1067 def _exists_many(
1068 self,
1069 refs: Iterable[DatasetRef],
1070 /,
1071 *,
1072 full_check: bool = True,
1073 ) -> dict[DatasetRef, DatasetExistence]:
1074 """Indicate whether multiple datasets are known to Butler registry and
1075 datastore.
1077 This is an experimental API that may change at any moment.
1079 Parameters
1080 ----------
1081 refs : iterable of `DatasetRef`
1082 The datasets to be checked.
1083 full_check : `bool`, optional
1084 If `True`, a check will be made for the actual existence of each
1085 dataset artifact. This will involve additional overhead due to
1086 the need to query an external system. If `False`, this check will
1087 be omitted, and the registry and datastore will solely be asked
1088 if they know about the dataset(s) but no direct check for the
1089 artifact(s) will be performed.
1091 Returns
1092 -------
1093 existence : dict of [`DatasetRef`, `DatasetExistence`]
1094 Mapping from the given dataset refs to an enum indicating the
1095 status of the dataset in registry and datastore.
1096 Each value evaluates to `True` if the dataset is present and known
1097 to both.
1098 """
1099 raise NotImplementedError()
1101 @abstractmethod
1102 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1103 """Remove one or more `~CollectionType.RUN` collections and the
1104 datasets within them.
1106 Parameters
1107 ----------
1108 names : `~collections.abc.Iterable` [ `str` ]
1109 The names of the collections to remove.
1110 unstore : `bool`, optional
1111 If `True` (default), delete datasets from all datastores in which
1112 they are present, and attempt to roll back the registry deletions if
1113 datastore deletions fail (which may not always be possible). If
1114 `False`, datastore records for these datasets are still removed,
1115 but any artifacts (e.g. files) will not be.
1117 Raises
1118 ------
1119 TypeError
1120 Raised if one or more collections are not of type
1121 `~CollectionType.RUN`.
1122 """
1123 raise NotImplementedError()
1125 @abstractmethod
1126 def ingest(
1127 self,
1128 *datasets: FileDataset,
1129 transfer: str | None = "auto",
1130 record_validation_info: bool = True,
1131 ) -> None:
1132 """Store and register one or more datasets that already exist on disk.
1134 Parameters
1135 ----------
1136 *datasets : `FileDataset`
1137 Each positional argument is a struct containing information about
1138 a file to be ingested, including its URI (either absolute or
1139 relative to the datastore root, if applicable), a resolved
1140 `DatasetRef`, and optionally a formatter class or its
1141 fully-qualified string name. If a formatter is not provided, the
1142 formatter that would be used for `put` is assumed. On successful
1143 ingest all `FileDataset.formatter` attributes will be set to the
1144 formatter class used. `FileDataset.path` attributes may be modified
1145 to put paths in whatever the datastore considers a standardized
1146 form.
1147 transfer : `str`, optional
1148 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1149 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1150 transfer the file.
1151 record_validation_info : `bool`, optional
1152 If `True`, the default, the datastore can record validation
1153 information associated with the file. If `False` the datastore
1154 will not attempt to track any information such as checksums
1155 or file sizes. This can be useful if such information is tracked
1156 in an external system or if the file is to be compressed in place.
1157 It is up to the datastore whether this parameter is relevant.
1159 Raises
1160 ------
1161 TypeError
1162 Raised if the butler is read-only or if no run was provided.
1163 NotImplementedError
1164 Raised if the `Datastore` does not support the given transfer mode.
1165 DatasetTypeNotSupportedError
1166 Raised if one or more files to be ingested have a dataset type that
1167 is not supported by the `Datastore`.
1168 FileNotFoundError
1169 Raised if one of the given files does not exist.
1170 FileExistsError
1171 Raised if transfer is not `None` but the (internal) location the
1172 file would be moved to is already occupied.
1174 Notes
1175 -----
1176 This operation is not fully exception safe: if a database operation
1177 fails, the given `FileDataset` instances may be only partially updated.
1179 It is atomic in terms of database operations (they will either all
1180 succeed or all fail) providing the database engine implements
1181 transactions correctly. It will attempt to be atomic in terms of
1182 filesystem operations as well, but this cannot be implemented
1183 rigorously for most datastores.
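Examples
--------
A sketch ingesting a single file for which a resolved `DatasetRef` has
already been obtained; the path and the pre-existing ``ref`` are
illustrative::

    from lsst.daf.butler import FileDataset

    dataset = FileDataset(path="/data/raw/file.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy")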
1184 """
1185 raise NotImplementedError()
1187 @abstractmethod
1188 def export(
1189 self,
1190 *,
1191 directory: str | None = None,
1192 filename: str | None = None,
1193 format: str | None = None,
1194 transfer: str | None = None,
1195 ) -> AbstractContextManager[RepoExportContext]:
1196 """Export datasets from the repository represented by this `Butler`.
1198 This method is a context manager that returns a helper object
1199 (`RepoExportContext`) that is used to indicate what information from
1200 the repository should be exported.
1202 Parameters
1203 ----------
1204 directory : `str`, optional
1205 Directory dataset files should be written to if ``transfer`` is not
1206 `None`.
1207 filename : `str`, optional
1208 Name for the file that will include database information associated
1209 with the exported datasets. If this is not an absolute path and
1210 ``directory`` is not `None`, it will be written to ``directory``
1211 instead of the current working directory. Defaults to
1212 "export.{format}".
1213 format : `str`, optional
1214 File format for the database information file. If `None`, the
1215 extension of ``filename`` will be used.
1216 transfer : `str`, optional
1217 Transfer mode passed to `Datastore.export`.
1219 Raises
1220 ------
1221 TypeError
1222 Raised if the set of arguments passed is inconsistent.
1224 Examples
1225 --------
1226 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1227 methods are used to provide the iterables over data IDs and/or datasets
1228 to be exported::
1230 with butler.export("exports.yaml") as export:
1231 # Export all flats, but none of the dimension element rows
1232 # (i.e. data ID information) associated with them.
1233 export.saveDatasets(butler.registry.queryDatasets("flat"),
1234 elements=())
1235 # Export all datasets that start with "deepCoadd_" and all of
1236 # their associated data ID information.
1237 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1238 """
1239 raise NotImplementedError()
1241 @abstractmethod
1242 def import_(
1243 self,
1244 *,
1245 directory: ResourcePathExpression | None = None,
1246 filename: ResourcePathExpression | TextIO | None = None,
1247 format: str | None = None,
1248 transfer: str | None = None,
1249 skip_dimensions: set | None = None,
1250 ) -> None:
1251 """Import datasets into this repository that were exported from a
1252 different butler repository via `~lsst.daf.butler.Butler.export`.
1254 Parameters
1255 ----------
1256 directory : `~lsst.resources.ResourcePathExpression`, optional
1257 Directory containing dataset files to import from. If `None`,
1258 ``filename`` and all dataset file paths specified therein must
1259 be absolute.
1260 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1261 A stream or name of file that contains database information
1262 associated with the exported datasets, typically generated by
1263 `~lsst.daf.butler.Butler.export`. If this is a string (name) or
1264 `~lsst.resources.ResourcePath` and is not an absolute path,
1265 it will first be looked for relative to ``directory`` and if not
1266 found there it will be looked for in the current working
1267 directory. Defaults to "export.{format}".
1268 format : `str`, optional
1269 File format for ``filename``. If `None`, the extension of
1270 ``filename`` will be used.
1271 transfer : `str`, optional
1272 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1273 skip_dimensions : `set`, optional
1274 Names of dimensions that should be skipped and not imported.
1276 Raises
1277 ------
1278 TypeError
1279 Raised if the set of arguments passed is inconsistent, or if the
1280 butler is read-only.
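Examples
--------
A sketch importing a previously exported repository subset; the directory
and file names are illustrative::

    butler.import_(
        directory="/path/to/exports", filename="export.yaml", transfer="copy"
    )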
1281 """
1282 raise NotImplementedError()
1284 @abstractmethod
1285 def transfer_dimension_records_from(
1286 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1287 ) -> None:
1288 """Transfer dimension records to this Butler from another Butler.
1290 Parameters
1291 ----------
1292 source_butler : `LimitedButler` or `Butler`
1293 Butler from which the records are to be transferred. If data IDs
1294 in ``source_refs`` are not expanded then this has to be a full
1295 `Butler` whose registry will be used to expand data IDs. If the
1296 source refs contain coordinates that are used to populate other
1297 records then this will also need to be a full `Butler`.
1298 source_refs : iterable of `DatasetRef`
1299 Datasets defined in the source butler whose dimension records
1300 should be transferred to this butler. In most circumstances,
1301 transfer is faster if the dataset refs are expanded.
1302 """
1303 raise NotImplementedError()
1305 @abstractmethod
1306 def transfer_from(
1307 self,
1308 source_butler: LimitedButler,
1309 source_refs: Iterable[DatasetRef],
1310 transfer: str = "auto",
1311 skip_missing: bool = True,
1312 register_dataset_types: bool = False,
1313 transfer_dimensions: bool = False,
1314 dry_run: bool = False,
1315 ) -> Collection[DatasetRef]:
1316 """Transfer datasets to this Butler from a run in another Butler.
1318 Parameters
1319 ----------
1320 source_butler : `LimitedButler`
1321 Butler from which the datasets are to be transferred. If data IDs
1322 in ``source_refs`` are not expanded then this has to be a full
1323 `Butler` whose registry will be used to expand data IDs.
1324 source_refs : iterable of `DatasetRef`
1325 Datasets defined in the source butler that should be transferred to
1326 this butler. In most circumstances, ``transfer_from`` is faster if
1327 the dataset refs are expanded.
1328 transfer : `str`, optional
1329 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1330 skip_missing : `bool`
1331 If `True`, datasets with no datastore artifact associated with
1332 them are not transferred. If `False` a registry entry will be
1333 created even if no datastore record is created (and so will
1334 look equivalent to the dataset being unstored).
1335 register_dataset_types : `bool`
1336 If `True` any missing dataset types are registered. Otherwise
1337 an exception is raised.
1338 transfer_dimensions : `bool`, optional
1339 If `True`, dimension record data associated with the new datasets
1340 will be transferred.
1341 dry_run : `bool`, optional
1342 If `True` the transfer will be processed without any modifications
1343 made to the target butler and as if the target butler did not
1344 have any of the datasets.
1346 Returns
1347 -------
1348 refs : `list` of `DatasetRef`
1349 The refs added to this Butler.
1351 Notes
1352 -----
1353 The datastore artifact has to exist for a transfer
1354 to be made but non-existence is not an error.
1356 Datasets that already exist in this run will be skipped.
1358 The datasets are imported as part of a transaction, although
1359 dataset types are registered before the transaction is started.
1360 This means that it is possible for a dataset type to be registered
1361 even though transfer has failed.
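Examples
--------
A sketch transferring datasets from another repository into this one; the
paths, dataset type, and collection name are illustrative::

    source = Butler.from_config("/path/to/source/repo")
    refs = source.registry.queryDatasets("calexp", collections="u/alice/DM-50000")
    transferred = butler.transfer_from(
        source, refs, transfer="copy", register_dataset_types=True
    )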
1362 """
1363 raise NotImplementedError()
1365 @abstractmethod
1366 def validateConfiguration(
1367 self,
1368 logFailures: bool = False,
1369 datasetTypeNames: Iterable[str] | None = None,
1370 ignore: Iterable[str] | None = None,
1371 ) -> None:
1372 """Validate butler configuration.
1374 Checks that each `DatasetType` can be stored in the `Datastore`.
1376 Parameters
1377 ----------
1378 logFailures : `bool`, optional
1379 If `True`, output a log message for every validation error
1380 detected.
1381 datasetTypeNames : iterable of `str`, optional
1382 The `DatasetType` names that should be checked. This allows
1383 only a subset to be selected.
1384 ignore : iterable of `str`, optional
1385 Names of DatasetTypes to skip over. This can be used to skip
1386 known problems. If a named `DatasetType` corresponds to a
1387 composite, all components of that `DatasetType` will also be
1388 ignored.
1390 Raises
1391 ------
1392 ButlerValidationError
1393 Raised if there is some inconsistency with how this Butler
1394 is configured.
1395 """
1396 raise NotImplementedError()
1398 @property
1399 @abstractmethod
1400 def collections(self) -> Sequence[str]:
1401 """The collections to search by default, in order
1402 (`~collections.abc.Sequence` [ `str` ]).
1403 """
1404 raise NotImplementedError()
1406 @property
1407 @abstractmethod
1408 def run(self) -> str | None:
1409 """Name of the run this butler writes outputs to by default (`str` or
1410 `None`).
1411 """
1412 raise NotImplementedError()
1414 @property
1415 @abstractmethod
1416 def registry(self) -> Registry:
1417 """The object that manages dataset metadata and relationships
1418 (`Registry`).
1420 Many operations that don't involve reading or writing butler datasets
1421 are accessible only via `Registry` methods. Eventually these methods
1422 will be replaced by equivalent `Butler` methods.
1423 """
1424 raise NotImplementedError()
1426 @abstractmethod
1427 def _query(self) -> AbstractContextManager[Query]:
1428 """Context manager returning a `Query` object used for construction
1429 and execution of complex queries.
1430 """
1431 raise NotImplementedError()
1433 def _query_data_ids(
1434 self,
1435 dimensions: DimensionGroup | Iterable[str] | str,
1436 *,
1437 data_id: DataId | None = None,
1438 where: str = "",
1439 bind: Mapping[str, Any] | None = None,
1440 with_dimension_records: bool = False,
1441 order_by: Iterable[str] | str | None = None,
1442 limit: int | None = None,
1443 explain: bool = True,
1444 **kwargs: Any,
1445 ) -> list[DataCoordinate]:
1446 """Query for data IDs matching user-provided criteria.
1448 Parameters
1449 ----------
1450 dimensions : `DimensionGroup`, `str`, or \
1451 `~collections.abc.Iterable` [`str`]
1452 The dimensions of the data IDs to yield, as either `DimensionGroup`
1453 instances or `str`. Will be automatically expanded to a complete
1454 `DimensionGroup`.
1455 data_id : `dict` or `DataCoordinate`, optional
1456 A data ID whose key-value pairs are used as equality constraints
1457 in the query.
1458 where : `str`, optional
1459 A string expression similar to a SQL WHERE clause. May involve
1460 any column of a dimension table or (as a shortcut for the primary
1461 key column of a dimension table) dimension name. See
1462 :ref:`daf_butler_dimension_expressions` for more information.
1463 bind : `~collections.abc.Mapping`, optional
1464 Mapping containing literal values that should be injected into the
1465 ``where`` expression, keyed by the identifiers they replace.
1466 Values of collection type can be expanded in some cases; see
1467 :ref:`daf_butler_dimension_expressions_identifiers` for more
1468 information.
1469 with_dimension_records : `bool`, optional
1470 If `True` (default is `False`) then returned data IDs will have
1471 dimension records.
1472 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1473 Names of the columns/dimensions to use for ordering returned data
1474 IDs. Column name can be prefixed with minus (``-``) to use
1475 descending ordering.
1476 limit : `int`, optional
1477 Upper limit on the number of returned records.
1478 explain : `bool`, optional
1479 If `True` (default), an `EmptyQueryResultError` exception is
1480 raised when the resulting list is empty. The exception contains
1481 a non-empty list of strings explaining possible causes of the
1482 empty result.
1483 **kwargs
1484 Additional keyword arguments are forwarded to
1485 `DataCoordinate.standardize` when processing the ``data_id``
1486 argument (and may be used to provide a constraining data ID even
1487 when the ``data_id`` argument is `None`).
1489 Returns
1490 -------
1491 dataIds : `list` [`DataCoordinate`]
1492 Data IDs matching the given query parameters. These are always
1493 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1494 returns `True`).
1496 Raises
1497 ------
1498 lsst.daf.butler.registry.DataIdError
1499 Raised when ``data_id`` or keyword arguments specify unknown
1500 dimensions or values, or when they contain inconsistent values.
1501 lsst.daf.butler.registry.UserExpressionError
1502 Raised when the ``where`` expression is invalid.
1503 lsst.daf.butler.EmptyQueryResultError
1504 Raised when the query generates an empty result and ``explain``
1505 is set to `True`.
1506 TypeError
1507 Raised when the arguments are incompatible.
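
Examples
--------
A minimal sketch; the dimension names and instrument/detector values
are hypothetical for a given repository::

    data_ids = butler._query_data_ids(
        ["exposure", "detector"],
        where="instrument = 'HSC' AND detector = 10",
        limit=100,
    )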
1508 """
1509 if data_id is None:
1510 data_id = DataCoordinate.make_empty(self.dimensions)
1511 with self._query() as query:
1512 result = (
1513 query.where(data_id, where, bind=bind, **kwargs)
1514 .data_ids(dimensions)
1515 .order_by(*ensure_iterable(order_by))
1516 .limit(limit)
1517 )
1518 if with_dimension_records:
1519 result = result.with_dimension_records()
1520 data_ids = list(result)
1521 if explain and not data_ids:
1522 raise EmptyQueryResultError(list(result.explain_no_results()))
1523 return data_ids
1525 def _query_datasets(
1526 self,
1527 dataset_type: str | DatasetType,
1528 collections: str | Iterable[str] | None = None,
1529 *,
1530 find_first: bool = True,
1531 data_id: DataId | None = None,
1532 where: str = "",
1533 bind: Mapping[str, Any] | None = None,
1534 with_dimension_records: bool = False,
1535 explain: bool = True,
1536 **kwargs: Any,
1537 ) -> list[DatasetRef]:
1538 """Query for dataset references matching user-provided criteria.
1540 Parameters
1541 ----------
1542 dataset_type : `str` or `DatasetType`
1543 Dataset type object or name to search for.
1544 collections : collection expression, optional
1545 A collection name or iterable of collection names to search. If not
1546 provided, the default collections are used. See
1547 :ref:`daf_butler_collection_expressions` for more information.
1548 find_first : `bool`, optional
1549 If `True` (default), for each result data ID, only yield one
1550 `DatasetRef` of each `DatasetType`, from the first collection in
1551 which a dataset of that dataset type appears (according to the
1552 order of ``collections`` passed in). If `True`, ``collections``
1553 must not contain regular expressions and may not be ``...``.
1554 data_id : `dict` or `DataCoordinate`, optional
1555 A data ID whose key-value pairs are used as equality constraints in
1556 the query.
1557 where : `str`, optional
1558 A string expression similar to a SQL WHERE clause. May involve any
1559 column of a dimension table or (as a shortcut for the primary key
1560 column of a dimension table) dimension name. See
1561 :ref:`daf_butler_dimension_expressions` for more information.
1562 bind : `~collections.abc.Mapping`, optional
1563 Mapping containing literal values that should be injected into the
1564 ``where`` expression, keyed by the identifiers they replace. Values
1565 of collection type can be expanded in some cases; see
1566 :ref:`daf_butler_dimension_expressions_identifiers` for more
1567 information.
1568 with_dimension_records : `bool`, optional
1569 If `True` (default is `False`) then returned data IDs will have
1570 dimension records.
1571 explain : `bool`, optional
1572 If `True` (default), an `EmptyQueryResultError` exception is
1573 raised when the resulting list is empty. The exception contains
1574 a non-empty list of strings explaining possible causes of the
1575 empty result.
1576 **kwargs
1577 Additional keyword arguments are forwarded to
1578 `DataCoordinate.standardize` when processing the ``data_id``
1579 argument (and may be used to provide a constraining data ID even
1580 when the ``data_id`` argument is `None`).
1582 Returns
1583 -------
1584 refs : `list` [ `DatasetRef` ]
1585 Dataset references matching the given query criteria. Nested data
1586 IDs are guaranteed to include values for all implied dimensions
1587 (i.e. `DataCoordinate.hasFull` will return `True`).
1589 Raises
1590 ------
1591 lsst.daf.butler.registry.DatasetTypeExpressionError
1592 Raised when the ``dataset_type`` expression is invalid.
1593 lsst.daf.butler.registry.DataIdError
1594 Raised when ``data_id`` or keyword arguments specify unknown
1595 dimensions or values, or when they contain inconsistent values.
1596 lsst.daf.butler.registry.UserExpressionError
1597 Raised when the ``where`` expression is invalid.
1598 lsst.daf.butler.EmptyQueryResultError
1599 Raised when the query generates an empty result and ``explain``
1600 is set to `True`.
1601 TypeError
1602 Raised when the arguments are incompatible, such as when a
1603 collection wildcard is passed when ``find_first`` is `True`, or
1604 when ``collections`` is `None` and default butler collections are
1605 not defined.
1607 Notes
1608 -----
1609 When multiple dataset types are queried in a single call, the results
1610 of this operation are equivalent to querying for each dataset type
1611 separately in turn, and no information about the relationships between
1612 datasets of different types is included.
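
Examples
--------
A minimal sketch; the dataset type, collection, and data ID values
below are hypothetical::

    refs = butler._query_datasets(
        "calexp",
        collections="HSC/runs/example",
        where="visit = 12345 AND detector = 42",
        instrument="HSC",
    )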
1613 """
1614 if data_id is None:
1615 data_id = DataCoordinate.make_empty(self.dimensions)
1616 with self._query() as query:
1617 result = query.where(data_id, where, bind=bind, **kwargs).datasets(
1618 dataset_type,
1619 collections=collections,
1620 find_first=find_first,
1621 )
1622 if with_dimension_records:
1623 result = result.with_dimension_records()
1624 refs = list(result)
1625 if explain and not refs:
1626 raise EmptyQueryResultError(list(result.explain_no_results()))
1627 return refs
1629 def _query_dimension_records(
1630 self,
1631 element: str,
1632 *,
1633 data_id: DataId | None = None,
1634 where: str = "",
1635 bind: Mapping[str, Any] | None = None,
1636 order_by: Iterable[str] | str | None = None,
1637 limit: int | None = None,
1638 explain: bool = True,
1639 **kwargs: Any,
1640 ) -> list[DimensionRecord]:
1641 """Query for dimension information matching user-provided criteria.
1643 Parameters
1644 ----------
1645 element : `str`
1646 The name of a dimension element to obtain records for.
1647 data_id : `dict` or `DataCoordinate`, optional
1648 A data ID whose key-value pairs are used as equality constraints
1649 in the query.
1650 where : `str`, optional
1651 A string expression similar to a SQL WHERE clause. See
1652 `_query_data_ids` and :ref:`daf_butler_dimension_expressions` for more
1653 information.
1654 bind : `~collections.abc.Mapping`, optional
1655 Mapping containing literal values that should be injected into the
1656 ``where`` expression, keyed by the identifiers they replace.
1657 Values of collection type can be expanded in some cases; see
1658 :ref:`daf_butler_dimension_expressions_identifiers` for more
1659 information.
1660 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1661 Names of the columns/dimensions to use for ordering returned data
1662 IDs. A column name can be prefixed with a minus sign (``-``) to
1663 sort in descending order.
1664 limit : `int`, optional
1665 Upper limit on the number of returned records.
1666 explain : `bool`, optional
1667 If `True` (default), an `EmptyQueryResultError` exception is
1668 raised when the resulting list is empty. The exception contains
1669 a non-empty list of strings explaining possible causes of the
1670 empty result.
1671 **kwargs
1672 Additional keyword arguments are forwarded to
1673 `DataCoordinate.standardize` when processing the ``data_id``
1674 argument (and may be used to provide a constraining data ID even
1675 when the ``data_id`` argument is `None`).
1677 Returns
1678 -------
1679 records : `list`[`DimensionRecord`]
1680 Dimension records matching the given query parameters.
1682 Raises
1683 ------
1684 lsst.daf.butler.registry.DataIdError
1685 Raised when ``data_id`` or keyword arguments specify unknown
1686 dimensions or values, or when they contain inconsistent values.
1687 lsst.daf.butler.registry.UserExpressionError
1688 Raised when the ``where`` expression is invalid.
1689 lsst.daf.butler.EmptyQueryResultError
1690 Raised when the query generates an empty result and ``explain``
1691 is set to `True`.
1692 TypeError
1693 Raised when the arguments are incompatible.
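
Examples
--------
A minimal sketch; the element and instrument names are hypothetical::

    records = butler._query_dimension_records(
        "detector",
        where="instrument = 'HSC'",
    )
    for record in records:
        print(record)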
1697 """
1698 if data_id is None:
1699 data_id = DataCoordinate.make_empty(self.dimensions)
1700 with self._query() as query:
1701 result = (
1702 query.where(data_id, where, bind=bind, **kwargs)
1703 .dimension_records(element)
1704 .order_by(*ensure_iterable(order_by))
1705 .limit(limit)
1706 )
1707 dimension_records = list(result)
1708 if explain and not dimension_records:
1709 raise EmptyQueryResultError(list(result.explain_no_results()))
1710 return dimension_records
1712 @abstractmethod
1713 def _clone(
1714 self,
1715 *,
1716 collections: Any = None,
1717 run: str | None = None,
1718 inferDefaults: bool = True,
1719 **kwargs: Any,
1720 ) -> Butler:
1721 """Return a new Butler instance connected to the same repository
1722 as this one, but overriding ``collections``, ``run``,
1723 ``inferDefaults``, and default data ID.
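
For example (the run name is hypothetical)::

    output_butler = butler._clone(run="u/someone/processing-run")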
1724 """
1725 raise NotImplementedError()