Coverage for python/lsst/daf/butler/_butler.py: 62%
154 statements
coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["Butler"]
32from abc import abstractmethod
33from collections.abc import Collection, Iterable, Mapping, Sequence
34from contextlib import AbstractContextManager
35from typing import TYPE_CHECKING, Any, TextIO
37from lsst.resources import ResourcePath, ResourcePathExpression
38from lsst.utils import doImportType
39from lsst.utils.logging import getLogger
41from ._butler_config import ButlerConfig, ButlerType
42from ._butler_instance_options import ButlerInstanceOptions
43from ._butler_repo_index import ButlerRepoIndex
44from ._config import Config, ConfigSubset
45from ._limited_butler import LimitedButler
46from .datastore import Datastore
47from .dimensions import DimensionConfig
48from .registry import RegistryConfig, _RegistryFactory
49from .repo_relocation import BUTLER_ROOT_TAG
51if TYPE_CHECKING:
52 from ._dataset_existence import DatasetExistence
53 from ._dataset_ref import DatasetId, DatasetRef
54 from ._dataset_type import DatasetType
55 from ._deferredDatasetHandle import DeferredDatasetHandle
56 from ._file_dataset import FileDataset
57 from ._query import Query
58 from ._storage_class import StorageClass
59 from ._timespan import Timespan
60 from .datastore import DatasetRefURIs
61 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
62 from .registry import CollectionArgType, Registry
63 from .transfers import RepoExportContext
65_LOG = getLogger(__name__)
68class Butler(LimitedButler): # numpydoc ignore=PR02
69 """Interface for data butler and factory for Butler instances.
71 Parameters
72 ----------
73 config : `ButlerConfig`, `Config` or `str`, optional
74 Configuration. Anything acceptable to the `ButlerConfig` constructor.
75 If a directory path is given the configuration will be read from a
76 ``butler.yaml`` file in that location. If `None` is given default
77 values will be used. If ``config`` contains a "cls" key then its value
78 is used as the name of the butler class, which must be a sub-class of
79 this class; otherwise `DirectButler` is instantiated.
80 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
81 An expression specifying the collections to be searched (in order) when
82 reading datasets.
83 This may be a `str` collection name or an iterable thereof.
84 See :ref:`daf_butler_collection_expressions` for more information.
85 These collections are not registered automatically and must be
86 registered manually before they are used by any method; they may,
87 however, be registered after the `Butler` is initialized.
88 run : `str`, optional
89 Name of the `~CollectionType.RUN` collection new datasets should be
90 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
91 ``collections`` will be set to ``[run]``. If not `None`, this
92 collection will automatically be registered. If this is not set (and
93 ``writeable`` is not set either), a read-only butler will be created.
94 searchPaths : `list` of `str`, optional
95 Directory paths to search when calculating the full Butler
96 configuration. Not used if the supplied config is already a
97 `ButlerConfig`.
98 writeable : `bool`, optional
99 Explicitly sets whether the butler supports write operations. If not
100 provided, a read-write butler is created if any of ``run``, ``tags``,
101 or ``chains`` is non-empty.
102 inferDefaults : `bool`, optional
103 If `True` (default) infer default data ID values from the values
104 present in the datasets in ``collections``: if all collections have the
105 same value (or no value) for a governor dimension, that value will be
106 the default for that dimension. Nonexistent collections are ignored.
107 If a default value is provided explicitly for a governor dimension via
108 ``**kwargs``, no default will be inferred for that dimension.
109 without_datastore : `bool`, optional
110 If `True` do not attach a datastore to this butler. Any attempts
111 to use a datastore will fail.
112 **kwargs : `Any`
113 Additional keyword arguments passed to the constructor of the actual
114 butler class.
116 Notes
117 -----
118 The preferred way to instantiate Butler is via the `from_config` method.
119 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
120 but ``mypy`` will complain about the former.
121 """
123 def __new__(
124 cls,
125 config: Config | ResourcePathExpression | None = None,
126 *,
127 collections: Any = None,
128 run: str | None = None,
129 searchPaths: Sequence[ResourcePathExpression] | None = None,
130 writeable: bool | None = None,
131 inferDefaults: bool = True,
132 without_datastore: bool = False,
133 **kwargs: Any,
134 ) -> Butler:
135 if cls is Butler:
136 return Butler.from_config(
137 config=config,
138 collections=collections,
139 run=run,
140 searchPaths=searchPaths,
141 writeable=writeable,
142 inferDefaults=inferDefaults,
143 without_datastore=without_datastore,
144 **kwargs,
145 )
147 # Note: we do not pass any parameters to __new__; Python will pass them
148 # to __init__ after __new__ returns the sub-class instance.
149 return super().__new__(cls)
151 @classmethod
152 def from_config(
153 cls,
154 config: Config | ResourcePathExpression | None = None,
155 *,
156 collections: Any = None,
157 run: str | None = None,
158 searchPaths: Sequence[ResourcePathExpression] | None = None,
159 writeable: bool | None = None,
160 inferDefaults: bool = True,
161 without_datastore: bool = False,
162 **kwargs: Any,
163 ) -> Butler:
164 """Create butler instance from configuration.
166 Parameters
167 ----------
168 config : `ButlerConfig`, `Config` or `str`, optional
169 Configuration. Anything acceptable to the `ButlerConfig`
170 constructor. If a directory path is given the configuration will be
171 read from a ``butler.yaml`` file in that location. If `None` is
172 given default values will be used. If ``config`` contains a "cls" key
173 then its value is used as the name of the butler class, which must be a
174 sub-class of this class; otherwise `DirectButler` is instantiated.
175 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
176 An expression specifying the collections to be searched (in order)
177 when reading datasets.
178 This may be a `str` collection name or an iterable thereof.
179 See :ref:`daf_butler_collection_expressions` for more information.
180 These collections are not registered automatically and must be
181 registered manually before they are used by any method; they may,
182 however, be registered after the `Butler` is initialized.
183 run : `str`, optional
184 Name of the `~CollectionType.RUN` collection new datasets should be
185 inserted into. If ``collections`` is `None` and ``run`` is not
186 `None`, ``collections`` will be set to ``[run]``. If not `None`,
187 this collection will automatically be registered. If this is not
188 set (and ``writeable`` is not set either), a read-only butler will
189 be created.
190 searchPaths : `list` of `str`, optional
191 Directory paths to search when calculating the full Butler
192 configuration. Not used if the supplied config is already a
193 `ButlerConfig`.
194 writeable : `bool`, optional
195 Explicitly sets whether the butler supports write operations. If
196 not provided, a read-write butler is created if any of ``run``,
197 ``tags``, or ``chains`` is non-empty.
198 inferDefaults : `bool`, optional
199 If `True` (default) infer default data ID values from the values
200 present in the datasets in ``collections``: if all collections have
201 the same value (or no value) for a governor dimension, that value
202 will be the default for that dimension. Nonexistent collections
203 are ignored. If a default value is provided explicitly for a
204 governor dimension via ``**kwargs``, no default will be inferred
205 for that dimension.
206 without_datastore : `bool`, optional
207 If `True` do not attach a datastore to this butler. Any attempts
208 to use a datastore will fail.
209 **kwargs : `Any`
210 Default data ID key-value pairs. These may only identify
211 "governor" dimensions like ``instrument`` and ``skymap``.
213 Returns
214 -------
215 butler : `Butler`
216 A `Butler` constructed from the given configuration.
218 Notes
219 -----
220 Calling this factory method is identical to calling
221 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
222 complains about a direct ``Butler()`` call.
224 Examples
225 --------
226 While there are many ways to control exactly how a `Butler` interacts
227 with the collections in its `Registry`, the most common cases are still
228 simple.
230 For a read-only `Butler` that searches one collection, do::
232 butler = Butler.from_config(
233 "/path/to/repo", collections=["u/alice/DM-50000"]
234 )
236 For a read-write `Butler` that writes to and reads from a
237 `~CollectionType.RUN` collection::
239 butler = Butler.from_config(
240 "/path/to/repo", run="u/alice/DM-50000/a"
241 )
243 The `Butler` passed to a ``PipelineTask`` is often much more complex,
244 because we want to write to one `~CollectionType.RUN` collection but
245 read from several others (as well)::
247 butler = Butler.from_config(
248 "/path/to/repo",
249 run="u/alice/DM-50000/a",
250 collections=[
251 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
252 ]
253 )
255 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
256 Datasets will be read first from that run (since it appears first in
257 the chain), and then from ``u/bob/DM-49998`` and finally
258 ``HSC/defaults``.
260 Finally, one can always create a `Butler` with no collections::
262 butler = Butler.from_config("/path/to/repo", writeable=True)
264 This can be extremely useful when you just want to use
265 ``butler.registry``, e.g. for inserting dimension data or managing
266 collections, or when the collections you want to use with the butler
267 are not consistent. Passing ``writeable`` explicitly here is only
268 necessary if you want to be able to make changes to the repo; usually
269 the value for ``writeable`` can be guessed from the collection
270 arguments provided, but it defaults to `False` when there are no
271 collection arguments.
272 """
273 # DirectButler used to have a way to specify a "copy constructor" by
274 # passing the "butler" parameter to its constructor. This
275 # functionality has been moved out of the constructor into
276 # Butler._clone(), but the new interface is not public yet.
277 butler = kwargs.pop("butler", None)
278 if butler is not None:
279 if not isinstance(butler, Butler):
280 raise TypeError("'butler' parameter must be a Butler instance")
281 if config is not None or searchPaths is not None or writeable is not None:
282 raise TypeError(
283 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
284 )
285 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
287 options = ButlerInstanceOptions(
288 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
289 )
291 # Load the Butler configuration. This may involve searching the
292 # environment to locate a configuration file.
293 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
294 butler_type = butler_config.get_butler_type()
296 # Make DirectButler if class is not specified.
297 match butler_type:
298 case ButlerType.DIRECT:
299 from .direct_butler import DirectButler
301 return DirectButler.create_from_config(
302 butler_config,
303 options=options,
304 without_datastore=without_datastore,
305 )
306 case ButlerType.REMOTE:
307 from .remote_butler import RemoteButlerFactory
309 factory = RemoteButlerFactory.create_factory_from_config(butler_config)
310 return factory.create_butler_with_credentials_from_environment(butler_options=options)
311 case _:
312 raise TypeError(f"Unknown Butler type '{butler_type}'")
314 @staticmethod
315 def makeRepo(
316 root: ResourcePathExpression,
317 config: Config | str | None = None,
318 dimensionConfig: Config | str | None = None,
319 standalone: bool = False,
320 searchPaths: list[str] | None = None,
321 forceConfigRoot: bool = True,
322 outfile: ResourcePathExpression | None = None,
323 overwrite: bool = False,
324 ) -> Config:
325 """Create an empty data repository by adding a butler.yaml config
326 to a repository root directory.
328 Parameters
329 ----------
330 root : `lsst.resources.ResourcePathExpression`
331 Path or URI to the root location of the new repository. Will be
332 created if it does not exist.
333 config : `Config` or `str`, optional
334 Configuration to write to the repository, after setting any
335 root-dependent Registry or Datastore config options. Can not
336 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
337 configuration will be used. Root-dependent config options
338 specified in this config are overwritten if ``forceConfigRoot``
339 is `True`.
340 dimensionConfig : `Config` or `str`, optional
341 Configuration for dimensions, will be used to initialize registry
342 database.
343 standalone : `bool`
344 If True, write all expanded defaults, not just customized or
345 repository-specific settings.
346 This (mostly) decouples the repository from the default
347 configuration, insulating it from changes to the defaults (which
348 may be good or bad, depending on the nature of the changes).
349 Future *additions* to the defaults will still be picked up when
350 initializing `Butler` instances for repos created with ``standalone=True``.
351 searchPaths : `list` of `str`, optional
352 Directory paths to search when calculating the full butler
353 configuration.
354 forceConfigRoot : `bool`, optional
355 If `False`, any values present in the supplied ``config`` that
356 would normally be reset are not overridden and will appear
357 directly in the output config. This allows non-standard overrides
358 of the root directory for a datastore or registry to be given.
359 If this parameter is `True` the values for ``root`` will be
360 forced into the resulting config if appropriate.
361 outfile : `lsst.resources.ResourcePathExpression`, optional
362 If not-`None`, the output configuration will be written to this
363 location rather than into the repository itself. Can be a URI
364 string. Can refer to a directory that will be used to write
365 ``butler.yaml``.
366 overwrite : `bool`, optional
367 Create a new configuration file even if one already exists
368 in the specified output location. Default is to raise
369 an exception.
371 Returns
372 -------
373 config : `Config`
374 The updated `Config` instance written to the repo.
376 Raises
377 ------
378 ValueError
379 Raised if a ButlerConfig or ConfigSubset is passed instead of a
380 regular Config (as these subclasses would make it impossible to
381 support ``standalone=False``).
382 FileExistsError
383 Raised if the output config file already exists.
384 os.error
385 Raised if the directory does not exist, exists but is not a
386 directory, or cannot be created.
388 Notes
389 -----
390 Note that when ``standalone=False`` (the default), the configuration
391 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
392 construct the repository should also be used to construct any Butlers
393 to avoid configuration inconsistencies.
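Examples
--------
A minimal sketch; the repository path is an illustrative placeholder::

# Create an empty repository with the default configuration ...
Butler.makeRepo("/path/to/new_repo")
# ... then construct a butler that uses it.
butler = Butler.from_config("/path/to/new_repo", writeable=True)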
394 """
395 if isinstance(config, ButlerConfig | ConfigSubset):
396 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
398 # Ensure that the root of the repository exists or can be made
399 root_uri = ResourcePath(root, forceDirectory=True)
400 root_uri.mkdir()
402 config = Config(config)
404 # If we are creating a new repo from scratch with relative roots,
405 # do not propagate an explicit root from the config file
406 if "root" in config:
407 del config["root"]
409 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
410 imported_class = doImportType(full["datastore", "cls"])
411 if not issubclass(imported_class, Datastore):
412 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
413 datastoreClass: type[Datastore] = imported_class
414 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
416 # if key exists in given config, parse it, otherwise parse the defaults
417 # in the expanded config
418 if config.get(("registry", "db")):
419 registryConfig = RegistryConfig(config)
420 else:
421 registryConfig = RegistryConfig(full)
422 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
423 if defaultDatabaseUri is not None:
424 Config.updateParameters(
425 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
426 )
427 else:
428 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
430 if standalone:
431 config.merge(full)
432 else:
433 # Always expand the registry.managers section into the per-repo
434 # config, because after the database schema is created, it's not
435 # allowed to change anymore. Note that in the standalone=True
436 # branch, _everything_ in the config is expanded, so there's no
437 # need to special case this.
438 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
439 configURI: ResourcePathExpression
440 if outfile is not None:
441 # When writing to a separate location we must include
442 # the root of the butler repo in the config else it won't know
443 # where to look.
444 config["root"] = root_uri.geturl()
445 configURI = outfile
446 else:
447 configURI = root_uri
448 # Strip obscore configuration, if it is present, before writing config
449 # to a file; the obscore config will be stored in the registry.
450 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
451 config_to_write = config.copy()
452 del config_to_write[obscore_config_key]
453 config_to_write.dumpToUri(configURI, overwrite=overwrite)
454 # configFile attribute is updated, need to copy it to original.
455 config.configFile = config_to_write.configFile
456 else:
457 config.dumpToUri(configURI, overwrite=overwrite)
459 # Create Registry and populate tables
460 registryConfig = RegistryConfig(config.get("registry"))
461 dimensionConfig = DimensionConfig(dimensionConfig)
462 _RegistryFactory(registryConfig).create_from_config(
463 dimensionConfig=dimensionConfig, butlerRoot=root_uri
464 )
466 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
468 return config
470 @classmethod
471 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
472 """Look up the label in a butler repository index.
474 Parameters
475 ----------
476 label : `str`
477 Label of the Butler repository to look up.
478 return_label : `bool`, optional
479 If ``label`` cannot be found in the repository index (either
480 because the index is not defined or ``label`` is not in the index) and
481 ``return_label`` is `True` then return ``ResourcePath(label)``.
482 If ``return_label`` is `False` (default) then an exception will be
483 raised instead.
485 Returns
486 -------
487 uri : `lsst.resources.ResourcePath`
488 URI to the Butler repository associated with the given label or
489 default value if it is provided.
491 Raises
492 ------
493 KeyError
494 Raised if the label is not found in the index, or if an index
495 is not defined, and ``return_label`` is `False`.
497 Notes
498 -----
499 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
500 information is discovered.
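Examples
--------
A minimal sketch, assuming a repository index that defines a label
``main`` (the label is an illustrative placeholder)::

uri = Butler.get_repo_uri("main")
# Fall back to interpreting the argument as a path or URI when it is
# not a known label.
uri = Butler.get_repo_uri("/some/path/to/repo", return_label=True)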
501 """
502 return ButlerRepoIndex.get_repo_uri(label, return_label)
504 @classmethod
505 def get_known_repos(cls) -> set[str]:
506 """Retrieve the list of known repository labels.
508 Returns
509 -------
510 repos : `set` of `str`
511 All the known labels. Can be empty if no index can be found.
513 Notes
514 -----
515 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
516 information is discovered.
517 """
518 return ButlerRepoIndex.get_known_repos()
520 @abstractmethod
521 def _caching_context(self) -> AbstractContextManager[None]:
522 """Context manager that enables caching."""
523 raise NotImplementedError()
525 @abstractmethod
526 def transaction(self) -> AbstractContextManager[None]:
527 """Context manager supporting `Butler` transactions.
529 Transactions can be nested.
530 """
531 raise NotImplementedError()
533 @abstractmethod
534 def put(
535 self,
536 obj: Any,
537 datasetRefOrType: DatasetRef | DatasetType | str,
538 /,
539 dataId: DataId | None = None,
540 *,
541 run: str | None = None,
542 **kwargs: Any,
543 ) -> DatasetRef:
544 """Store and register a dataset.
546 Parameters
547 ----------
548 obj : `object`
549 The dataset.
550 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
551 When `DatasetRef` is provided, ``dataId`` should be `None`.
552 Otherwise the `DatasetType` or name thereof. If a fully resolved
553 `DatasetRef` is given the run and ID are used directly.
554 dataId : `dict` or `DataCoordinate`
555 A `dict` of `Dimension` link name, value pairs that label the
556 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
557 should be provided as the second argument.
558 run : `str`, optional
559 The name of the run the dataset should be added to, overriding
560 ``self.run``. Not used if a resolved `DatasetRef` is provided.
561 **kwargs
562 Additional keyword arguments used to augment or construct a
563 `DataCoordinate`. See `DataCoordinate.standardize`
564 parameters. Not used if a resolved `DatasetRef` is provided.
566 Returns
567 -------
568 ref : `DatasetRef`
569 A reference to the stored dataset, updated with the correct id if
570 given.
572 Raises
573 ------
574 TypeError
575 Raised if the butler is read-only or if no run has been provided.
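Examples
--------
A minimal sketch, assuming ``obj`` is an in-memory dataset of the
appropriate Python type; the dataset type name, run name, and data ID
values are illustrative placeholders::

butler = Butler.from_config("/path/to/repo", run="u/alice/example-run")
ref = butler.put(obj, "someDatasetType", instrument="SomeCam", detector=1)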
576 """
577 raise NotImplementedError()
579 @abstractmethod
580 def getDeferred(
581 self,
582 datasetRefOrType: DatasetRef | DatasetType | str,
583 /,
584 dataId: DataId | None = None,
585 *,
586 parameters: dict | None = None,
587 collections: Any = None,
588 storageClass: str | StorageClass | None = None,
589 **kwargs: Any,
590 ) -> DeferredDatasetHandle:
591 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
592 after an immediate registry lookup.
594 Parameters
595 ----------
596 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
597 When a `DatasetRef` is provided, ``dataId`` should be `None`.
598 Otherwise the `DatasetType` or name thereof.
599 dataId : `dict` or `DataCoordinate`, optional
600 A `dict` of `Dimension` link name, value pairs that label the
601 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
602 should be provided as the first argument.
603 parameters : `dict`
604 Additional StorageClass-defined options to control reading,
605 typically used to efficiently read only a subset of the dataset.
606 collections : Any, optional
607 Collections to be searched, overriding ``self.collections``.
608 Can be any of the types supported by the ``collections`` argument
609 to butler construction.
610 storageClass : `StorageClass` or `str`, optional
611 The storage class to be used to override the Python type
612 returned by this method. By default the returned type matches
613 the dataset type definition for this dataset. Specifying a
614 read `StorageClass` can force a different type to be returned.
615 This type must be compatible with the original type.
616 **kwargs
617 Additional keyword arguments used to augment or construct a
618 `DataId`. See `DataId` parameters.
620 Returns
621 -------
622 obj : `DeferredDatasetHandle`
623 A handle which can be used to retrieve a dataset at a later time.
625 Raises
626 ------
627 LookupError
628 Raised if no matching dataset exists in the `Registry` or
629 datastore.
630 ValueError
631 Raised if a resolved `DatasetRef` was passed as an input, but it
632 differs from the one found in the registry.
633 TypeError
634 Raised if no collections were provided.
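Examples
--------
A minimal sketch, assuming default collections are configured on the
butler; the dataset type name and data ID values are illustrative
placeholders::

handle = butler.getDeferred("someDatasetType", instrument="SomeCam", detector=1)
obj = handle.get()  # the dataset is only read here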
635 """
636 raise NotImplementedError()
638 @abstractmethod
639 def get(
640 self,
641 datasetRefOrType: DatasetRef | DatasetType | str,
642 /,
643 dataId: DataId | None = None,
644 *,
645 parameters: dict[str, Any] | None = None,
646 collections: Any = None,
647 storageClass: StorageClass | str | None = None,
648 **kwargs: Any,
649 ) -> Any:
650 """Retrieve a stored dataset.
652 Parameters
653 ----------
654 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
655 When a `DatasetRef` is provided, ``dataId`` should be `None`.
656 Otherwise the `DatasetType` or name thereof.
657 If a resolved `DatasetRef`, the associated dataset
658 is returned directly without additional querying.
659 dataId : `dict` or `DataCoordinate`
660 A `dict` of `Dimension` link name, value pairs that label the
661 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
662 should be provided as the first argument.
663 parameters : `dict`
664 Additional StorageClass-defined options to control reading,
665 typically used to efficiently read only a subset of the dataset.
666 collections : Any, optional
667 Collections to be searched, overriding ``self.collections``.
668 Can be any of the types supported by the ``collections`` argument
669 to butler construction.
670 storageClass : `StorageClass` or `str`, optional
671 The storage class to be used to override the Python type
672 returned by this method. By default the returned type matches
673 the dataset type definition for this dataset. Specifying a
674 read `StorageClass` can force a different type to be returned.
675 This type must be compatible with the original type.
676 **kwargs
677 Additional keyword arguments used to augment or construct a
678 `DataCoordinate`. See `DataCoordinate.standardize`
679 parameters.
681 Returns
682 -------
683 obj : `object`
684 The dataset.
686 Raises
687 ------
688 LookupError
689 Raised if no matching dataset exists in the `Registry`.
690 TypeError
691 Raised if no collections were provided.
693 Notes
694 -----
695 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
696 this method requires that the given data ID include temporal dimensions
697 beyond the dimensions of the dataset type itself, in order to find the
698 dataset with the appropriate validity range. For example, a "bias"
699 dataset with native dimensions ``{instrument, detector}`` could be
700 fetched with a ``{instrument, detector, exposure}`` data ID, because
701 ``exposure`` is a temporal dimension.
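Examples
--------
Following the calibration note above, a minimal sketch; the instrument
name, data ID values, and collection name are illustrative placeholders::

bias = butler.get(
"bias", instrument="SomeCam", detector=1, exposure=42,
collections="SomeCam/calib"
)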
702 """
703 raise NotImplementedError()
705 @abstractmethod
706 def getURIs(
707 self,
708 datasetRefOrType: DatasetRef | DatasetType | str,
709 /,
710 dataId: DataId | None = None,
711 *,
712 predict: bool = False,
713 collections: Any = None,
714 run: str | None = None,
715 **kwargs: Any,
716 ) -> DatasetRefURIs:
717 """Return the URIs associated with the dataset.
719 Parameters
720 ----------
721 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
722 When a `DatasetRef` is provided, ``dataId`` should be `None`.
723 Otherwise the `DatasetType` or name thereof.
724 dataId : `dict` or `DataCoordinate`
725 A `dict` of `Dimension` link name, value pairs that label the
726 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
727 should be provided as the first argument.
728 predict : `bool`
729 If `True`, allow URIs to be returned of datasets that have not
730 been written.
731 collections : Any, optional
732 Collections to be searched, overriding ``self.collections``.
733 Can be any of the types supported by the ``collections`` argument
734 to butler construction.
735 run : `str`, optional
736 Run to use for predictions, overriding ``self.run``.
737 **kwargs
738 Additional keyword arguments used to augment or construct a
739 `DataCoordinate`. See `DataCoordinate.standardize`
740 parameters.
742 Returns
743 -------
744 uris : `DatasetRefURIs`
745 The URI to the primary artifact associated with this dataset (if
746 the dataset was disassembled within the datastore this may be
747 `None`), and the URIs to any components associated with the dataset
748 artifact (which can be empty if there are no components).
749 """
750 raise NotImplementedError()
752 def getURI(
753 self,
754 datasetRefOrType: DatasetRef | DatasetType | str,
755 /,
756 dataId: DataId | None = None,
757 *,
758 predict: bool = False,
759 collections: Any = None,
760 run: str | None = None,
761 **kwargs: Any,
762 ) -> ResourcePath:
763 """Return the URI to the Dataset.
765 Parameters
766 ----------
767 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
768 When a `DatasetRef` is provided, ``dataId`` should be `None`.
769 Otherwise the `DatasetType` or name thereof.
770 dataId : `dict` or `DataCoordinate`
771 A `dict` of `Dimension` link name, value pairs that label the
772 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
773 should be provided as the first argument.
774 predict : `bool`
775 If `True`, allow URIs to be returned of datasets that have not
776 been written.
777 collections : Any, optional
778 Collections to be searched, overriding ``self.collections``.
779 Can be any of the types supported by the ``collections`` argument
780 to butler construction.
781 run : `str`, optional
782 Run to use for predictions, overriding ``self.run``.
783 **kwargs
784 Additional keyword arguments used to augment or construct a
785 `DataCoordinate`. See `DataCoordinate.standardize`
786 parameters.
788 Returns
789 -------
790 uri : `lsst.resources.ResourcePath`
791 URI pointing to the Dataset within the datastore. If the
792 Dataset does not exist in the datastore, and if ``predict`` is
793 `True`, the URI will be a prediction and will include a URI
794 fragment "#predicted".
795 If the datastore does not have entities that relate well
796 to the concept of a URI the returned URI string will be
797 descriptive. The returned URI is not guaranteed to be obtainable.
799 Raises
800 ------
801 LookupError
802 Raised if a URI has been requested for a dataset that does not
803 exist and guessing is not allowed.
804 ValueError
805 Raised if a resolved `DatasetRef` was passed as an input, but it
806 differs from the one found in the registry.
807 TypeError
808 Raised if no collections were provided.
809 RuntimeError
810 Raised if a URI is requested for a dataset that consists of
811 multiple artifacts.
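Examples
--------
A minimal sketch; the dataset type name, data ID values, and run name are
illustrative placeholders::

uri = butler.getURI("someDatasetType", instrument="SomeCam", detector=1)
# Predict the URI of a dataset that has not been written yet.
uri = butler.getURI(
"someDatasetType", instrument="SomeCam", detector=1,
predict=True, run="u/alice/example-run"
)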
812 """
813 primary, components = self.getURIs(
814 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
815 )
817 if primary is None or components:
818 raise RuntimeError(
819 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
820 "Use Butler.getURIs() instead."
821 )
822 return primary
824 @abstractmethod
825 def get_dataset_type(self, name: str) -> DatasetType:
826 """Get the `DatasetType`.
828 Parameters
829 ----------
830 name : `str`
831 Name of the type.
833 Returns
834 -------
835 type : `DatasetType`
836 The `DatasetType` associated with the given name.
838 Raises
839 ------
840 lsst.daf.butler.MissingDatasetTypeError
841 Raised if the requested dataset type has not been registered.
843 Notes
844 -----
845 This method handles component dataset types automatically, though most
846 other operations do not.
847 """
848 raise NotImplementedError()
850 @abstractmethod
851 def get_dataset(
852 self,
853 id: DatasetId,
854 *,
855 storage_class: str | StorageClass | None = None,
856 dimension_records: bool = False,
857 datastore_records: bool = False,
858 ) -> DatasetRef | None:
859 """Retrieve a Dataset entry.
861 Parameters
862 ----------
863 id : `DatasetId`
864 The unique identifier for the dataset.
865 storage_class : `str` or `StorageClass` or `None`
866 A storage class to use when creating the returned entry. If given
867 it must be compatible with the default storage class.
868 dimension_records : `bool`, optional
869 If `True` the ref will be expanded and contain dimension records.
870 datastore_records : `bool`, optional
871 If `True` the ref will contain associated datastore records.
873 Returns
874 -------
875 ref : `DatasetRef` or `None`
876 A ref to the Dataset, or `None` if no matching Dataset
877 was found.
878 """
879 raise NotImplementedError()
881 @abstractmethod
882 def find_dataset(
883 self,
884 dataset_type: DatasetType | str,
885 data_id: DataId | None = None,
886 *,
887 collections: str | Sequence[str] | None = None,
888 timespan: Timespan | None = None,
889 storage_class: str | StorageClass | None = None,
890 dimension_records: bool = False,
891 datastore_records: bool = False,
892 **kwargs: Any,
893 ) -> DatasetRef | None:
894 """Find a dataset given its `DatasetType` and data ID.
896 This can be used to obtain a `DatasetRef` that permits the dataset to
897 be read from a `Datastore`. If the dataset is a component and can not
898 be found using the provided dataset type, a dataset ref for the parent
899 will be returned instead but with the correct dataset type.
901 Parameters
902 ----------
903 dataset_type : `DatasetType` or `str`
904 A `DatasetType` or the name of one. If this is a `DatasetType`
905 instance, its storage class will be respected and propagated to
906 the output, even if it differs from the dataset type definition
907 in the registry, as long as the storage classes are convertible.
908 data_id : `dict` or `DataCoordinate`, optional
909 A `dict`-like object containing the `Dimension` links that identify
910 the dataset within a collection. If it is a `dict` the dataId
911 can include dimension record values such as ``day_obs`` and
912 ``seq_num`` or ``full_name`` that can be used to derive the
913 primary dimension.
914 collections : `str` or `list` [`str`], optional
915 An ordered list of collections to search for the dataset.
916 Defaults to ``self.defaults.collections``.
917 timespan : `Timespan`, optional
918 A timespan that the validity range of the dataset must overlap.
919 If not provided, any `~CollectionType.CALIBRATION` collections
920 matched by the ``collections`` argument will not be searched.
921 storage_class : `str` or `StorageClass` or `None`
922 A storage class to use when creating the returned entry. If given
923 it must be compatible with the default storage class.
924 dimension_records : `bool`, optional
925 If `True` the ref will be expanded and contain dimension records.
926 datastore_records : `bool`, optional
927 If `True` the ref will contain associated datastore records.
928 **kwargs
929 Additional keyword arguments passed to
930 `DataCoordinate.standardize` to convert ``dataId`` to a true
931 `DataCoordinate` or augment an existing one. This can also include
932 dimension record metadata that can be used to derive a primary
933 dimension value.
935 Returns
936 -------
937 ref : `DatasetRef`
938 A reference to the dataset, or `None` if no matching Dataset
939 was found.
941 Raises
942 ------
943 lsst.daf.butler.NoDefaultCollectionError
944 Raised if ``collections`` is `None` and
945 ``self.collections`` is `None`.
946 LookupError
947 Raised if one or more data ID keys are missing.
948 lsst.daf.butler.MissingDatasetTypeError
949 Raised if the dataset type does not exist.
950 lsst.daf.butler.MissingCollectionError
951 Raised if any of ``collections`` does not exist in the registry.
953 Notes
954 -----
955 This method simply returns `None` and does not raise an exception even
956 when the set of collections searched is intrinsically incompatible with
957 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
958 only `~CollectionType.CALIBRATION` collections are being searched.
959 This may make it harder to debug some lookup failures, but the behavior
960 is intentional; we consider it more important that failed searches are
961 reported consistently, regardless of the reason, and that adding
962 additional collections that do not contain a match to the search path
963 never changes the behavior.
965 This method handles component dataset types automatically, though most
966 other query operations do not.
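Examples
--------
A minimal sketch; the dataset type name, data ID values, and collection
name are illustrative placeholders::

ref = butler.find_dataset(
"someDatasetType", instrument="SomeCam", detector=1,
collections="u/alice/example-run"
)
# ``ref`` is `None` if no matching dataset was found.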
967 """
968 raise NotImplementedError()
970 @abstractmethod
971 def retrieveArtifacts(
972 self,
973 refs: Iterable[DatasetRef],
974 destination: ResourcePathExpression,
975 transfer: str = "auto",
976 preserve_path: bool = True,
977 overwrite: bool = False,
978 ) -> list[ResourcePath]:
979 """Retrieve the artifacts associated with the supplied refs.
981 Parameters
982 ----------
983 refs : iterable of `DatasetRef`
984 The datasets for which artifacts are to be retrieved.
985 A single ref can result in multiple artifacts. The refs must
986 be resolved.
987 destination : `lsst.resources.ResourcePath` or `str`
988 Location to write the artifacts.
989 transfer : `str`, optional
990 Method to use to transfer the artifacts. Must be one of the options
991 supported by `~lsst.resources.ResourcePath.transfer_from()`.
992 "move" is not allowed.
993 preserve_path : `bool`, optional
994 If `True` the full path of the artifact within the datastore
995 is preserved. If `False` the final file component of the path
996 is used.
997 overwrite : `bool`, optional
998 If `True` allow transfers to overwrite existing files at the
999 destination.
1001 Returns
1002 -------
1003 targets : `list` of `lsst.resources.ResourcePath`
1004 URIs of file artifacts in destination location. Order is not
1005 preserved.
1007 Notes
1008 -----
1009 For non-file datastores the artifacts written to the destination
1010 may not match the representation inside the datastore. For example
1011 a hierarchical data structure in a NoSQL database may well be stored
1012 as a JSON file.
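Examples
--------
A minimal sketch; the dataset type name, collection name, and destination
directory are illustrative placeholders::

refs = butler.registry.queryDatasets(
"someDatasetType", collections="u/alice/example-run"
)
paths = butler.retrieveArtifacts(refs, "/tmp/artifact-dump", transfer="copy")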
1013 """
1014 raise NotImplementedError()
1016 @abstractmethod
1017 def exists(
1018 self,
1019 dataset_ref_or_type: DatasetRef | DatasetType | str,
1020 /,
1021 data_id: DataId | None = None,
1022 *,
1023 full_check: bool = True,
1024 collections: Any = None,
1025 **kwargs: Any,
1026 ) -> DatasetExistence:
1027 """Indicate whether a dataset is known to Butler registry and
1028 datastore.
1030 Parameters
1031 ----------
1032 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1033 When a `DatasetRef` is provided, ``data_id`` should be `None`.
1034 Otherwise the `DatasetType` or name thereof.
1035 data_id : `dict` or `DataCoordinate`
1036 A `dict` of `Dimension` link name, value pairs that label the
1037 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1038 should be provided as the first argument.
1039 full_check : `bool`, optional
1040 If `True`, a check will be made for the actual existence of a
1041 dataset artifact. This will involve additional overhead due to
1042 the need to query an external system. If `False`, this check will
1043 be omitted, and the registry and datastore will solely be asked
1044 if they know about the dataset but no direct check for the
1045 artifact will be performed.
1046 collections : Any, optional
1047 Collections to be searched, overriding ``self.collections``.
1048 Can be any of the types supported by the ``collections`` argument
1049 to butler construction.
1050 **kwargs
1051 Additional keyword arguments used to augment or construct a
1052 `DataCoordinate`. See `DataCoordinate.standardize`
1053 parameters.
1055 Returns
1056 -------
1057 existence : `DatasetExistence`
1058 Object indicating whether the dataset is known to registry and
1059 datastore. Evaluates to `True` if the dataset is present and known
1060 to both.
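Examples
--------
A minimal sketch; the dataset type name and data ID values are
illustrative placeholders::

existence = butler.exists(
"someDatasetType", instrument="SomeCam", detector=1, full_check=False
)
# ``existence`` evaluates to `True` only if the dataset is known to
# both registry and datastore.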
1061 """
1062 raise NotImplementedError()
1064 @abstractmethod
1065 def _exists_many(
1066 self,
1067 refs: Iterable[DatasetRef],
1068 /,
1069 *,
1070 full_check: bool = True,
1071 ) -> dict[DatasetRef, DatasetExistence]:
1072 """Indicate whether multiple datasets are known to Butler registry and
1073 datastore.
1075 This is an experimental API that may change at any moment.
1077 Parameters
1078 ----------
1079 refs : iterable of `DatasetRef`
1080 The datasets to be checked.
1081 full_check : `bool`, optional
1082 If `True`, a check will be made for the actual existence of each
1083 dataset artifact. This will involve additional overhead due to
1084 the need to query an external system. If `False`, this check will
1085 be omitted, and the registry and datastore will solely be asked
1086 if they know about the dataset(s) but no direct check for the
1087 artifact(s) will be performed.
1089 Returns
1090 -------
1091 existence : dict of [`DatasetRef`, `DatasetExistence`]
1092 Mapping from the given dataset refs to an enum indicating the
1093 status of the dataset in registry and datastore.
1094 Each value evaluates to `True` if the dataset is present and known
1095 to both.
1096 """
1097 raise NotImplementedError()
1099 @abstractmethod
1100 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1101 """Remove one or more `~CollectionType.RUN` collections and the
1102 datasets within them.
1104 Parameters
1105 ----------
1106 names : `~collections.abc.Iterable` [ `str` ]
1107 The names of the collections to remove.
1108 unstore : `bool`, optional
1109 If `True` (default), delete datasets from all datastores in which
1110 they are present, and attempt to rollback the registry deletions if
1111 datastore deletions fail (which may not always be possible). If
1112 `False`, datastore records for these datasets are still removed,
1113 but any artifacts (e.g. files) will not be.
1115 Raises
1116 ------
1117 TypeError
1118 Raised if one or more collections are not of type
1119 `~CollectionType.RUN`.
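Examples
--------
A minimal sketch; the run name is an illustrative placeholder::

butler.removeRuns(["u/alice/scratch-run"], unstore=True)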
1120 """
1121 raise NotImplementedError()
1123 @abstractmethod
1124 def ingest(
1125 self,
1126 *datasets: FileDataset,
1127 transfer: str | None = "auto",
1128 record_validation_info: bool = True,
1129 ) -> None:
1130 """Store and register one or more datasets that already exist on disk.
1132 Parameters
1133 ----------
1134 *datasets : `FileDataset`
1135 Each positional argument is a struct containing information about
1136 a file to be ingested, including its URI (either absolute or
1137 relative to the datastore root, if applicable), a resolved
1138 `DatasetRef`, and optionally a formatter class or its
1139 fully-qualified string name. If a formatter is not provided, the
1140 formatter that would be used for `put` is assumed. On successful
1141 ingest all `FileDataset.formatter` attributes will be set to the
1142 formatter class used. `FileDataset.path` attributes may be modified
1143 to put paths in whatever the datastore considers a standardized
1144 form.
1145 transfer : `str`, optional
1146 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1147 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1148 transfer the file.
1149 record_validation_info : `bool`, optional
1150 If `True`, the default, the datastore can record validation
1151 information associated with the file. If `False` the datastore
1152 will not attempt to track any information such as checksums
1153 or file sizes. This can be useful if such information is tracked
1154 in an external system or if the file is to be compressed in place.
1155 It is up to the datastore whether this parameter is relevant.
1157 Raises
1158 ------
1159 TypeError
1160 Raised if the butler is read-only or if no run was provided.
1161 NotImplementedError
1162 Raised if the `Datastore` does not support the given transfer mode.
1163 DatasetTypeNotSupportedError
1164 Raised if one or more files to be ingested have a dataset type that
1165 is not supported by the `Datastore`.
1166 FileNotFoundError
1167 Raised if one of the given files does not exist.
1168 FileExistsError
1169 Raised if transfer is not `None` but the (internal) location the
1170 file would be moved to is already occupied.
1172 Notes
1173 -----
1174 This operation is not fully exception safe: if a database operation
1175 fails, the given `FileDataset` instances may be only partially updated.
1177 It is atomic in terms of database operations (they will either all
1178 succeed or all fail) providing the database engine implements
1179 transactions correctly. It will attempt to be atomic in terms of
1180 filesystem operations as well, but this cannot be implemented
1181 rigorously for most datastores.
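Examples
--------
A minimal sketch, assuming ``resolved_ref`` is an existing resolved
`DatasetRef`; the file path is an illustrative placeholder and the
``path``/``refs`` keywords reflect the usual `FileDataset` fields::

dataset = FileDataset(path="/data/staging/image.fits", refs=[resolved_ref])
butler.ingest(dataset, transfer="copy")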
1182 """
1183 raise NotImplementedError()
1185 @abstractmethod
1186 def export(
1187 self,
1188 *,
1189 directory: str | None = None,
1190 filename: str | None = None,
1191 format: str | None = None,
1192 transfer: str | None = None,
1193 ) -> AbstractContextManager[RepoExportContext]:
1194 """Export datasets from the repository represented by this `Butler`.
1196 This method is a context manager that returns a helper object
1197 (`RepoExportContext`) that is used to indicate what information from
1198 the repository should be exported.
1200 Parameters
1201 ----------
1202 directory : `str`, optional
1203 Directory dataset files should be written to if ``transfer`` is not
1204 `None`.
1205 filename : `str`, optional
1206 Name for the file that will include database information associated
1207 with the exported datasets. If this is not an absolute path and
1208 ``directory`` is not `None`, it will be written to ``directory``
1209 instead of the current working directory. Defaults to
1210 "export.{format}".
1211 format : `str`, optional
1212 File format for the database information file. If `None`, the
1213 extension of ``filename`` will be used.
1214 transfer : `str`, optional
1215 Transfer mode passed to `Datastore.export`.
1217 Raises
1218 ------
1219 TypeError
1220 Raised if the set of arguments passed is inconsistent.
1222 Examples
1223 --------
1224 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1225 methods are used to provide the iterables over data IDs and/or datasets
1226 to be exported::
1228 with butler.export("exports.yaml") as export:
1229 # Export all flats, but none of the dimension element rows
1230 # (i.e. data ID information) associated with them.
1231 export.saveDatasets(butler.registry.queryDatasets("flat"),
1232 elements=())
1233 # Export all datasets that start with "deepCoadd_" and all of
1234 # their associated data ID information.
1235 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1236 """
1237 raise NotImplementedError()
1239 @abstractmethod
1240 def import_(
1241 self,
1242 *,
1243 directory: ResourcePathExpression | None = None,
1244 filename: ResourcePathExpression | TextIO | None = None,
1245 format: str | None = None,
1246 transfer: str | None = None,
1247 skip_dimensions: set | None = None,
1248 ) -> None:
1249 """Import datasets into this repository that were exported from a
1250 different butler repository via `~lsst.daf.butler.Butler.export`.
1252 Parameters
1253 ----------
1254 directory : `~lsst.resources.ResourcePathExpression`, optional
1255 Directory containing dataset files to import from. If `None`,
1256 ``filename`` and all dataset file paths specified therein must
1257 be absolute.
1258 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1259 A stream or name of file that contains database information
1260 associated with the exported datasets, typically generated by
1261 `~lsst.daf.butler.Butler.export`. If this is a string (name) or
1262 `~lsst.resources.ResourcePath` and is not an absolute path,
1263 it will first be looked for relative to ``directory`` and if not
1264 found there it will be looked for in the current working
1265 directory. Defaults to "export.{format}".
1266 format : `str`, optional
1267 File format for ``filename``. If `None`, the extension of
1268 ``filename`` will be used.
1269 transfer : `str`, optional
1270 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1271 skip_dimensions : `set`, optional
1272 Names of dimensions that should be skipped and not imported.
1274 Raises
1275 ------
1276 TypeError
1277 Raised if the set of arguments passed is inconsistent, or if the
1278 butler is read-only.
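Examples
--------
A minimal sketch; the directory and file names are illustrative
placeholders::

butler.import_(
directory="/path/to/exports", filename="export.yaml", transfer="symlink"
)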
1279 """
1280 raise NotImplementedError()
1282 @abstractmethod
1283 def transfer_dimension_records_from(
1284 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1285 ) -> None:
1286 """Transfer dimension records to this Butler from another Butler.
1288 Parameters
1289 ----------
1290 source_butler : `LimitedButler` or `Butler`
1291 Butler from which the records are to be transferred. If data IDs
1292 in ``source_refs`` are not expanded then this has to be a full
1293 `Butler` whose registry will be used to expand data IDs. If the
1294 source refs contain coordinates that are used to populate other
1295 records then this will also need to be a full `Butler`.
1296 source_refs : iterable of `DatasetRef`
1297 Datasets defined in the source butler whose dimension records
1298 should be transferred to this butler. In most circumstances,
1299 transfer is faster if the dataset refs are expanded.
1300 """
1301 raise NotImplementedError()
1303 @abstractmethod
1304 def transfer_from(
1305 self,
1306 source_butler: LimitedButler,
1307 source_refs: Iterable[DatasetRef],
1308 transfer: str = "auto",
1309 skip_missing: bool = True,
1310 register_dataset_types: bool = False,
1311 transfer_dimensions: bool = False,
1312 ) -> Collection[DatasetRef]:
1313 """Transfer datasets to this Butler from a run in another Butler.
1315 Parameters
1316 ----------
1317 source_butler : `LimitedButler`
1318 Butler from which the datasets are to be transferred. If data IDs
1319 in ``source_refs`` are not expanded then this has to be a full
1320 `Butler` whose registry will be used to expand data IDs.
1321 source_refs : iterable of `DatasetRef`
1322 Datasets defined in the source butler that should be transferred to
1323 this butler. In most circumstances, ``transfer_from`` is faster if
1324 the dataset refs are expanded.
1325 transfer : `str`, optional
1326 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1327 skip_missing : `bool`
1328 If `True`, datasets with no datastore artifact associated with
1329 them are not transferred. If `False` a registry entry will be
1330 created even if no datastore record is created (and so will
1331 look equivalent to the dataset being unstored).
1332 register_dataset_types : `bool`
1333 If `True` any missing dataset types are registered. Otherwise
1334 an exception is raised.
1335 transfer_dimensions : `bool`, optional
1336 If `True`, dimension record data associated with the new datasets
1337 will be transferred.
1339 Returns
1340 -------
1341 refs : `list` of `DatasetRef`
1342 The refs added to this Butler.
1344 Notes
1345 -----
1346 The datastore artifact has to exist for a transfer
1347 to be made but non-existence is not an error.
1349 Datasets that already exist in this run will be skipped.
1351 The datasets are imported as part of a transaction, although
1352 dataset types are registered before the transaction is started.
1353 This means that it is possible for a dataset type to be registered
1354 even though transfer has failed.
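Examples
--------
A minimal sketch; the dataset type and collection names are illustrative
placeholders, and ``source_butler`` is assumed to be a full `Butler` so
that data IDs can be expanded::

refs = source_butler.registry.queryDatasets(
"someDatasetType", collections="u/alice/example-run"
)
transferred = butler.transfer_from(
source_butler, refs, transfer="copy",
register_dataset_types=True, transfer_dimensions=True
)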
1355 """
1356 raise NotImplementedError()
1358 @abstractmethod
1359 def validateConfiguration(
1360 self,
1361 logFailures: bool = False,
1362 datasetTypeNames: Iterable[str] | None = None,
1363 ignore: Iterable[str] | None = None,
1364 ) -> None:
1365 """Validate butler configuration.
1367 Checks that each `DatasetType` can be stored in the `Datastore`.
1369 Parameters
1370 ----------
1371 logFailures : `bool`, optional
1372 If `True`, output a log message for every validation error
1373 detected.
1374 datasetTypeNames : iterable of `str`, optional
1375 The `DatasetType` names that should be checked. This allows
1376 only a subset to be selected.
1377 ignore : iterable of `str`, optional
1378 Names of DatasetTypes to skip over. This can be used to skip
1379 known problems. If a named `DatasetType` corresponds to a
1380 composite, all components of that `DatasetType` will also be
1381 ignored.
1383 Raises
1384 ------
1385 ButlerValidationError
1386 Raised if there is some inconsistency with how this Butler
1387 is configured.
1388 """
1389 raise NotImplementedError()
1391 @property
1392 @abstractmethod
1393 def collections(self) -> Sequence[str]:
1394 """The collections to search by default, in order
1395 (`~collections.abc.Sequence` [ `str` ]).
1396 """
1397 raise NotImplementedError()
1399 @property
1400 @abstractmethod
1401 def run(self) -> str | None:
1402 """Name of the run this butler writes outputs to by default (`str` or
1403 `None`).
1404 """
1405 raise NotImplementedError()
1407 @property
1408 @abstractmethod
1409 def registry(self) -> Registry:
1410 """The object that manages dataset metadata and relationships
1411 (`Registry`).
1413 Many operations that don't involve reading or writing butler datasets
1414 are accessible only via `Registry` methods. Eventually these methods
1415 will be replaced by equivalent `Butler` methods.
1416 """
1417 raise NotImplementedError()
1419 @abstractmethod
1420 def _query(self) -> AbstractContextManager[Query]:
1421 """Context manager returning a `Query` object used for construction
1422 and execution of complex queries.
1423 """
1424 raise NotImplementedError()
1426 @abstractmethod
1427 def _query_data_ids(
1428 self,
1429 dimensions: DimensionGroup | Iterable[str] | str,
1430 *,
1431 data_id: DataId | None = None,
1432 where: str = "",
1433 bind: Mapping[str, Any] | None = None,
1434 expanded: bool = False,
1435 order_by: Iterable[str] | str | None = None,
1436 limit: int | None = None,
1437 offset: int | None = None,
1438 explain: bool = True,
1439 **kwargs: Any,
1440 ) -> list[DataCoordinate]:
1441 """Query for data IDs matching user-provided criteria.
1443 Parameters
1444 ----------
1445 dimensions : `DimensionGroup`, `str`, or \
1446 `~collections.abc.Iterable` [`str`]
1447 The dimensions of the data IDs to yield, as either `DimensionGroup`
1448 instances or `str`. Will be automatically expanded to a complete
1449 `DimensionGroup`.
1450 data_id : `dict` or `DataCoordinate`, optional
1451 A data ID whose key-value pairs are used as equality constraints
1452 in the query.
1453 where : `str`, optional
1454 A string expression similar to a SQL WHERE clause. May involve
1455 any column of a dimension table or (as a shortcut for the primary
1456 key column of a dimension table) dimension name. See
1457 :ref:`daf_butler_dimension_expressions` for more information.
1458 bind : `~collections.abc.Mapping`, optional
1459 Mapping containing literal values that should be injected into the
1460 ``where`` expression, keyed by the identifiers they replace.
1461 Values of collection type can be expanded in some cases; see
1462 :ref:`daf_butler_dimension_expressions_identifiers` for more
1463 information.
1464 expanded : `bool`, optional
1465 If `True` (default is `False`) then returned data IDs will have
1466 dimension records.
1467 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1468 Names of the columns/dimensions to use for ordering returned data
1469 IDs. A column name can be prefixed with a minus (``-``) to use
1470 descending ordering.
1471 limit : `int`, optional
1472 Upper limit on the number of returned records.
1473 offset : `int`, optional
1474 The number of records to skip before returning at most ``limit``
1475 records. If ``offset`` is specified then ``limit`` must be
1476 specified as well.
1477 explain : `bool`, optional
1478 If `True` (default) then `EmptyQueryResultError` exception is
1479 raised when the resulting list is empty. The exception contains a
1480 non-empty list of strings explaining possible causes for the empty
1481 result.
1482 **kwargs
1483 Additional keyword arguments are forwarded to
1484 `DataCoordinate.standardize` when processing the ``data_id``
1485 argument (and may be used to provide a constraining data ID even
1486 when the ``data_id`` argument is `None`).
1488 Returns
1489 -------
1490 dataIds : `list` [`DataCoordinate`]
1491 Data IDs matching the given query parameters. These are always
1492 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1493 returns `True`).
1495 Raises
1496 ------
1497 lsst.daf.butler.registry.DataIdError
1498 Raised when ``data_id`` or keyword arguments specify unknown
1499 dimensions or values, or when they contain inconsistent values.
1500 lsst.daf.butler.registry.UserExpressionError
1501 Raised when ``where`` expression is invalid.
1502 lsst.daf.butler.EmptyQueryResultError
1503 Raised when the query generates an empty result and ``explain`` is
1504 set to `True`.
1505 TypeError
1506 Raised when the arguments are incompatible, e.g. ``offset`` is
1507 specified, but ``limit`` is not.
1508 """
1509 raise NotImplementedError()
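# Illustrative sketch (editorial, hedged): a ``_query_data_ids`` call exercising
# ``where``, ``bind``, ``order_by``, and ``limit`` as documented above.  The
# repository path, instrument name, and visit threshold are hypothetical, and
# ``_query_data_ids`` is an internal abstract hook shown only to illustrate the
# documented signature.
from lsst.daf.butler import Butler

butler = Butler("/repo/example", collections=["HSC/defaults"])
data_ids = butler._query_data_ids(
    ["visit", "detector"],
    where="instrument = 'HSC' AND visit > min_visit",
    bind={"min_visit": 100},  # literal injected for the ``min_visit`` identifier
    order_by="-visit",  # descending by visit
    limit=10,
    explain=False,  # return an empty list instead of raising
)
for data_id in data_ids:
    print(data_id["visit"], data_id["detector"])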
1511 @abstractmethod
1512 def _query_datasets(
1513 self,
1514 dataset_type: Any,
1515 collections: CollectionArgType | None = None,
1516 *,
1517 find_first: bool = True,
1518 data_id: DataId | None = None,
1519 where: str = "",
1520 bind: Mapping[str, Any] | None = None,
1521 expanded: bool = False,
1522 explain: bool = True,
1523 **kwargs: Any,
1524 ) -> list[DatasetRef]:
1525 """Query for dataset references matching user-provided criteria.
1527 Parameters
1528 ----------
1529 dataset_type : dataset type expression
1530 An expression that fully or partially identifies the dataset types
1531 to be queried. Allowed types include `DatasetType`, `str`,
1532 `re.Pattern`, and iterables thereof. The special value ``...`` can
1533 be used to query all dataset types. See
1534 :ref:`daf_butler_dataset_type_expressions` for more information.
1535 collections : collection expression, optional
1536 An expression that identifies the collections to search, such as a
1537 `str` (for full matches or partial matches via globs), `re.Pattern`
1538 (for partial matches), or iterable thereof. ``...`` can be used to
1539 search all collections (actually just all `~CollectionType.RUN`
1540 collections, because this will still find all datasets).
1541 If not provided, the default collections are used. See
1542 :ref:`daf_butler_collection_expressions` for more information.
1543 find_first : `bool`, optional
1544 If `True` (default), for each result data ID, only yield one
1545 `DatasetRef` of each `DatasetType`, from the first collection in
1546 which a dataset of that dataset type appears (according to the
1547 order of ``collections`` passed in). If `True`, ``collections``
1548 must not contain regular expressions and may not be ``...``.
1549 data_id : `dict` or `DataCoordinate`, optional
1550 A data ID whose key-value pairs are used as equality constraints
1551 in the query.
1552 where : `str`, optional
1553 A string expression similar to a SQL WHERE clause. May involve
1554 any column of a dimension table or (as a shortcut for the primary
1555 key column of a dimension table) dimension name. See
1556 :ref:`daf_butler_dimension_expressions` for more information.
1557 bind : `~collections.abc.Mapping`, optional
1558 Mapping containing literal values that should be injected into the
1559 ``where`` expression, keyed by the identifiers they replace.
1560 Values of collection type can be expanded in some cases; see
1561 :ref:`daf_butler_dimension_expressions_identifiers` for more
1562 information.
1563 expanded : `bool`, optional
1564 If `True` (default is `False`) then returned data IDs will have
1565 dimension records.
1566 explain : `bool`, optional
1567 If `True` (default) then an `EmptyQueryResultError` exception is
1568 raised when the resulting list is empty. The exception contains a
1569 non-empty list of strings explaining possible causes for the empty
1570 result.
1571 **kwargs
1572 Additional keyword arguments are forwarded to
1573 `DataCoordinate.standardize` when processing the ``data_id``
1574 argument (and may be used to provide a constraining data ID even
1575 when the ``data_id`` argument is `None`).
1577 Returns
1578 -------
1579 refs : `list` [`DatasetRef`]
1580 Dataset references matching the given query criteria. Nested data
1581 IDs are guaranteed to include values for all implied dimensions
1582 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1583 include dimension records (`DataCoordinate.hasRecords` will be
1584 `False`) unless ``expanded`` is `True`, in which case records are
1585 attached to the returned data IDs.
1587 Raises
1588 ------
1589 lsst.daf.butler.registry.DatasetTypeExpressionError
1590 Raised when ``dataset_type`` expression is invalid.
1591 lsst.daf.butler.registry.DataIdError
1592 Raised when ``data_id`` or keyword arguments specify unknown
1593 dimensions or values, or when they contain inconsistent values.
1594 lsst.daf.butler.registry.UserExpressionError
1595 Raised when ``where`` expression is invalid.
1596 lsst.daf.butler.EmptyQueryResultError
1597 Raised when the query generates an empty result and ``explain`` is
1598 set to `True`.
1599 TypeError
1600 Raised when the arguments are incompatible, such as when a
1601 collection wildcard is passed when ``find_first`` is `True`, or
1602 when ``collections`` is `None` and default butler collections are
1603 not defined.
1605 Notes
1606 -----
1607 When multiple dataset types are queried in a single call, the
1608 results of this operation are equivalent to querying for each dataset
1609 type separately in turn, and no information about the relationships
1610 between datasets of different types is included.
1611 """
1612 raise NotImplementedError()
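# Illustrative sketch (editorial, hedged): a find-first dataset search via the
# internal ``_query_datasets`` hook documented above.  The dataset type
# "calexp", collection "HSC/runs/example", and data-ID constraint are
# hypothetical placeholders for whatever exists in a given repository.
from lsst.daf.butler import Butler

butler = Butler("/repo/example")
refs = butler._query_datasets(
    "calexp",
    collections=["HSC/runs/example"],
    where="instrument = 'HSC' AND detector = 42",
    find_first=True,  # one ref per data ID, from the first matching collection
    explain=False,
)
for ref in refs:
    print(ref.datasetType.name, ref.dataId, ref.run)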
1614 @abstractmethod
1615 def _query_dimension_records(
1616 self,
1617 element: str,
1618 *,
1619 data_id: DataId | None = None,
1620 where: str = "",
1621 bind: Mapping[str, Any] | None = None,
1622 order_by: Iterable[str] | str | None = None,
1623 limit: int | None = None,
1624 offset: int | None = None,
1625 explain: bool = True,
1626 **kwargs: Any,
1627 ) -> list[DimensionRecord]:
1628 """Query for dimension information matching user-provided criteria.
1630 Parameters
1631 ----------
1632 element : `str`
1633 The name of a dimension element to obtain records for.
1634 data_id : `dict` or `DataCoordinate`, optional
1635 A data ID whose key-value pairs are used as equality constraints
1636 in the query.
1637 where : `str`, optional
1638 A string expression similar to a SQL WHERE clause. See
1639 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1640 information.
1641 bind : `~collections.abc.Mapping`, optional
1642 Mapping containing literal values that should be injected into the
1643 ``where`` expression, keyed by the identifiers they replace.
1644 Values of collection type can be expanded in some cases; see
1645 :ref:`daf_butler_dimension_expressions_identifiers` for more
1646 information.
1647 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1648 Names of the columns/dimensions to use for ordering returned data
1649 IDs. A column name can be prefixed with a minus (``-``) to use
1650 descending ordering.
1651 limit : `int`, optional
1652 Upper limit on the number of returned records.
1653 offset : `int`, optional
1654 The number of records to skip before returning at most ``limit``
1655 records. If ``offset`` is specified then ``limit`` must be
1656 specified as well.
1657 explain : `bool`, optional
1658 If `True` (default) then an `EmptyQueryResultError` exception is
1659 raised when the resulting list is empty. The exception contains a
1660 non-empty list of strings explaining possible causes for the empty
1661 result.
1662 **kwargs
1663 Additional keyword arguments are forwarded to
1664 `DataCoordinate.standardize` when processing the ``data_id``
1665 argument (and may be used to provide a constraining data ID even
1666 when the ``data_id`` argument is `None`).
1668 Returns
1669 -------
1670 records : `list` [`DimensionRecord`]
1671 Dimension records matching the given query parameters.
1673 Raises
1674 ------
1675 lsst.daf.butler.registry.DataIdError
1676 Raised when ``data_id`` or keyword arguments specify unknown
1677 dimensions or values, or when they contain inconsistent values.
1678 lsst.daf.butler.registry.UserExpressionError
1679 Raised when ``where`` expression is invalid.
1680 lsst.daf.butler.EmptyQueryResultError
1681 Raised when the query generates an empty result and ``explain`` is
1682 set to `True`.
1683 TypeError
1684 Raised when the arguments are incompatible, e.g. ``offset`` is
1685 specified, but ``limit`` is not.
1688 """
1689 raise NotImplementedError()
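# Illustrative sketch (editorial, hedged): fetching dimension records for a
# single element through the internal ``_query_dimension_records`` hook.
# "detector" is a standard dimension element; the repository path and
# instrument name are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo/example")
records = butler._query_dimension_records(
    "detector",
    where="instrument = 'HSC'",
    order_by="detector",  # primary-key shortcut for the detector dimension
    limit=5,
    explain=False,
)
for record in records:
    print(record.toDict())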
1691 @abstractmethod
1692 def _clone(
1693 self,
1694 *,
1695 collections: Any = None,
1696 run: str | None = None,
1697 inferDefaults: bool = True,
1698 **kwargs: Any,
1699 ) -> Butler:
1700 """Return a new Butler instance connected to the same repository
1701 as this one, but overriding ``collections``, ``run``,
1702 ``inferDefaults``, and default data ID.
1703 """
1704 raise NotImplementedError()
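# Illustrative sketch (editorial, hedged): deriving a writer with a different
# default output run from an existing read-oriented butler via the internal
# ``_clone`` hook documented above.  The run name "u/example/test-run" is a
# hypothetical placeholder.
from lsst.daf.butler import Butler

reader = Butler("/repo/example", collections=["HSC/defaults"])
writer = reader._clone(run="u/example/test-run", inferDefaults=True)
print(writer.run)  # "u/example/test-run"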