Coverage for python/lsst/daf/butler/_butler.py: 53%
185 statements
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["Butler"]
32from abc import abstractmethod
33from collections.abc import Collection, Iterable, Mapping, Sequence
34from contextlib import AbstractContextManager
35from typing import TYPE_CHECKING, Any, TextIO
37from lsst.resources import ResourcePath, ResourcePathExpression
38from lsst.utils import doImportType
39from lsst.utils.iteration import ensure_iterable
40from lsst.utils.logging import getLogger
42from ._butler_collections import ButlerCollections
43from ._butler_config import ButlerConfig, ButlerType
44from ._butler_instance_options import ButlerInstanceOptions
45from ._butler_repo_index import ButlerRepoIndex
46from ._config import Config, ConfigSubset
47from ._exceptions import EmptyQueryResultError
48from ._limited_butler import LimitedButler
49from .datastore import Datastore
50from .dimensions import DimensionConfig
51from .registry import RegistryConfig, _RegistryFactory
52from .repo_relocation import BUTLER_ROOT_TAG
54if TYPE_CHECKING:
55 from ._dataset_existence import DatasetExistence
56 from ._dataset_ref import DatasetId, DatasetRef
57 from ._dataset_type import DatasetType
58 from ._deferredDatasetHandle import DeferredDatasetHandle
59 from ._file_dataset import FileDataset
60 from ._storage_class import StorageClass
61 from ._timespan import Timespan
62 from .datastore import DatasetRefURIs
63 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
64 from .queries import Query
65 from .registry import Registry
66 from .transfers import RepoExportContext
68_LOG = getLogger(__name__)
71class Butler(LimitedButler): # numpydoc ignore=PR02
72 """Interface for data butler and factory for Butler instances.
74 Parameters
75 ----------
76 config : `ButlerConfig`, `Config` or `str`, optional
77 Configuration. Anything acceptable to the `ButlerConfig` constructor.
78 If a directory path is given the configuration will be read from a
79 ``butler.yaml`` file in that location. If `None` is given default
80 values will be used. If ``config`` contains a "cls" key then its value is
81 used as the name of the butler class, which must be a sub-class of this
82 class; otherwise `DirectButler` is instantiated.
83 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
84 An expression specifying the collections to be searched (in order) when
85 reading datasets.
86 This may be a `str` collection name or an iterable thereof.
87 See :ref:`daf_butler_collection_expressions` for more information.
88 These collections are not registered automatically and must be
89 manually registered before they are used by any method, but they may be
90 manually registered after the `Butler` is initialized.
91 run : `str`, optional
92 Name of the `~CollectionType.RUN` collection new datasets should be
93 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
94 ``collections`` will be set to ``[run]``. If not `None`, this
95 collection will automatically be registered. If this is not set (and
96 ``writeable`` is not set either), a read-only butler will be created.
97 searchPaths : `list` of `str`, optional
98 Directory paths to search when calculating the full Butler
99 configuration. Not used if the supplied config is already a
100 `ButlerConfig`.
101 writeable : `bool`, optional
102 Explicitly sets whether the butler supports write operations. If not
103 provided, a read-write butler is created if any of ``run``, ``tags``,
104 or ``chains`` is non-empty.
105 inferDefaults : `bool`, optional
106 If `True` (default) infer default data ID values from the values
107 present in the datasets in ``collections``: if all collections have the
108 same value (or no value) for a governor dimension, that value will be
109 the default for that dimension. Nonexistent collections are ignored.
110 If a default value is provided explicitly for a governor dimension via
111 ``**kwargs``, no default will be inferred for that dimension.
112 without_datastore : `bool`, optional
113 If `True` do not attach a datastore to this butler. Any attempts
114 to use a datastore will fail.
115 **kwargs : `Any`
116 Additional keyword arguments passed to the constructor of the actual
117 butler class.
119 Notes
120 -----
121 The preferred way to instantiate Butler is via the `from_config` method.
122 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
123 but ``mypy`` will complain about the former.
124 """
126 def __new__(
127 cls,
128 config: Config | ResourcePathExpression | None = None,
129 *,
130 collections: Any = None,
131 run: str | None = None,
132 searchPaths: Sequence[ResourcePathExpression] | None = None,
133 writeable: bool | None = None,
134 inferDefaults: bool = True,
135 without_datastore: bool = False,
136 **kwargs: Any,
137 ) -> Butler:
138 if cls is Butler:
139 return Butler.from_config(
140 config=config,
141 collections=collections,
142 run=run,
143 searchPaths=searchPaths,
144 writeable=writeable,
145 inferDefaults=inferDefaults,
146 without_datastore=without_datastore,
147 **kwargs,
148 )
150 # Note: we do not pass any parameters to __new__; Python will pass them
151 # to __init__ after __new__ returns the sub-class instance.
152 return super().__new__(cls)
154 @classmethod
155 def from_config(
156 cls,
157 config: Config | ResourcePathExpression | None = None,
158 *,
159 collections: Any = None,
160 run: str | None = None,
161 searchPaths: Sequence[ResourcePathExpression] | None = None,
162 writeable: bool | None = None,
163 inferDefaults: bool = True,
164 without_datastore: bool = False,
165 **kwargs: Any,
166 ) -> Butler:
167 """Create butler instance from configuration.
169 Parameters
170 ----------
171 config : `ButlerConfig`, `Config` or `str`, optional
172 Configuration. Anything acceptable to the `ButlerConfig`
173 constructor. If a directory path is given the configuration will be
174 read from a ``butler.yaml`` file in that location. If `None` is
175 given default values will be used. If ``config`` contains a "cls" key
176 then its value is used as the name of the butler class, which must be a
177 sub-class of this class; otherwise `DirectButler` is instantiated.
178 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
179 An expression specifying the collections to be searched (in order)
180 when reading datasets.
181 This may be a `str` collection name or an iterable thereof.
182 See :ref:`daf_butler_collection_expressions` for more information.
183 These collections are not registered automatically and must be
184 manually registered before they are used by any method, but they
185 may be manually registered after the `Butler` is initialized.
186 run : `str`, optional
187 Name of the `~CollectionType.RUN` collection new datasets should be
188 inserted into. If ``collections`` is `None` and ``run`` is not
189 `None`, ``collections`` will be set to ``[run]``. If not `None`,
190 this collection will automatically be registered. If this is not
191 set (and ``writeable`` is not set either), a read-only butler will
192 be created.
193 searchPaths : `list` of `str`, optional
194 Directory paths to search when calculating the full Butler
195 configuration. Not used if the supplied config is already a
196 `ButlerConfig`.
197 writeable : `bool`, optional
198 Explicitly sets whether the butler supports write operations. If
199 not provided, a read-write butler is created if any of ``run``,
200 ``tags``, or ``chains`` is non-empty.
201 inferDefaults : `bool`, optional
202 If `True` (default) infer default data ID values from the values
203 present in the datasets in ``collections``: if all collections have
204 the same value (or no value) for a governor dimension, that value
205 will be the default for that dimension. Nonexistent collections
206 are ignored. If a default value is provided explicitly for a
207 governor dimension via ``**kwargs``, no default will be inferred
208 for that dimension.
209 without_datastore : `bool`, optional
210 If `True` do not attach a datastore to this butler. Any attempts
211 to use a datastore will fail.
212 **kwargs : `Any`
213 Default data ID key-value pairs. These may only identify
214 "governor" dimensions like ``instrument`` and ``skymap``.
216 Returns
217 -------
218 butler : `Butler`
219 A `Butler` constructed from the given configuration.
221 Notes
222 -----
223 Calling this factory method is identical to calling
224 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
225 complains about the ``Butler()`` call.
227 Examples
228 --------
229 While there are many ways to control exactly how a `Butler` interacts
230 with the collections in its `Registry`, the most common cases are still
231 simple.
233 For a read-only `Butler` that searches one collection, do::
235 butler = Butler.from_config(
236 "/path/to/repo", collections=["u/alice/DM-50000"]
237 )
239 For a read-write `Butler` that writes to and reads from a
240 `~CollectionType.RUN` collection::
242 butler = Butler.from_config(
243 "/path/to/repo", run="u/alice/DM-50000/a"
244 )
246 The `Butler` passed to a ``PipelineTask`` is often much more complex,
247 because we want to write to one `~CollectionType.RUN` collection but
248 read from several others (as well)::
250 butler = Butler.from_config(
251 "/path/to/repo",
252 run="u/alice/DM-50000/a",
253 collections=[
254 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
255 ]
256 )
258 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
259 Datasets will be read first from that run (since it appears first in
260 the chain), and then from ``u/bob/DM-49998`` and finally
261 ``HSC/defaults``.
263 Finally, one can always create a `Butler` with no collections::
265 butler = Butler.from_config("/path/to/repo", writeable=True)
267 This can be extremely useful when you just want to use
268 ``butler.registry``, e.g. for inserting dimension data or managing
269 collections, or when the collections you want to use with the butler
270 are not consistent. Passing ``writeable`` explicitly here is only
271 necessary if you want to be able to make changes to the repo - usually
272 the value for ``writeable`` can be guessed from the collection
273 arguments provided, but it defaults to `False` when there are no
274 collection arguments.
275 """
276 # DirectButler used to have a way to specify a "copy constructor" by
277 # passing the "butler" parameter to its constructor. This
278 # functionality has been moved out of the constructor into
279 # Butler._clone(), but the new interface is not public yet.
280 butler = kwargs.pop("butler", None)
281 if butler is not None:
282 if not isinstance(butler, Butler):
283 raise TypeError("'butler' parameter must be a Butler instance")
284 if config is not None or searchPaths is not None or writeable is not None:
285 raise TypeError(
286 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
287 )
288 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
290 options = ButlerInstanceOptions(
291 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
292 )
294 # Load the Butler configuration. This may involve searching the
295 # environment to locate a configuration file.
296 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
297 butler_type = butler_config.get_butler_type()
299 # Make DirectButler if class is not specified.
300 match butler_type:
301 case ButlerType.DIRECT:
302 from .direct_butler import DirectButler
304 return DirectButler.create_from_config(
305 butler_config,
306 options=options,
307 without_datastore=without_datastore,
308 )
309 case ButlerType.REMOTE:
310 from .remote_butler import RemoteButlerFactory
312 factory = RemoteButlerFactory.create_factory_from_config(butler_config)
313 return factory.create_butler_with_credentials_from_environment(butler_options=options)
314 case _:
315 raise TypeError(f"Unknown Butler type '{butler_type}'")
317 @staticmethod
318 def makeRepo(
319 root: ResourcePathExpression,
320 config: Config | str | None = None,
321 dimensionConfig: Config | str | None = None,
322 standalone: bool = False,
323 searchPaths: list[str] | None = None,
324 forceConfigRoot: bool = True,
325 outfile: ResourcePathExpression | None = None,
326 overwrite: bool = False,
327 ) -> Config:
328 """Create an empty data repository by adding a butler.yaml config
329 to a repository root directory.
331 Parameters
332 ----------
333 root : `lsst.resources.ResourcePathExpression`
334 Path or URI to the root location of the new repository. Will be
335 created if it does not exist.
336 config : `Config` or `str`, optional
337 Configuration to write to the repository, after setting any
338 root-dependent Registry or Datastore config options. Can not
339 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
340 configuration will be used. Root-dependent config options
341 specified in this config are overwritten if ``forceConfigRoot``
342 is `True`.
343 dimensionConfig : `Config` or `str`, optional
344 Configuration for dimensions, will be used to initialize registry
345 database.
346 standalone : `bool`
347 If `True`, write all expanded defaults, not just customized or
348 repository-specific settings.
349 This (mostly) decouples the repository from the default
350 configuration, insulating it from changes to the defaults (which
351 may be good or bad, depending on the nature of the changes).
352 Future *additions* to the defaults will still be picked up when
353 initializing `Butlers` to repos created with ``standalone=True``.
354 searchPaths : `list` of `str`, optional
355 Directory paths to search when calculating the full butler
356 configuration.
357 forceConfigRoot : `bool`, optional
358 If `False`, any values present in the supplied ``config`` that
359 would normally be reset are not overridden and will appear
360 directly in the output config. This allows non-standard overrides
361 of the root directory for a datastore or registry to be given.
362 If this parameter is `True` the values for ``root`` will be
363 forced into the resulting config if appropriate.
364 outfile : `lsst.resources.ResourcePathExpression`, optional
365 If not-`None`, the output configuration will be written to this
366 location rather than into the repository itself. Can be a URI
367 string. Can refer to a directory that will be used to write
368 ``butler.yaml``.
369 overwrite : `bool`, optional
370 Create a new configuration file even if one already exists
371 in the specified output location. Default is to raise
372 an exception.
374 Returns
375 -------
376 config : `Config`
377 The updated `Config` instance written to the repo.
379 Raises
380 ------
381 ValueError
382 Raised if a ButlerConfig or ConfigSubset is passed instead of a
383 regular Config (as these subclasses would make it impossible to
384 support ``standalone=False``).
385 FileExistsError
386 Raised if the output config file already exists.
387 os.error
388 Raised if the directory does not exist, exists but is not a
389 directory, or cannot be created.
391 Notes
392 -----
393 Note that when ``standalone=False`` (the default), the configuration
394 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
395 construct the repository should also be used to construct any Butlers
396 to avoid configuration inconsistencies.
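Examples
--------
A minimal sketch of bootstrapping a new repository and opening it; the
path below is an illustrative assumption::

    from lsst.daf.butler import Butler

    # Creates the directory (if needed) and writes butler.yaml plus an
    # empty registry using the default dimension configuration.
    config = Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)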
397 """
398 if isinstance(config, ButlerConfig | ConfigSubset):
399 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
401 # Ensure that the root of the repository exists or can be made
402 root_uri = ResourcePath(root, forceDirectory=True)
403 root_uri.mkdir()
405 config = Config(config)
407 # If we are creating a new repo from scratch with relative roots,
408 # do not propagate an explicit root from the config file
409 if "root" in config:
410 del config["root"]
412 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
413 imported_class = doImportType(full["datastore", "cls"])
414 if not issubclass(imported_class, Datastore):
415 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
416 datastoreClass: type[Datastore] = imported_class
417 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
419 # if key exists in given config, parse it, otherwise parse the defaults
420 # in the expanded config
421 if config.get(("registry", "db")):
422 registryConfig = RegistryConfig(config)
423 else:
424 registryConfig = RegistryConfig(full)
425 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
426 if defaultDatabaseUri is not None:
427 Config.updateParameters(
428 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
429 )
430 else:
431 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
433 if standalone:
434 config.merge(full)
435 else:
436 # Always expand the registry.managers section into the per-repo
437 # config, because after the database schema is created, it's not
438 # allowed to change anymore. Note that in the standalone=True
439 # branch, _everything_ in the config is expanded, so there's no
440 # need to special case this.
441 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
442 configURI: ResourcePathExpression
443 if outfile is not None:
444 # When writing to a separate location we must include
445 # the root of the butler repo in the config else it won't know
446 # where to look.
447 config["root"] = root_uri.geturl()
448 configURI = outfile
449 else:
450 configURI = root_uri
451 # Strip obscore configuration, if it is present, before writing config
452 # to a file, obscore config will be stored in registry.
453 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
454 config_to_write = config.copy()
455 del config_to_write[obscore_config_key]
456 config_to_write.dumpToUri(configURI, overwrite=overwrite)
457 # configFile attribute is updated, need to copy it to original.
458 config.configFile = config_to_write.configFile
459 else:
460 config.dumpToUri(configURI, overwrite=overwrite)
462 # Create Registry and populate tables
463 registryConfig = RegistryConfig(config.get("registry"))
464 dimensionConfig = DimensionConfig(dimensionConfig)
465 _RegistryFactory(registryConfig).create_from_config(
466 dimensionConfig=dimensionConfig, butlerRoot=root_uri
467 )
469 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
471 return config
473 @classmethod
474 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
475 """Look up the label in a butler repository index.
477 Parameters
478 ----------
479 label : `str`
480 Label of the Butler repository to look up.
481 return_label : `bool`, optional
482 If ``label`` cannot be found in the repository index (either
483 because the index is not defined or ``label`` is not in the index) and
484 ``return_label`` is `True` then return ``ResourcePath(label)``.
485 If ``return_label`` is `False` (default) then an exception will be
486 raised instead.
488 Returns
489 -------
490 uri : `lsst.resources.ResourcePath`
491 URI to the Butler repository associated with the given label or
492 default value if it is provided.
494 Raises
495 ------
496 KeyError
497 Raised if the label is not found in the index, or if an index
498 is not defined, and ``return_label`` is `False`.
500 Notes
501 -----
502 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
503 information is discovered.
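Examples
--------
A short sketch; the label ``"main"`` is a hypothetical entry that would
have to exist in the repository index::

    from lsst.daf.butler import Butler

    # Resolve an index label to the URI of the corresponding repository.
    uri = Butler.get_repo_uri("main")
    # Treat the argument as a plain path/URI when it is not in the index.
    uri = Butler.get_repo_uri("/some/local/repo", return_label=True)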
504 """
505 return ButlerRepoIndex.get_repo_uri(label, return_label)
507 @classmethod
508 def get_known_repos(cls) -> set[str]:
509 """Retrieve the list of known repository labels.
511 Returns
512 -------
513 repos : `set` of `str`
514 All the known labels. Can be empty if no index can be found.
516 Notes
517 -----
518 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
519 information is discovered.
520 """
521 return ButlerRepoIndex.get_known_repos()
523 @abstractmethod
524 def _caching_context(self) -> AbstractContextManager[None]:
525 """Context manager that enables caching."""
526 raise NotImplementedError()
528 @abstractmethod
529 def transaction(self) -> AbstractContextManager[None]:
530 """Context manager supporting `Butler` transactions.
532 Transactions can be nested.
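Examples
--------
A sketch of grouping related writes so they are committed or rolled back
together; the repository path, run name, dataset types, and data ID
values are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
    with butler.transaction():
        butler.put(
            {"mean": 1.5}, "my_summary", instrument="HSC", visit=903334
        )
        butler.put(
            {"count": 42}, "my_counts", instrument="HSC", visit=903334
        )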
533 """
534 raise NotImplementedError()
536 @abstractmethod
537 def put(
538 self,
539 obj: Any,
540 datasetRefOrType: DatasetRef | DatasetType | str,
541 /,
542 dataId: DataId | None = None,
543 *,
544 run: str | None = None,
545 **kwargs: Any,
546 ) -> DatasetRef:
547 """Store and register a dataset.
549 Parameters
550 ----------
551 obj : `object`
552 The dataset.
553 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
554 When `DatasetRef` is provided, ``dataId`` should be `None`.
555 Otherwise the `DatasetType` or name thereof. If a fully resolved
556 `DatasetRef` is given the run and ID are used directly.
557 dataId : `dict` or `DataCoordinate`
558 A `dict` of `Dimension` link name, value pairs that label the
559 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
560 should be provided as the second argument.
561 run : `str`, optional
562 The name of the run the dataset should be added to, overriding
563 ``self.run``. Not used if a resolved `DatasetRef` is provided.
564 **kwargs
565 Additional keyword arguments used to augment or construct a
566 `DataCoordinate`. See `DataCoordinate.standardize`
567 parameters. Not used if a resolved `DatasetRef` is provided.
569 Returns
570 -------
571 ref : `DatasetRef`
572 A reference to the stored dataset, updated with the correct id if
573 given.
575 Raises
576 ------
577 TypeError
578 Raised if the butler is read-only or if no run has been provided.
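Examples
--------
A minimal sketch; the repository path, run name, dataset type
``"my_summary"`` (assumed to be registered with a dict-like storage
class), and data ID values are all illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
    # Keyword arguments augment the data ID for the dataset type's
    # dimensions.
    ref = butler.put(
        {"mean": 1.5}, "my_summary", instrument="HSC", visit=903334
    )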
579 """
580 raise NotImplementedError()
582 @abstractmethod
583 def getDeferred(
584 self,
585 datasetRefOrType: DatasetRef | DatasetType | str,
586 /,
587 dataId: DataId | None = None,
588 *,
589 parameters: dict | None = None,
590 collections: Any = None,
591 storageClass: str | StorageClass | None = None,
592 timespan: Timespan | None = None,
593 **kwargs: Any,
594 ) -> DeferredDatasetHandle:
595 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
596 after an immediate registry lookup.
598 Parameters
599 ----------
600 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
601 When `DatasetRef` the `dataId` should be `None`.
602 Otherwise the `DatasetType` or name thereof.
603 dataId : `dict` or `DataCoordinate`, optional
604 A `dict` of `Dimension` link name, value pairs that label the
605 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
606 should be provided as the first argument.
607 parameters : `dict`
608 Additional StorageClass-defined options to control reading,
609 typically used to efficiently read only a subset of the dataset.
610 collections : Any, optional
611 Collections to be searched, overriding ``self.collections``.
612 Can be any of the types supported by the ``collections`` argument
613 to butler construction.
614 storageClass : `StorageClass` or `str`, optional
615 The storage class to be used to override the Python type
616 returned by this method. By default the returned type matches
617 the dataset type definition for this dataset. Specifying a
618 read `StorageClass` can force a different type to be returned.
619 This type must be compatible with the original type.
620 timespan : `Timespan` or `None`, optional
621 A timespan that the validity range of the dataset must overlap.
622 If not provided and this is a calibration dataset type, an attempt
623 will be made to find the timespan from any temporal coordinate
624 in the data ID.
625 **kwargs
626 Additional keyword arguments used to augment or construct a
627 `DataId`. See `DataId` parameters.
629 Returns
630 -------
631 obj : `DeferredDatasetHandle`
632 A handle which can be used to retrieve a dataset at a later time.
634 Raises
635 ------
636 LookupError
637 Raised if no matching dataset exists in the `Registry` or
638 datastore.
639 ValueError
640 Raised if a resolved `DatasetRef` was passed as an input, but it
641 differs from the one found in the registry.
642 TypeError
643 Raised if no collections were provided.
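Examples
--------
A sketch of deferring the datastore read until the handle is used; the
collection, dataset type, and data ID values are illustrative
assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
    handle = butler.getDeferred(
        "calexp", instrument="HSC", visit=903334, detector=42
    )
    # The registry lookup has already happened; the artifact is only
    # read when the handle is resolved.
    calexp = handle.get()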
644 """
645 raise NotImplementedError()
647 @abstractmethod
648 def get(
649 self,
650 datasetRefOrType: DatasetRef | DatasetType | str,
651 /,
652 dataId: DataId | None = None,
653 *,
654 parameters: dict[str, Any] | None = None,
655 collections: Any = None,
656 storageClass: StorageClass | str | None = None,
657 timespan: Timespan | None = None,
658 **kwargs: Any,
659 ) -> Any:
660 """Retrieve a stored dataset.
662 Parameters
663 ----------
664 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
665 When `DatasetRef` the `dataId` should be `None`.
666 Otherwise the `DatasetType` or name thereof.
667 If a resolved `DatasetRef`, the associated dataset
668 is returned directly without additional querying.
669 dataId : `dict` or `DataCoordinate`
670 A `dict` of `Dimension` link name, value pairs that label the
671 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
672 should be provided as the first argument.
673 parameters : `dict`
674 Additional StorageClass-defined options to control reading,
675 typically used to efficiently read only a subset of the dataset.
676 collections : Any, optional
677 Collections to be searched, overriding ``self.collections``.
678 Can be any of the types supported by the ``collections`` argument
679 to butler construction.
680 storageClass : `StorageClass` or `str`, optional
681 The storage class to be used to override the Python type
682 returned by this method. By default the returned type matches
683 the dataset type definition for this dataset. Specifying a
684 read `StorageClass` can force a different type to be returned.
685 This type must be compatible with the original type.
686 timespan : `Timespan` or `None`, optional
687 A timespan that the validity range of the dataset must overlap.
688 If not provided and this is a calibration dataset type, an attempt
689 will be made to find the timespan from any temporal coordinate
690 in the data ID.
691 **kwargs
692 Additional keyword arguments used to augment or construct a
693 `DataCoordinate`. See `DataCoordinate.standardize`
694 parameters.
696 Returns
697 -------
698 obj : `object`
699 The dataset.
701 Raises
702 ------
703 LookupError
704 Raised if no matching dataset exists in the `Registry`.
705 TypeError
706 Raised if no collections were provided.
708 Notes
709 -----
710 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
711 this method requires that the given data ID include temporal dimensions
712 beyond the dimensions of the dataset type itself, in order to find the
713 dataset with the appropriate validity range. For example, a "bias"
714 dataset with native dimensions ``{instrument, detector}`` could be
715 fetched with a ``{instrument, detector, exposure}`` data ID, because
716 ``exposure`` is a temporal dimension.
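Examples
--------
A sketch of the calibration lookup described above; the repository path,
collection, and data ID values are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", collections=["HSC/calib"])
    # ``exposure`` supplies the temporal information needed to select
    # the bias with the matching validity range.
    bias = butler.get("bias", instrument="HSC", detector=42, exposure=903334)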
717 """
718 raise NotImplementedError()
720 @abstractmethod
721 def getURIs(
722 self,
723 datasetRefOrType: DatasetRef | DatasetType | str,
724 /,
725 dataId: DataId | None = None,
726 *,
727 predict: bool = False,
728 collections: Any = None,
729 run: str | None = None,
730 **kwargs: Any,
731 ) -> DatasetRefURIs:
732 """Return the URIs associated with the dataset.
734 Parameters
735 ----------
736 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
737 When `DatasetRef` the `dataId` should be `None`.
738 Otherwise the `DatasetType` or name thereof.
739 dataId : `dict` or `DataCoordinate`
740 A `dict` of `Dimension` link name, value pairs that label the
741 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
742 should be provided as the first argument.
743 predict : `bool`
744 If `True`, allow URIs to be returned of datasets that have not
745 been written.
746 collections : Any, optional
747 Collections to be searched, overriding ``self.collections``.
748 Can be any of the types supported by the ``collections`` argument
749 to butler construction.
750 run : `str`, optional
751 Run to use for predictions, overriding ``self.run``.
752 **kwargs
753 Additional keyword arguments used to augment or construct a
754 `DataCoordinate`. See `DataCoordinate.standardize`
755 parameters.
757 Returns
758 -------
759 uris : `DatasetRefURIs`
760 The URI to the primary artifact associated with this dataset (if
761 the dataset was disassembled within the datastore this may be
762 `None`), and the URIs to any components associated with the dataset
763 artifact (the latter can be empty if there are no components).
764 """
765 raise NotImplementedError()
767 def getURI(
768 self,
769 datasetRefOrType: DatasetRef | DatasetType | str,
770 /,
771 dataId: DataId | None = None,
772 *,
773 predict: bool = False,
774 collections: Any = None,
775 run: str | None = None,
776 **kwargs: Any,
777 ) -> ResourcePath:
778 """Return the URI to the Dataset.
780 Parameters
781 ----------
782 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
783 When `DatasetRef` the `dataId` should be `None`.
784 Otherwise the `DatasetType` or name thereof.
785 dataId : `dict` or `DataCoordinate`
786 A `dict` of `Dimension` link name, value pairs that label the
787 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
788 should be provided as the first argument.
789 predict : `bool`
790 If `True`, allow URIs to be returned of datasets that have not
791 been written.
792 collections : Any, optional
793 Collections to be searched, overriding ``self.collections``.
794 Can be any of the types supported by the ``collections`` argument
795 to butler construction.
796 run : `str`, optional
797 Run to use for predictions, overriding ``self.run``.
798 **kwargs
799 Additional keyword arguments used to augment or construct a
800 `DataCoordinate`. See `DataCoordinate.standardize`
801 parameters.
803 Returns
804 -------
805 uri : `lsst.resources.ResourcePath`
806 URI pointing to the Dataset within the datastore. If the
807 Dataset does not exist in the datastore, and if ``predict`` is
808 `True`, the URI will be a prediction and will include a URI
809 fragment "#predicted".
810 If the datastore does not have entities that relate well
811 to the concept of a URI the returned URI string will be
812 descriptive. The returned URI is not guaranteed to be obtainable.
814 Raises
815 ------
816 LookupError
817 A URI has been requested for a dataset that does not exist and
818 guessing is not allowed.
819 ValueError
820 Raised if a resolved `DatasetRef` was passed as an input, but it
821 differs from the one found in the registry.
822 TypeError
823 Raised if no collections were provided.
824 RuntimeError
825 Raised if a URI is requested for a dataset that consists of
826 multiple artifacts.
827 """
828 primary, components = self.getURIs(
829 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
830 )
832 if primary is None or components:
833 raise RuntimeError(
834 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
835 "Use Butler.getURIs() instead."
836 )
837 return primary
839 @abstractmethod
840 def get_dataset_type(self, name: str) -> DatasetType:
841 """Get the `DatasetType`.
843 Parameters
844 ----------
845 name : `str`
846 Name of the type.
848 Returns
849 -------
850 type : `DatasetType`
851 The `DatasetType` associated with the given name.
853 Raises
854 ------
855 lsst.daf.butler.MissingDatasetTypeError
856 Raised if the requested dataset type has not been registered.
858 Notes
859 -----
860 This method handles component dataset types automatically, though most
861 other operations do not.
862 """
863 raise NotImplementedError()
865 @abstractmethod
866 def get_dataset(
867 self,
868 id: DatasetId,
869 *,
870 storage_class: str | StorageClass | None = None,
871 dimension_records: bool = False,
872 datastore_records: bool = False,
873 ) -> DatasetRef | None:
874 """Retrieve a Dataset entry.
876 Parameters
877 ----------
878 id : `DatasetId`
879 The unique identifier for the dataset.
880 storage_class : `str` or `StorageClass` or `None`
881 A storage class to use when creating the returned entry. If given
882 it must be compatible with the default storage class.
883 dimension_records : `bool`, optional
884 If `True` the ref will be expanded and contain dimension records.
885 datastore_records : `bool`, optional
886 If `True` the ref will contain associated datastore records.
888 Returns
889 -------
890 ref : `DatasetRef` or `None`
891 A ref to the Dataset, or `None` if no matching Dataset
892 was found.
893 """
894 raise NotImplementedError()
896 @abstractmethod
897 def find_dataset(
898 self,
899 dataset_type: DatasetType | str,
900 data_id: DataId | None = None,
901 *,
902 collections: str | Sequence[str] | None = None,
903 timespan: Timespan | None = None,
904 storage_class: str | StorageClass | None = None,
905 dimension_records: bool = False,
906 datastore_records: bool = False,
907 **kwargs: Any,
908 ) -> DatasetRef | None:
909 """Find a dataset given its `DatasetType` and data ID.
911 This can be used to obtain a `DatasetRef` that permits the dataset to
912 be read from a `Datastore`. If the dataset is a component and can not
913 be found using the provided dataset type, a dataset ref for the parent
914 will be returned instead but with the correct dataset type.
916 Parameters
917 ----------
918 dataset_type : `DatasetType` or `str`
919 A `DatasetType` or the name of one. If this is a `DatasetType`
920 instance, its storage class will be respected and propagated to
921 the output, even if it differs from the dataset type definition
922 in the registry, as long as the storage classes are convertible.
923 data_id : `dict` or `DataCoordinate`, optional
924 A `dict`-like object containing the `Dimension` links that identify
925 the dataset within a collection. If it is a `dict` the dataId
926 can include dimension record values such as ``day_obs`` and
927 ``seq_num`` or ``full_name`` that can be used to derive the
928 primary dimension.
929 collections : `str` or `list` [`str`], optional
930 An ordered list of collections to search for the dataset.
931 Defaults to ``self.defaults.collections``.
932 timespan : `Timespan`, optional
933 A timespan that the validity range of the dataset must overlap.
934 If not provided, any `~CollectionType.CALIBRATION` collections
935 matched by the ``collections`` argument will not be searched.
936 storage_class : `str` or `StorageClass` or `None`
937 A storage class to use when creating the returned entry. If given
938 it must be compatible with the default storage class.
939 dimension_records : `bool`, optional
940 If `True` the ref will be expanded and contain dimension records.
941 datastore_records : `bool`, optional
942 If `True` the ref will contain associated datastore records.
943 **kwargs
944 Additional keyword arguments passed to
945 `DataCoordinate.standardize` to convert ``dataId`` to a true
946 `DataCoordinate` or augment an existing one. This can also include
947 dimension record metadata that can be used to derive a primary
948 dimension value.
950 Returns
951 -------
952 ref : `DatasetRef`
953 A reference to the dataset, or `None` if no matching Dataset
954 was found.
956 Raises
957 ------
958 lsst.daf.butler.NoDefaultCollectionError
959 Raised if ``collections`` is `None` and
960 ``self.collections`` is `None`.
961 LookupError
962 Raised if one or more data ID keys are missing.
963 lsst.daf.butler.MissingDatasetTypeError
964 Raised if the dataset type does not exist.
965 lsst.daf.butler.MissingCollectionError
966 Raised if any of ``collections`` does not exist in the registry.
968 Notes
969 -----
970 This method simply returns `None` and does not raise an exception even
971 when the set of collections searched is intrinsically incompatible with
972 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
973 only `~CollectionType.CALIBRATION` collections are being searched.
974 This may make it harder to debug some lookup failures, but the behavior
975 is intentional; we consider it more important that failed searches are
976 reported consistently, regardless of the reason, and that adding
977 additional collections that do not contain a match to the search path
978 never changes the behavior.
980 This method handles component dataset types automatically, though most
981 other query operations do not.
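Examples
--------
A sketch of a direct lookup; the repository path, collection, dataset
type, and data ID values are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo")
    ref = butler.find_dataset(
        "calexp",
        collections=["HSC/defaults"],
        instrument="HSC",
        visit=903334,
        detector=42,
    )
    if ref is None:
        print("No matching dataset in the searched collections.")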
982 """
983 raise NotImplementedError()
985 @abstractmethod
986 def retrieveArtifacts(
987 self,
988 refs: Iterable[DatasetRef],
989 destination: ResourcePathExpression,
990 transfer: str = "auto",
991 preserve_path: bool = True,
992 overwrite: bool = False,
993 ) -> list[ResourcePath]:
994 """Retrieve the artifacts associated with the supplied refs.
996 Parameters
997 ----------
998 refs : iterable of `DatasetRef`
999 The datasets for which artifacts are to be retrieved.
1000 A single ref can result in multiple artifacts. The refs must
1001 be resolved.
1002 destination : `lsst.resources.ResourcePath` or `str`
1003 Location to write the artifacts.
1004 transfer : `str`, optional
1005 Method to use to transfer the artifacts. Must be one of the options
1006 supported by `~lsst.resources.ResourcePath.transfer_from()`.
1007 "move" is not allowed.
1008 preserve_path : `bool`, optional
1009 If `True` the full path of the artifact within the datastore
1010 is preserved. If `False` the final file component of the path
1011 is used.
1012 overwrite : `bool`, optional
1013 If `True` allow transfers to overwrite existing files at the
1014 destination.
1016 Returns
1017 -------
1018 targets : `list` of `lsst.resources.ResourcePath`
1019 URIs of file artifacts in destination location. Order is not
1020 preserved.
1022 Notes
1023 -----
1024 For non-file datastores the artifacts written to the destination
1025 may not match the representation inside the datastore. For example
1026 a hierarchical data structure in a NoSQL database may well be stored
1027 as a JSON file.
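Examples
--------
A sketch of copying the file artifacts for a set of datasets out of the
repository; the repository path, collection, dataset type, and
destination are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
    refs = butler.registry.queryDatasets(
        "calexp", collections=["HSC/defaults"]
    )
    paths = butler.retrieveArtifacts(
        refs, destination="/tmp/calexp-export", transfer="copy"
    )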
1028 """
1029 raise NotImplementedError()
1031 @abstractmethod
1032 def exists(
1033 self,
1034 dataset_ref_or_type: DatasetRef | DatasetType | str,
1035 /,
1036 data_id: DataId | None = None,
1037 *,
1038 full_check: bool = True,
1039 collections: Any = None,
1040 **kwargs: Any,
1041 ) -> DatasetExistence:
1042 """Indicate whether a dataset is known to Butler registry and
1043 datastore.
1045 Parameters
1046 ----------
1047 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1048 When `DatasetRef` the `dataId` should be `None`.
1049 Otherwise the `DatasetType` or name thereof.
1050 data_id : `dict` or `DataCoordinate`
1051 A `dict` of `Dimension` link name, value pairs that label the
1052 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1053 should be provided as the first argument.
1054 full_check : `bool`, optional
1055 If `True`, a check will be made for the actual existence of a
1056 dataset artifact. This will involve additional overhead due to
1057 the need to query an external system. If `False`, this check will
1058 be omitted, and the registry and datastore will solely be asked
1059 if they know about the dataset but no direct check for the
1060 artifact will be performed.
1061 collections : Any, optional
1062 Collections to be searched, overriding ``self.collections``.
1063 Can be any of the types supported by the ``collections`` argument
1064 to butler construction.
1065 **kwargs
1066 Additional keyword arguments used to augment or construct a
1067 `DataCoordinate`. See `DataCoordinate.standardize`
1068 parameters.
1070 Returns
1071 -------
1072 existence : `DatasetExistence`
1073 Object indicating whether the dataset is known to registry and
1074 datastore. Evaluates to `True` if the dataset is present and known
1075 to both.
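Examples
--------
A sketch of an existence check; the repository path, collection, dataset
type, and data ID values are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", collections=["HSC/defaults"])
    existence = butler.exists(
        "calexp", instrument="HSC", visit=903334, detector=42
    )
    if not existence:
        # The returned enum reports which of registry and datastore know
        # the dataset, which helps diagnose partial ingests.
        print(f"Dataset not fully present: {existence}")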
1076 """
1077 raise NotImplementedError()
1079 @abstractmethod
1080 def _exists_many(
1081 self,
1082 refs: Iterable[DatasetRef],
1083 /,
1084 *,
1085 full_check: bool = True,
1086 ) -> dict[DatasetRef, DatasetExistence]:
1087 """Indicate whether multiple datasets are known to Butler registry and
1088 datastore.
1090 This is an experimental API that may change at any moment.
1092 Parameters
1093 ----------
1094 refs : iterable of `DatasetRef`
1095 The datasets to be checked.
1096 full_check : `bool`, optional
1097 If `True`, a check will be made for the actual existence of each
1098 dataset artifact. This will involve additional overhead due to
1099 the need to query an external system. If `False`, this check will
1100 be omitted, and the registry and datastore will solely be asked
1101 if they know about the dataset(s) but no direct check for the
1102 artifact(s) will be performed.
1104 Returns
1105 -------
1106 existence : dict of [`DatasetRef`, `DatasetExistence`]
1107 Mapping from the given dataset refs to an enum indicating the
1108 status of the dataset in registry and datastore.
1109 Each value evaluates to `True` if the dataset is present and known
1110 to both.
1111 """
1112 raise NotImplementedError()
1114 @abstractmethod
1115 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1116 """Remove one or more `~CollectionType.RUN` collections and the
1117 datasets within them.
1119 Parameters
1120 ----------
1121 names : `~collections.abc.Iterable` [ `str` ]
1122 The names of the collections to remove.
1123 unstore : `bool`, optional
1124 If `True` (default), delete datasets from all datastores in which
1125 they are present, and attempt to rollback the registry deletions if
1126 datastore deletions fail (which may not always be possible). If
1127 `False`, datastore records for these datasets are still removed,
1128 but any artifacts (e.g. files) will not be.
1130 Raises
1131 ------
1132 TypeError
1133 Raised if one or more collections are not of type
1134 `~CollectionType.RUN`.
1135 """
1136 raise NotImplementedError()
1138 @abstractmethod
1139 def ingest(
1140 self,
1141 *datasets: FileDataset,
1142 transfer: str | None = "auto",
1143 record_validation_info: bool = True,
1144 ) -> None:
1145 """Store and register one or more datasets that already exist on disk.
1147 Parameters
1148 ----------
1149 *datasets : `FileDataset`
1150 Each positional argument is a struct containing information about
1151 a file to be ingested, including its URI (either absolute or
1152 relative to the datastore root, if applicable), a resolved
1153 `DatasetRef`, and optionally a formatter class or its
1154 fully-qualified string name. If a formatter is not provided, the
1155 formatter that would be used for `put` is assumed. On successful
1156 ingest all `FileDataset.formatter` attributes will be set to the
1157 formatter class used. `FileDataset.path` attributes may be modified
1158 to put paths in whatever the datastore considers a standardized
1159 form.
1160 transfer : `str`, optional
1161 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1162 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1163 transfer the file.
1164 record_validation_info : `bool`, optional
1165 If `True`, the default, the datastore can record validation
1166 information associated with the file. If `False` the datastore
1167 will not attempt to track any information such as checksums
1168 or file sizes. This can be useful if such information is tracked
1169 in an external system or if the file is to be compressed in place.
1170 It is up to the datastore whether this parameter is relevant.
1172 Raises
1173 ------
1174 TypeError
1175 Raised if the butler is read-only or if no run was provided.
1176 NotImplementedError
1177 Raised if the `Datastore` does not support the given transfer mode.
1178 DatasetTypeNotSupportedError
1179 Raised if one or more files to be ingested have a dataset type that
1180 is not supported by the `Datastore`.
1181 FileNotFoundError
1182 Raised if one of the given files does not exist.
1183 FileExistsError
1184 Raised if transfer is not `None` but the (internal) location the
1185 file would be moved to is already occupied.
1187 Notes
1188 -----
1189 This operation is not fully exception safe: if a database operation
1190 fails, the given `FileDataset` instances may be only partially updated.
1192 It is atomic in terms of database operations (they will either all
1193 succeed or all fail) providing the database engine implements
1194 transactions correctly. It will attempt to be atomic in terms of
1195 filesystem operations as well, but this cannot be implemented
1196 rigorously for most datastores.
1197 """
1198 raise NotImplementedError()
1200 @abstractmethod
1201 def export(
1202 self,
1203 *,
1204 directory: str | None = None,
1205 filename: str | None = None,
1206 format: str | None = None,
1207 transfer: str | None = None,
1208 ) -> AbstractContextManager[RepoExportContext]:
1209 """Export datasets from the repository represented by this `Butler`.
1211 This method is a context manager that returns a helper object
1212 (`RepoExportContext`) that is used to indicate what information from
1213 the repository should be exported.
1215 Parameters
1216 ----------
1217 directory : `str`, optional
1218 Directory dataset files should be written to if ``transfer`` is not
1219 `None`.
1220 filename : `str`, optional
1221 Name for the file that will include database information associated
1222 with the exported datasets. If this is not an absolute path and
1223 ``directory`` is not `None`, it will be written to ``directory``
1224 instead of the current working directory. Defaults to
1225 "export.{format}".
1226 format : `str`, optional
1227 File format for the database information file. If `None`, the
1228 extension of ``filename`` will be used.
1229 transfer : `str`, optional
1230 Transfer mode passed to `Datastore.export`.
1232 Raises
1233 ------
1234 TypeError
1235 Raised if the set of arguments passed is inconsistent.
1237 Examples
1238 --------
1239 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1240 methods are used to provide the iterables over data IDs and/or datasets
1241 to be exported::
1243 with butler.export("exports.yaml") as export:
1244 # Export all flats, but none of the dimension element rows
1245 # (i.e. data ID information) associated with them.
1246 export.saveDatasets(butler.registry.queryDatasets("flat"),
1247 elements=())
1248 # Export all datasets that start with "deepCoadd_" and all of
1249 # their associated data ID information.
1250 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1251 """
1252 raise NotImplementedError()
1254 @abstractmethod
1255 def import_(
1256 self,
1257 *,
1258 directory: ResourcePathExpression | None = None,
1259 filename: ResourcePathExpression | TextIO | None = None,
1260 format: str | None = None,
1261 transfer: str | None = None,
1262 skip_dimensions: set | None = None,
1263 ) -> None:
1264 """Import datasets into this repository that were exported from a
1265 different butler repository via `~lsst.daf.butler.Butler.export`.
1267 Parameters
1268 ----------
1269 directory : `~lsst.resources.ResourcePathExpression`, optional
1270 Directory containing dataset files to import from. If `None`,
1271 ``filename`` and all dataset file paths specified therein must
1272 be absolute.
1273 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1274 A stream or name of file that contains database information
1275 associated with the exported datasets, typically generated by
1276 `~lsst.daf.butler.Butler.export`. If this a string (name) or
1277 `~lsst.resources.ResourcePath` and is not an absolute path,
1278 it will first be looked for relative to ``directory`` and if not
1279 found there it will be looked for in the current working
1280 directory. Defaults to "export.{format}".
1281 format : `str`, optional
1282 File format for ``filename``. If `None`, the extension of
1283 ``filename`` will be used.
1284 transfer : `str`, optional
1285 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1286 skip_dimensions : `set`, optional
1287 Names of dimensions that should be skipped and not imported.
1289 Raises
1290 ------
1291 TypeError
1292 Raised if the set of arguments passed is inconsistent, or if the
1293 butler is read-only.
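Examples
--------
A sketch of re-importing an export produced by
`~lsst.daf.butler.Butler.export`; the repository path and export
location are illustrative assumptions::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", writeable=True)
    butler.import_(
        directory="exports", filename="exports.yaml", transfer="copy"
    )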
1294 """
1295 raise NotImplementedError()
1297 @abstractmethod
1298 def transfer_dimension_records_from(
1299 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1300 ) -> None:
1301 """Transfer dimension records to this Butler from another Butler.
1303 Parameters
1304 ----------
1305 source_butler : `LimitedButler` or `Butler`
1306 Butler from which the records are to be transferred. If data IDs
1307 in ``source_refs`` are not expanded then this has to be a full
1308 `Butler` whose registry will be used to expand data IDs. If the
1309 source refs contain coordinates that are used to populate other
1310 records then this will also need to be a full `Butler`.
1311 source_refs : iterable of `DatasetRef`
1312 Datasets defined in the source butler whose dimension records
1313 should be transferred to this butler. In most circumstances,
1314 transfer is faster if the dataset refs are expanded.
1315 """
1316 raise NotImplementedError()
1318 @abstractmethod
1319 def transfer_from(
1320 self,
1321 source_butler: LimitedButler,
1322 source_refs: Iterable[DatasetRef],
1323 transfer: str = "auto",
1324 skip_missing: bool = True,
1325 register_dataset_types: bool = False,
1326 transfer_dimensions: bool = False,
1327 dry_run: bool = False,
1328 ) -> Collection[DatasetRef]:
1329 """Transfer datasets to this Butler from a run in another Butler.
1331 Parameters
1332 ----------
1333 source_butler : `LimitedButler`
1334 Butler from which the datasets are to be transferred. If data IDs
1335 in ``source_refs`` are not expanded then this has to be a full
1336 `Butler` whose registry will be used to expand data IDs.
1337 source_refs : iterable of `DatasetRef`
1338 Datasets defined in the source butler that should be transferred to
1339 this butler. In most circumstances, ``transfer_from`` is faster if
1340 the dataset refs are expanded.
1341 transfer : `str`, optional
1342 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1343 skip_missing : `bool`
1344 If `True`, datasets with no datastore artifact associated with
1345 them are not transferred. If `False` a registry entry will be
1346 created even if no datastore record is created (and so will
1347 look equivalent to the dataset being unstored).
1348 register_dataset_types : `bool`
1349 If `True` any missing dataset types are registered. Otherwise
1350 an exception is raised.
1351 transfer_dimensions : `bool`, optional
1352 If `True`, dimension record data associated with the new datasets
1353 will be transferred.
1354 dry_run : `bool`, optional
1355 If `True` the transfer will be processed without any modifications
1356 made to the target butler and as if the target butler did not
1357 have any of the datasets.
1359 Returns
1360 -------
1361 refs : `list` of `DatasetRef`
1362 The refs added to this Butler.
1364 Notes
1365 -----
1366 The datastore artifact has to exist for a transfer
1367 to be made but non-existence is not an error.
1369 Datasets that already exist in this run will be skipped.
1371 The datasets are imported as part of a transaction, although
1372 dataset types are registered before the transaction is started.
1373 This means that it is possible for a dataset type to be registered
1374 even though transfer has failed.
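Examples
--------
A sketch of copying datasets between two repositories; the paths,
collection, and dataset type are illustrative assumptions::

    from lsst.daf.butler import Butler

    source = Butler.from_config("/path/to/source/repo")
    dest = Butler.from_config("/path/to/dest/repo", writeable=True)
    refs = source.registry.queryDatasets(
        "calexp", collections=["HSC/defaults"]
    )
    transferred = dest.transfer_from(
        source, refs, transfer="copy", register_dataset_types=True
    )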
1375 """
1376 raise NotImplementedError()
1378 @abstractmethod
1379 def validateConfiguration(
1380 self,
1381 logFailures: bool = False,
1382 datasetTypeNames: Iterable[str] | None = None,
1383 ignore: Iterable[str] | None = None,
1384 ) -> None:
1385 """Validate butler configuration.
1387 Checks that each `DatasetType` can be stored in the `Datastore`.
1389 Parameters
1390 ----------
1391 logFailures : `bool`, optional
1392 If `True`, output a log message for every validation error
1393 detected.
1394 datasetTypeNames : iterable of `str`, optional
1395 The `DatasetType` names that should be checked. This allows
1396 only a subset to be selected.
1397 ignore : iterable of `str`, optional
1398 Names of DatasetTypes to skip over. This can be used to skip
1399 known problems. If a named `DatasetType` corresponds to a
1400 composite, all components of that `DatasetType` will also be
1401 ignored.
1403 Raises
1404 ------
1405 ButlerValidationError
1406 Raised if there is some inconsistency with how this Butler
1407 is configured.
1408 """
1409 raise NotImplementedError()
1411 @property
1412 @abstractmethod
1413 def collection_chains(self) -> ButlerCollections:
1414 """Object with methods for modifying collection chains."""
1415 raise NotImplementedError()
1417 @property
1418 @abstractmethod
1419 def collections(self) -> Sequence[str]:
1420 """The collections to search by default, in order
1421 (`~collections.abc.Sequence` [ `str` ]).
1422 """
1423 raise NotImplementedError()
1425 @property
1426 @abstractmethod
1427 def run(self) -> str | None:
1428 """Name of the run this butler writes outputs to by default (`str` or
1429 `None`).
1430 """
1431 raise NotImplementedError()
1433 @property
1434 @abstractmethod
1435 def registry(self) -> Registry:
1436 """The object that manages dataset metadata and relationships
1437 (`Registry`).
1439 Many operations that don't involve reading or writing butler datasets
1440 are accessible only via `Registry` methods. Eventually these methods
1441 will be replaced by equivalent `Butler` methods.
1442 """
1443 raise NotImplementedError()
1445 @abstractmethod
1446 def _query(self) -> AbstractContextManager[Query]:
1447 """Context manager returning a `Query` object used for construction
1448 and execution of complex queries.
1449 """
1450 raise NotImplementedError()
1452 def _query_data_ids(
1453 self,
1454 dimensions: DimensionGroup | Iterable[str] | str,
1455 *,
1456 data_id: DataId | None = None,
1457 where: str = "",
1458 bind: Mapping[str, Any] | None = None,
1459 with_dimension_records: bool = False,
1460 order_by: Iterable[str] | str | None = None,
1461 limit: int | None = None,
1462 explain: bool = True,
1463 **kwargs: Any,
1464 ) -> list[DataCoordinate]:
1465 """Query for data IDs matching user-provided criteria.
1467 Parameters
1468 ----------
1469 dimensions : `DimensionGroup`, `str`, or \
1470 `~collections.abc.Iterable` [`str`]
1471 The dimensions of the data IDs to yield, as either `DimensionGroup`
1472 instances or `str`. Will be automatically expanded to a complete
1473 `DimensionGroup`.
1474 data_id : `dict` or `DataCoordinate`, optional
1475 A data ID whose key-value pairs are used as equality constraints
1476 in the query.
1477 where : `str`, optional
1478 A string expression similar to a SQL WHERE clause. May involve
1479 any column of a dimension table or (as a shortcut for the primary
1480 key column of a dimension table) dimension name. See
1481 :ref:`daf_butler_dimension_expressions` for more information.
1482 bind : `~collections.abc.Mapping`, optional
1483 Mapping containing literal values that should be injected into the
1484 ``where`` expression, keyed by the identifiers they replace.
1485 Values of collection type can be expanded in some cases; see
1486 :ref:`daf_butler_dimension_expressions_identifiers` for more
1487 information.
1488 with_dimension_records : `bool`, optional
1489 If `True` (default is `False`) then returned data IDs will have
1490 dimension records.
1491 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1492 Names of the columns/dimensions to use for ordering returned data
1493 IDs. A column name can be prefixed with a minus sign (``-``) for
1494 descending ordering.
1495 limit : `int`, optional
1496 Upper limit on the number of returned records.
1497 explain : `bool`, optional
1498 If `True` (default), an `EmptyQueryResultError` exception is
1499 raised when the resulting list is empty. The exception contains a
1500 non-empty list of strings explaining possible causes for the
1501 empty result.
1502 **kwargs
1503 Additional keyword arguments are forwarded to
1504 `DataCoordinate.standardize` when processing the ``data_id``
1505 argument (and may be used to provide a constraining data ID even
1506 when the ``data_id`` argument is `None`).
1508 Returns
1509 -------
1510 dataIds : `list` [`DataCoordinate`]
1511 Data IDs matching the given query parameters. These are always
1512 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1513 returns `True`).
1515 Raises
1516 ------
1517 lsst.daf.butler.registry.DataIdError
1518 Raised when ``data_id`` or keyword arguments specify unknown
1519 dimensions or values, or when they contain inconsistent values.
1520 lsst.daf.butler.registry.UserExpressionError
1521 Raised when ``where`` expression is invalid.
1522 lsst.daf.butler.EmptyQueryResultError
1523 Raised when query generates empty result and ``explain`` is set to
1524 `True`.
1525 TypeError
1526 Raised when the arguments are incompatible.
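Examples
--------
A minimal sketch; the dimension names, instrument value, ``where``
expression, and bind value are placeholders for whatever exists in the
repository being queried::

    data_ids = butler._query_data_ids(
        ["instrument", "exposure"],
        data_id={"instrument": "HSC"},
        where="exposure.observation_type = obs_type",
        bind={"obs_type": "science"},
        order_by="-exposure",
        limit=10,
    )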
1527 """
1528 if data_id is None:
1529 data_id = DataCoordinate.make_empty(self.dimensions)
1530 with self._query() as query:
1531 result = (
1532 query.where(data_id, where, bind=bind, **kwargs)
1533 .data_ids(dimensions)
1534 .order_by(*ensure_iterable(order_by))
1535 .limit(limit)
1536 )
1537 if with_dimension_records:
1538 result = result.with_dimension_records()
1539 data_ids = list(result)
1540 if explain and not data_ids:
1541 raise EmptyQueryResultError(list(result.explain_no_results()))
1542 return data_ids
1544 def _query_datasets(
1545 self,
1546 dataset_type: str | DatasetType,
1547 collections: str | Iterable[str] | None = None,
1548 *,
1549 find_first: bool = True,
1550 data_id: DataId | None = None,
1551 where: str = "",
1552 bind: Mapping[str, Any] | None = None,
1553 with_dimension_records: bool = False,
1554 explain: bool = True,
1555 **kwargs: Any,
1556 ) -> list[DatasetRef]:
1557 """Query for dataset references matching user-provided criteria.
1559 Parameters
1560 ----------
1561 dataset_type : `str` or `DatasetType`
1562 Dataset type object or name to search for.
1563 collections : collection expression, optional
1564 A collection name or iterable of collection names to search. If not
1565 provided, the default collections are used. See
1566 :ref:`daf_butler_collection_expressions` for more information.
1567 find_first : `bool`, optional
1568 If `True` (default), for each result data ID, return only one
1569 `DatasetRef`, from the first collection in which a dataset of that
1570 dataset type appears (according to the order of ``collections``
1571 passed in). When `True`, ``collections`` must not contain regular
1572 expressions and may not be ``...``.
1573 data_id : `dict` or `DataCoordinate`, optional
1574 A data ID whose key-value pairs are used as equality constraints in
1575 the query.
1576 where : `str`, optional
1577 A string expression similar to a SQL WHERE clause. May involve any
1578 column of a dimension table or (as a shortcut for the primary key
1579 column of a dimension table) dimension name. See
1580 :ref:`daf_butler_dimension_expressions` for more information.
1581 bind : `~collections.abc.Mapping`, optional
1582 Mapping containing literal values that should be injected into the
1583 ``where`` expression, keyed by the identifiers they replace. Values
1584 of collection type can be expanded in some cases; see
1585 :ref:`daf_butler_dimension_expressions_identifiers` for more
1586 information.
1587 with_dimension_records : `bool`, optional
1588 If `True` (default is `False`), the data IDs of the returned
1589 dataset references will include dimension records.
1590 explain : `bool`, optional
1591 If `True` (default), an `EmptyQueryResultError` exception is
1592 raised when the resulting list is empty. The exception contains a
1593 non-empty list of strings explaining possible causes for the
1594 empty result.
1595 **kwargs
1596 Additional keyword arguments are forwarded to
1597 `DataCoordinate.standardize` when processing the ``data_id``
1598 argument (and may be used to provide a constraining data ID even
1599 when the ``data_id`` argument is `None`).
1601 Returns
1602 -------
1603 refs : `list` [`DatasetRef`]
1604 Dataset references matching the given query criteria. Nested data
1605 IDs are guaranteed to include values for all implied dimensions
1606 (i.e. `DataCoordinate.hasFull` will return `True`).
1608 Raises
1609 ------
1610 lsst.daf.butler.registry.DatasetTypeExpressionError
1611 Raised when ``dataset_type`` expression is invalid.
1612 lsst.daf.butler.registry.DataIdError
1613 Raised when ``data_id`` or keyword arguments specify unknown
1614 dimensions or values, or when they contain inconsistent values.
1615 lsst.daf.butler.registry.UserExpressionError
1616 Raised when ``where`` expression is invalid.
1617 lsst.daf.butler.EmptyQueryResultError
1618 Raised when query generates empty result and ``explain`` is set to
1619 `True`.
1620 TypeError
1621 Raised when the arguments are incompatible, such as when a
1622 collection wildcard is passed when ``find_first`` is `True`, or
1623 when ``collections`` is `None` and default butler collections are
1624 not defined.
1626 Notes
1627 -----
1628 When multiple dataset types are queried in a single call, the results
1629 of this operation are equivalent to querying for each dataset type
1630 separately in turn, and no information about the relationships between
1631 datasets of different types is included.
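Examples
--------
A minimal sketch; the dataset type, collection name, and data ID values
are placeholders::

    refs = butler._query_datasets(
        "calexp",
        collections="HSC/runs/example",
        data_id={"instrument": "HSC", "visit": 903334},
        with_dimension_records=True,
    )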
1632 """
1633 if data_id is None:
1634 data_id = DataCoordinate.make_empty(self.dimensions)
1635 with self._query() as query:
1636 result = query.where(data_id, where, bind=bind, **kwargs).datasets(
1637 dataset_type,
1638 collections=collections,
1639 find_first=find_first,
1640 )
1641 if with_dimension_records:
1642 result = result.with_dimension_records()
1643 refs = list(result)
1644 if explain and not refs:
1645 raise EmptyQueryResultError(list(result.explain_no_results()))
1646 return refs
1648 def _query_dimension_records(
1649 self,
1650 element: str,
1651 *,
1652 data_id: DataId | None = None,
1653 where: str = "",
1654 bind: Mapping[str, Any] | None = None,
1655 order_by: Iterable[str] | str | None = None,
1656 limit: int | None = None,
1657 explain: bool = True,
1658 **kwargs: Any,
1659 ) -> list[DimensionRecord]:
1660 """Query for dimension information matching user-provided criteria.
1662 Parameters
1663 ----------
1664 element : `str`
1665 The name of a dimension element to obtain records for.
1666 data_id : `dict` or `DataCoordinate`, optional
1667 A data ID whose key-value pairs are used as equality constraints
1668 in the query.
1669 where : `str`, optional
1670 A string expression similar to a SQL WHERE clause. See
1671 `_query_data_ids` and :ref:`daf_butler_dimension_expressions` for
1672 more information.
1673 bind : `~collections.abc.Mapping`, optional
1674 Mapping containing literal values that should be injected into the
1675 ``where`` expression, keyed by the identifiers they replace.
1676 Values of collection type can be expanded in some cases; see
1677 :ref:`daf_butler_dimension_expressions_identifiers` for more
1678 information.
1679 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1680 Names of the columns/dimensions to use for ordering returned
1681 records. A column name can be prefixed with a minus sign (``-``)
1682 for descending ordering.
1683 limit : `int`, optional
1684 Upper limit on the number of returned records.
1685 explain : `bool`, optional
1686 If `True` (default), an `EmptyQueryResultError` exception is
1687 raised when the resulting list is empty. The exception contains a
1688 non-empty list of strings explaining possible causes for the
1689 empty result.
1690 **kwargs
1691 Additional keyword arguments are forwarded to
1692 `DataCoordinate.standardize` when processing the ``data_id``
1693 argument (and may be used to provide a constraining data ID even
1694 when the ``data_id`` argument is `None`).
1696 Returns
1697 -------
1698 records : `list` [`DimensionRecord`]
1699 Dimension records matching the given query parameters.
1701 Raises
1702 ------
1703 lsst.daf.butler.registry.DataIdError
1704 Raised when ``data_id`` or keyword arguments specify unknown
1705 dimensions or values, or when they contain inconsistent values.
1706 lsst.daf.butler.registry.UserExpressionError
1707 Raised when ``where`` expression is invalid.
1708 lsst.daf.butler.EmptyQueryResultError
1709 Raised when query generates empty result and ``explain`` is set to
1710 `True`.
1711 TypeError
1712 Raised when the arguments are incompatible.
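Examples
--------
A minimal sketch; the element name, instrument value, and ``where``
expression are placeholders::

    records = butler._query_dimension_records(
        "exposure",
        data_id={"instrument": "HSC"},
        where="exposure.observation_type = 'science'",
        order_by="-exposure",
        limit=5,
    )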
1716 """
1717 if data_id is None:
1718 data_id = DataCoordinate.make_empty(self.dimensions)
1719 with self._query() as query:
1720 result = (
1721 query.where(data_id, where, bind=bind, **kwargs)
1722 .dimension_records(element)
1723 .order_by(*ensure_iterable(order_by))
1724 .limit(limit)
1725 )
1726 dimension_records = list(result)
1727 if explain and not dimension_records:
1728 raise EmptyQueryResultError(list(result.explain_no_results()))
1729 return dimension_records
1731 @abstractmethod
1732 def _clone(
1733 self,
1734 *,
1735 collections: Any = None,
1736 run: str | None = None,
1737 inferDefaults: bool = True,
1738 **kwargs: Any,
1739 ) -> Butler:
1740 """Return a new Butler instance connected to the same repository
1741 as this one, but overriding ``collections``, ``run``,
1742 ``inferDefaults``, and default data ID.
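Examples
--------
Sketch of the intended call pattern; the run name is a placeholder::

    writeable_butler = butler._clone(run="u/someone/processing-run")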
1743 """
1744 raise NotImplementedError()