Coverage for python/lsst/daf/butler/_butler.py: 11%
722 statements
coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Butler top level classes.
23"""
24from __future__ import annotations
26__all__ = (
27 "Butler",
28 "ButlerValidationError",
29)
31import collections.abc
32import contextlib
33import io
34import logging
35import numbers
36import os
37import warnings
38from collections import Counter, defaultdict
39from collections.abc import Iterable, Iterator, MutableMapping, Sequence
40from typing import TYPE_CHECKING, Any, ClassVar, TextIO
42from deprecated.sphinx import deprecated
43from lsst.resources import ResourcePath, ResourcePathExpression
44from lsst.utils import doImportType
45from lsst.utils.introspection import get_class_of
46from lsst.utils.logging import VERBOSE, getLogger
47from sqlalchemy.exc import IntegrityError
49from ._butlerConfig import ButlerConfig
50from ._butlerRepoIndex import ButlerRepoIndex
51from ._dataset_existence import DatasetExistence
52from ._deferredDatasetHandle import DeferredDatasetHandle
53from ._limited_butler import LimitedButler
54from ._registry_shim import RegistryShim
55from .core import (
56 Config,
57 ConfigSubset,
58 DataCoordinate,
59 DataId,
60 DataIdValue,
61 DatasetIdGenEnum,
62 DatasetRef,
63 DatasetRefURIs,
64 DatasetType,
65 Datastore,
66 Dimension,
67 DimensionConfig,
68 DimensionElement,
69 DimensionRecord,
70 DimensionUniverse,
71 FileDataset,
72 NullDatastore,
73 Progress,
74 StorageClass,
75 StorageClassFactory,
76 Timespan,
77 ValidationError,
78)
79from .core.repoRelocation import BUTLER_ROOT_TAG
80from .core.utils import transactional
81from .registry import (
82 CollectionType,
83 ConflictingDefinitionError,
84 DataIdError,
85 MissingDatasetTypeError,
86 NoDefaultCollectionError,
87 Registry,
88 RegistryConfig,
89 RegistryDefaults,
90 _ButlerRegistry,
91 _RegistryFactory,
92)
93from .transfers import RepoExportContext
95if TYPE_CHECKING:
96 from lsst.resources import ResourceHandleProtocol
98 from .transfers import RepoImportBackend
100log = getLogger(__name__)
103class ButlerValidationError(ValidationError):
104 """There is a problem with the Butler configuration."""
106 pass
109class Butler(LimitedButler):
110 """Main entry point for the data access system.
112 Parameters
113 ----------
114 config : `ButlerConfig`, `Config` or `str`, optional.
115 Configuration. Anything acceptable to the
116 `ButlerConfig` constructor. If a directory path
117 is given the configuration will be read from a ``butler.yaml`` file in
118 that location. If `None` is given default values will be used.
119 butler : `Butler`, optional.
120 If provided, construct a new Butler that uses the same registry and
121 datastore as the given one, but with the given collection and run.
122 Incompatible with the ``config``, ``searchPaths``, and ``writeable``
123 arguments.
124 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
125 An expression specifying the collections to be searched (in order) when
126 reading datasets.
127 This may be a `str` collection name or an iterable thereof.
128 See :ref:`daf_butler_collection_expressions` for more information.
129 These collections are not registered automatically and must be
130 manually registered before they are used by any method, but they may be
131 manually registered after the `Butler` is initialized.
132 run : `str`, optional
133 Name of the `~CollectionType.RUN` collection new datasets should be
134 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
135 ``collections`` will be set to ``[run]``. If not `None`, this
136 collection will automatically be registered. If this is not set (and
137 ``writeable`` is not set either), a read-only butler will be created.
138 searchPaths : `list` of `str`, optional
139 Directory paths to search when calculating the full Butler
140 configuration. Not used if the supplied config is already a
141 `ButlerConfig`.
142 writeable : `bool`, optional
143 Explicitly sets whether the butler supports write operations. If not
144 provided, a read-write butler is created if any of ``run``, ``tags``,
145 or ``chains`` is non-empty.
146 inferDefaults : `bool`, optional
147 If `True` (default) infer default data ID values from the values
148 present in the datasets in ``collections``: if all collections have the
149 same value (or no value) for a governor dimension, that value will be
150 the default for that dimension. Nonexistent collections are ignored.
151 If a default value is provided explicitly for a governor dimension via
152 ``**kwargs``, no default will be inferred for that dimension.
153 without_datastore : `bool`, optional
154 If `True` do not attach a datastore to this butler. Any attempts
155 to use a datastore will fail.
156 **kwargs : `str`
157 Default data ID key-value pairs. These may only identify "governor"
158 dimensions like ``instrument`` and ``skymap``.
160 Examples
161 --------
162 While there are many ways to control exactly how a `Butler` interacts with
163 the collections in its `Registry`, the most common cases are still simple.
165 For a read-only `Butler` that searches one collection, do::
167 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"])
169 For a read-write `Butler` that writes to and reads from a
170 `~CollectionType.RUN` collection::
172 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a")
174 The `Butler` passed to a ``PipelineTask`` is often much more complex,
175 because we want to write to one `~CollectionType.RUN` collection but read
176 from several others (as well)::
178 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a",
179 collections=["u/alice/DM-50000/a",
180 "u/bob/DM-49998",
181 "HSC/defaults"])
183 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
184 Datasets will be read first from that run (since it appears first in the
185 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``.
187 Finally, one can always create a `Butler` with no collections::
189 butler = Butler("/path/to/repo", writeable=True)
191 This can be extremely useful when you just want to use ``butler.registry``,
192 e.g. for inserting dimension data or managing collections, or when the
193 collections you want to use with the butler are not consistent.
194 Passing ``writeable`` explicitly here is only necessary if you want to be
195 able to make changes to the repo; usually the value for ``writeable`` can
196 be guessed from the collection arguments provided, but it defaults to
197 `False` when there are no collection arguments.
198 """
200 def __init__(
201 self,
202 config: Config | ResourcePathExpression | None = None,
203 *,
204 butler: Butler | None = None,
205 collections: Any = None,
206 run: str | None = None,
207 searchPaths: Sequence[ResourcePathExpression] | None = None,
208 writeable: bool | None = None,
209 inferDefaults: bool = True,
210 without_datastore: bool = False,
211 **kwargs: str,
212 ):
213 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
214 # Load registry, datastore, etc. from config or existing butler.
215 if butler is not None:
216 if config is not None or searchPaths is not None or writeable is not None:
217 raise TypeError(
218 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
219 )
220 self._registry = butler._registry.copy(defaults)
221 self._datastore = butler._datastore
222 self.storageClasses = butler.storageClasses
223 self._config: ButlerConfig = butler._config
224 else:
225 self._config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
226 try:
227 butlerRoot = self._config.get("root", self._config.configDir)
228 if writeable is None:
229 writeable = run is not None
230 self._registry = _RegistryFactory(self._config).from_config(
231 butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
232 )
233 if without_datastore:
234 self._datastore = NullDatastore(None, None)
235 else:
236 self._datastore = Datastore.fromConfig(
237 self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
238 )
239 self.storageClasses = StorageClassFactory()
240 self.storageClasses.addFromConfig(self._config)
241 except Exception:
242 # Failures here usually mean that configuration is incomplete,
243 # just issue an error message which includes config file URI.
244 log.error(f"Failed to instantiate Butler from config {self._config.configFile}.")
245 raise
247 # For execution butler the datastore needs a special
248 # dependency-inversion trick. This is not used by regular butler,
249 # but we do not have a way to distinguish regular butler from execution
250 # butler.
251 self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)
253 if "run" in self._config or "collection" in self._config:
254 raise ValueError("Passing a run or collection via configuration is no longer supported.")
256 self._registry_shim = RegistryShim(self)
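# Editor's note: the sketch below is an illustration of the ``butler=``
# construction path documented above, not part of the module. It reuses an
# existing Butler's registry and datastore while changing the default run and
# collections; the repository path and collection names are assumptions.
reader = Butler("/path/to/repo", collections=["HSC/defaults"])
writer = Butler(butler=reader, run="u/alice/DM-50000/a")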
258 GENERATION: ClassVar[int] = 3
259 """This is a Generation 3 Butler.
261 This attribute may be removed in the future, once the Generation 2 Butler
262 interface has been fully retired; it should only be used in transitional
263 code.
264 """
266 def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
267 """Return DatasetType defined in registry given dataset type name."""
268 try:
269 return self._registry.getDatasetType(name)
270 except MissingDatasetTypeError:
271 return None
273 @classmethod
274 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
275 """Look up the label in a butler repository index.
277 Parameters
278 ----------
279 label : `str`
280 Label of the Butler repository to look up.
281 return_label : `bool`, optional
282 If ``label`` cannot be found in the repository index (either
283 because the index is not defined or ``label`` is not in the index) and
284 ``return_label`` is `True` then return ``ResourcePath(label)``.
285 If ``return_label`` is `False` (default) then an exception will be
286 raised instead.
288 Returns
289 -------
290 uri : `lsst.resources.ResourcePath`
291 URI to the Butler repository associated with the given label or
292 default value if it is provided.
294 Raises
295 ------
296 KeyError
297 Raised if the label is not found in the index, or if an index
298 is not defined, and ``return_label`` is `False`.
300 Notes
301 -----
302 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
303 information is discovered.
304 """
305 return ButlerRepoIndex.get_repo_uri(label, return_label)
307 @classmethod
308 def get_known_repos(cls) -> set[str]:
309 """Retrieve the list of known repository labels.
311 Returns
312 -------
313 repos : `set` of `str`
314 All the known labels. Can be empty if no index can be found.
316 Notes
317 -----
318 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
319 information is discovered.
320 """
321 return ButlerRepoIndex.get_known_repos()
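# Editor's note: a minimal sketch (not part of the module) of using the
# repository index helpers above. The label "main" is an assumption; with
# ``return_label=True`` an unknown label falls back to ``ResourcePath(label)``.
print(Butler.get_known_repos())
uri = Butler.get_repo_uri("main", return_label=True)
butler = Butler(uri)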
323 @staticmethod
324 def makeRepo(
325 root: ResourcePathExpression,
326 config: Config | str | None = None,
327 dimensionConfig: Config | str | None = None,
328 standalone: bool = False,
329 searchPaths: list[str] | None = None,
330 forceConfigRoot: bool = True,
331 outfile: ResourcePathExpression | None = None,
332 overwrite: bool = False,
333 ) -> Config:
334 """Create an empty data repository by adding a butler.yaml config
335 to a repository root directory.
337 Parameters
338 ----------
339 root : `lsst.resources.ResourcePathExpression`
340 Path or URI to the root location of the new repository. Will be
341 created if it does not exist.
342 config : `Config` or `str`, optional
343 Configuration to write to the repository, after setting any
344 root-dependent Registry or Datastore config options. Cannot
345 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
346 configuration will be used. Root-dependent config options
347 specified in this config are overwritten if ``forceConfigRoot``
348 is `True`.
349 dimensionConfig : `Config` or `str`, optional
350 Configuration for dimensions, will be used to initialize registry
351 database.
352 standalone : `bool`
353 If True, write all expanded defaults, not just customized or
354 repository-specific settings.
355 This (mostly) decouples the repository from the default
356 configuration, insulating it from changes to the defaults (which
357 may be good or bad, depending on the nature of the changes).
358 Future *additions* to the defaults will still be picked up when
359 initializing a `Butler` against repos created with ``standalone=True``.
360 searchPaths : `list` of `str`, optional
361 Directory paths to search when calculating the full butler
362 configuration.
363 forceConfigRoot : `bool`, optional
364 If `False`, any values present in the supplied ``config`` that
365 would normally be reset are not overridden and will appear
366 directly in the output config. This allows non-standard overrides
367 of the root directory for a datastore or registry to be given.
368 If this parameter is `True` the values for ``root`` will be
369 forced into the resulting config if appropriate.
370 outfile : `lsst.resources.ResourcePathExpression`, optional
371 If not `None`, the output configuration will be written to this
372 location rather than into the repository itself. Can be a URI
373 string. Can refer to a directory that will be used to write
374 ``butler.yaml``.
375 overwrite : `bool`, optional
376 Create a new configuration file even if one already exists
377 in the specified output location. Default is to raise
378 an exception.
380 Returns
381 -------
382 config : `Config`
383 The updated `Config` instance written to the repo.
385 Raises
386 ------
387 ValueError
388 Raised if a ButlerConfig or ConfigSubset is passed instead of a
389 regular Config (as these subclasses would make it impossible to
390 support ``standalone=False``).
391 FileExistsError
392 Raised if the output config file already exists.
393 os.error
394 Raised if the directory does not exist, exists but is not a
395 directory, or cannot be created.
397 Notes
398 -----
399 Note that when ``standalone=False`` (the default), the configuration
400 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
401 construct the repository should also be used to construct any Butlers
402 to avoid configuration inconsistencies.
403 """
404 if isinstance(config, ButlerConfig | ConfigSubset):
405 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
407 # Ensure that the root of the repository exists or can be made
408 root_uri = ResourcePath(root, forceDirectory=True)
409 root_uri.mkdir()
411 config = Config(config)
413 # If we are creating a new repo from scratch with relative roots,
414 # do not propagate an explicit root from the config file
415 if "root" in config:
416 del config["root"]
418 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
419 imported_class = doImportType(full["datastore", "cls"])
420 if not issubclass(imported_class, Datastore):
421 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
422 datastoreClass: type[Datastore] = imported_class
423 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
425 # if key exists in given config, parse it, otherwise parse the defaults
426 # in the expanded config
427 if config.get(("registry", "db")):
428 registryConfig = RegistryConfig(config)
429 else:
430 registryConfig = RegistryConfig(full)
431 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
432 if defaultDatabaseUri is not None:
433 Config.updateParameters(
434 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
435 )
436 else:
437 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
439 if standalone:
440 config.merge(full)
441 else:
442 # Always expand the registry.managers section into the per-repo
443 # config, because after the database schema is created, it's not
444 # allowed to change anymore. Note that in the standalone=True
445 # branch, _everything_ in the config is expanded, so there's no
446 # need to special case this.
447 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
448 configURI: ResourcePathExpression
449 if outfile is not None:
450 # When writing to a separate location we must include
451 # the root of the butler repo in the config else it won't know
452 # where to look.
453 config["root"] = root_uri.geturl()
454 configURI = outfile
455 else:
456 configURI = root_uri
457 # Strip obscore configuration, if it is present, before writing config
458 # to a file; the obscore config will be stored in the registry.
459 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
460 config_to_write = config.copy()
461 del config_to_write[obscore_config_key]
462 config_to_write.dumpToUri(configURI, overwrite=overwrite)
463 # The configFile attribute was updated on the copy, so propagate it to the original.
464 config.configFile = config_to_write.configFile
465 else:
466 config.dumpToUri(configURI, overwrite=overwrite)
468 # Create Registry and populate tables
469 registryConfig = RegistryConfig(config.get("registry"))
470 dimensionConfig = DimensionConfig(dimensionConfig)
471 _RegistryFactory(registryConfig).create_from_config(
472 dimensionConfig=dimensionConfig, butlerRoot=root_uri
473 )
475 log.verbose("Wrote new Butler configuration file to %s", configURI)
477 return config
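# Editor's note: a minimal sketch (not part of the module) of creating a new
# repository with default configuration and then constructing a writeable
# Butler against it. The path is an assumption.
Butler.makeRepo("/path/to/new_repo")
butler = Butler("/path/to/new_repo", writeable=True)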
479 @classmethod
480 def _unpickle(
481 cls,
482 config: ButlerConfig,
483 collections: tuple[str, ...] | None,
484 run: str | None,
485 defaultDataId: dict[str, str],
486 writeable: bool,
487 ) -> Butler:
488 """Callable used to unpickle a Butler.
490 We prefer not to use ``Butler.__init__`` directly so we can force some
491 of its many arguments to be keyword-only (note that ``__reduce__``
492 can only invoke callables with positional arguments).
494 Parameters
495 ----------
496 config : `ButlerConfig`
497 Butler configuration, already coerced into a true `ButlerConfig`
498 instance (and hence after any search paths for overrides have been
499 utilized).
500 collections : `tuple` [ `str` ]
501 Names of the default collections to read from.
502 run : `str`, optional
503 Name of the default `~CollectionType.RUN` collection to write to.
504 defaultDataId : `dict` [ `str`, `str` ]
505 Default data ID values.
506 writeable : `bool`
507 Whether the Butler should support write operations.
509 Returns
510 -------
511 butler : `Butler`
512 A new `Butler` instance.
513 """
514 # MyPy doesn't recognize that the kwargs below are totally valid; it
515 # seems to think ``**defaultDataId`` is a _positional_ argument!
516 return cls(
517 config=config,
518 collections=collections,
519 run=run,
520 writeable=writeable,
521 **defaultDataId, # type: ignore
522 )
524 def __reduce__(self) -> tuple:
525 """Support pickling."""
526 return (
527 Butler._unpickle,
528 (
529 self._config,
530 self.collections,
531 self.run,
532 self._registry.defaults.dataId.byName(),
533 self._registry.isWriteable(),
534 ),
535 )
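# Editor's note: an illustration (not part of the module) of the pickling
# support provided by ``__reduce__``/``_unpickle`` above; the clone is
# re-initialized from the same configuration, default collections, run and
# default data ID.
import pickle
clone = pickle.loads(pickle.dumps(butler))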
537 def __str__(self) -> str:
538 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format(
539 self.collections, self.run, self._datastore, self._registry
540 )
542 def isWriteable(self) -> bool:
543 """Return `True` if this `Butler` supports write operations."""
544 return self._registry.isWriteable()
546 @contextlib.contextmanager
547 def transaction(self) -> Iterator[None]:
548 """Context manager supporting `Butler` transactions.
550 Transactions can be nested.
551 """
552 with self._registry.transaction(), self._datastore.transaction():
553 yield
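# Editor's note: a sketch (not part of the module) of the transaction context
# manager above; registry and datastore changes roll back together if the
# block raises. The dataset type, data ID values and ``catalog`` object are
# assumptions.
with butler.transaction():
    butler.put(catalog, "sourceTable", instrument="HSC", visit=903342)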
555 def _standardizeArgs(
556 self,
557 datasetRefOrType: DatasetRef | DatasetType | str,
558 dataId: DataId | None = None,
559 for_put: bool = True,
560 **kwargs: Any,
561 ) -> tuple[DatasetType, DataId | None]:
562 """Standardize the arguments passed to several Butler APIs.
564 Parameters
565 ----------
566 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
567 When `DatasetRef` the `dataId` should be `None`.
568 Otherwise the `DatasetType` or name thereof.
569 dataId : `dict` or `DataCoordinate`
570 A `dict` of `Dimension` link name, value pairs that label the
571 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
572 should be provided as the second argument.
573 for_put : `bool`, optional
574 If `True` this call is invoked as part of a `Butler.put()`.
575 Otherwise it is assumed to be part of a `Butler.get()`. This
576 parameter is only relevant if there is dataset type
577 inconsistency.
578 **kwargs
579 Additional keyword arguments used to augment or construct a
580 `DataCoordinate`. See `DataCoordinate.standardize`
581 parameters.
583 Returns
584 -------
585 datasetType : `DatasetType`
586 A `DatasetType` instance extracted from ``datasetRefOrType``.
587 dataId : `dict` or `DataId`, optional
588 Argument that can be used (along with ``kwargs``) to construct a
589 `DataId`.
591 Notes
592 -----
593 Butler APIs that conceptually need a DatasetRef also allow passing a
594 `DatasetType` (or the name of one) and a `DataId` (or a dict and
595 keyword arguments that can be used to construct one) separately. This
596 method accepts those arguments and always returns a true `DatasetType`
597 and a `DataId` or `dict`.
599 Standardization of `dict` vs `DataId` is best handled by passing the
600 returned ``dataId`` (and ``kwargs``) to `Registry` APIs, which are
601 generally similarly flexible.
602 """
603 externalDatasetType: DatasetType | None = None
604 internalDatasetType: DatasetType | None = None
605 if isinstance(datasetRefOrType, DatasetRef):
606 if dataId is not None or kwargs:
607 raise ValueError("DatasetRef given, cannot use dataId as well")
608 externalDatasetType = datasetRefOrType.datasetType
609 dataId = datasetRefOrType.dataId
610 else:
611 # Don't check whether DataId is provided, because Registry APIs
612 # can usually construct a better error message when it wasn't.
613 if isinstance(datasetRefOrType, DatasetType):
614 externalDatasetType = datasetRefOrType
615 else:
616 internalDatasetType = self._registry.getDatasetType(datasetRefOrType)
618 # Check that they are self-consistent
619 if externalDatasetType is not None:
620 internalDatasetType = self._registry.getDatasetType(externalDatasetType.name)
621 if externalDatasetType != internalDatasetType:
622 # We can allow differences if they are compatible, depending
623 # on whether this is a get or a put. A get requires that
624 # the python type associated with the datastore can be
625 # converted to the user type. A put requires that the user
626 # supplied python type can be converted to the internal
627 # type expected by registry.
628 relevantDatasetType = internalDatasetType
629 if for_put:
630 is_compatible = internalDatasetType.is_compatible_with(externalDatasetType)
631 else:
632 is_compatible = externalDatasetType.is_compatible_with(internalDatasetType)
633 relevantDatasetType = externalDatasetType
634 if not is_compatible:
635 raise ValueError(
636 f"Supplied dataset type ({externalDatasetType}) inconsistent with "
637 f"registry definition ({internalDatasetType})"
638 )
639 # Override the internal definition.
640 internalDatasetType = relevantDatasetType
642 assert internalDatasetType is not None
643 return internalDatasetType, dataId
645 def _rewrite_data_id(
646 self, dataId: DataId | None, datasetType: DatasetType, **kwargs: Any
647 ) -> tuple[DataId | None, dict[str, Any]]:
648 """Rewrite a data ID taking into account dimension records.
650 Take a Data ID and keyword args and rewrite it if necessary to
651 allow the user to specify dimension records rather than dimension
652 primary values.
654 This allows a user to include a dataId dict with keys of
655 ``exposure.day_obs`` and ``exposure.seq_num`` instead of giving
656 the integer exposure ID. It also allows a string to be given
657 for a dimension value rather than the integer ID if that is more
658 convenient. For example, rather than having to specify the
659 detector with ``detector.full_name``, a string given for ``detector``
660 will be interpreted as the full name and converted to the integer
661 value.
663 Keyword arguments can also use strings for dimensions like detector
664 and exposure but Python does not allow them to include ``.`` and
665 so the ``exposure.day_obs`` syntax cannot be used in a keyword
666 argument.
668 Parameters
669 ----------
670 dataId : `dict` or `DataCoordinate`
671 A `dict` of `Dimension` link name, value pairs that will label the
672 `DatasetRef` within a Collection.
673 datasetType : `DatasetType`
674 The dataset type associated with this dataId. Required to
675 determine the relevant dimensions.
676 **kwargs
677 Additional keyword arguments used to augment or construct a
678 `DataId`. See `DataId` parameters.
680 Returns
681 -------
682 dataId : `dict` or `DataCoordinate`
683 The possibly rewritten dataId. If given a `DataCoordinate` and
684 no keyword arguments, the original dataId will be returned
685 unchanged.
686 **kwargs : `dict`
687 Any unused keyword arguments (would normally be empty dict).
688 """
689 # Do nothing if we have a standalone DataCoordinate.
690 if isinstance(dataId, DataCoordinate) and not kwargs:
691 return dataId, kwargs
693 # Process dimension records that are using record information
694 # rather than ids
695 newDataId: dict[str, DataIdValue] = {}
696 byRecord: dict[str, dict[str, Any]] = defaultdict(dict)
698 # if all the dataId comes from keyword parameters we do not need
699 # to do anything here because they can't be of the form
700 # exposure.obs_id because a "." is not allowed in a keyword parameter.
701 if dataId:
702 for k, v in dataId.items():
703 # If we have a Dimension we do not need to do anything
704 # because it cannot be a compound key.
705 if isinstance(k, str) and "." in k:
706 # Someone is using a more human-readable dataId
707 dimensionName, record = k.split(".", 1)
708 byRecord[dimensionName][record] = v
709 elif isinstance(k, Dimension):
710 newDataId[k.name] = v
711 else:
712 newDataId[k] = v
714 # Go through the updated dataId and check the type in case someone is
715 # using an alternate key. We have already filtered out the compound
716 # keys dimensions.record format.
717 not_dimensions = {}
719 # Will need to look in the dataId and the keyword arguments
720 # and will remove them if they need to be fixed or are unrecognized.
721 for dataIdDict in (newDataId, kwargs):
722 # Use a list so we can adjust the dict safely in the loop
723 for dimensionName in list(dataIdDict):
724 value = dataIdDict[dimensionName]
725 try:
726 dimension = self.dimensions.getStaticDimensions()[dimensionName]
727 except KeyError:
728 # This is not a real dimension
729 not_dimensions[dimensionName] = value
730 del dataIdDict[dimensionName]
731 continue
733 # Convert an integral type to an explicit int to simplify
734 # comparisons here
735 if isinstance(value, numbers.Integral):
736 value = int(value)
738 if not isinstance(value, dimension.primaryKey.getPythonType()):
739 for alternate in dimension.alternateKeys:
740 if isinstance(value, alternate.getPythonType()):
741 byRecord[dimensionName][alternate.name] = value
742 del dataIdDict[dimensionName]
743 log.debug(
744 "Converting dimension %s to %s.%s=%s",
745 dimensionName,
746 dimensionName,
747 alternate.name,
748 value,
749 )
750 break
751 else:
752 log.warning(
753 "Type mismatch found for value '%r' provided for dimension %s. "
754 "Could not find matching alternative (primary key has type %s) "
755 "so attempting to use as-is.",
756 value,
757 dimensionName,
758 dimension.primaryKey.getPythonType(),
759 )
761 # By this point kwargs and newDataId should only include valid
762 # dimensions. Merge kwargs in to the new dataId and log if there
763 # are dimensions in both (rather than calling update).
764 for k, v in kwargs.items():
765 if k in newDataId and newDataId[k] != v:
766 log.debug(
767 "Keyword arg %s overriding explicit value in dataId of %s with %s", k, newDataId[k], v
768 )
769 newDataId[k] = v
770 # No need to retain any values in kwargs now.
771 kwargs = {}
773 # If we have some unrecognized dimensions we have to try to connect
774 # them to records in other dimensions. This is made more complicated
775 # by some dimensions having records with clashing names. A mitigation
776 # is that we can tell by this point which dimensions are missing
777 # for the DatasetType but this does not work for calibrations
778 # where additional dimensions can be used to constrain the temporal
779 # axis.
780 if not_dimensions:
781 # Search for all dimensions even if we have been given a value
782 # explicitly. In some cases records are given as well as the
783 # actual dimension and this should not be an error if they
784 # match.
785 mandatoryDimensions = datasetType.dimensions.names # - provided
787 candidateDimensions: set[str] = set()
788 candidateDimensions.update(mandatoryDimensions)
790 # For calibrations we may well be needing temporal dimensions
791 # so rather than always including all dimensions in the scan
792 # restrict things a little. It is still possible for there
793 # to be confusion over day_obs in visit vs exposure for example.
794 # If we are not searching calibration collections things may
795 # fail but they are going to fail anyway because of the
796 # ambiguity of the dataId...
797 if datasetType.isCalibration():
798 for dim in self.dimensions.getStaticDimensions():
799 if dim.temporal:
800 candidateDimensions.add(str(dim))
802 # Look up table for the first association with a dimension
803 guessedAssociation: dict[str, dict[str, Any]] = defaultdict(dict)
805 # Keep track of whether an item is associated with multiple
806 # dimensions.
807 counter: Counter[str] = Counter()
808 assigned: dict[str, set[str]] = defaultdict(set)
810 # Go through the missing dimensions and associate the
811 # given names with records within those dimensions
812 matched_dims = set()
813 for dimensionName in candidateDimensions:
814 dimension = self.dimensions.getStaticDimensions()[dimensionName]
815 fields = dimension.metadata.names | dimension.uniqueKeys.names
816 for field in not_dimensions:
817 if field in fields:
818 guessedAssociation[dimensionName][field] = not_dimensions[field]
819 counter[dimensionName] += 1
820 assigned[field].add(dimensionName)
821 matched_dims.add(field)
823 # Calculate the fields that matched nothing.
824 never_found = set(not_dimensions) - matched_dims
826 if never_found:
827 raise ValueError(f"Unrecognized keyword args given: {never_found}")
829 # There is a chance we have allocated a single dataId item
830 # to multiple dimensions. Need to decide which should be retained.
831 # For now assume that the most popular alternative wins.
832 # This means that day_obs with seq_num will result in
833 # exposure.day_obs and not visit.day_obs
834 # Also prefer an explicitly missing dimension over an inferred
835 # temporal dimension.
836 for fieldName, assignedDimensions in assigned.items():
837 if len(assignedDimensions) > 1:
838 # Pick the most popular (preferring mandatory dimensions)
839 requiredButMissing = assignedDimensions.intersection(mandatoryDimensions)
840 if requiredButMissing:
841 candidateDimensions = requiredButMissing
842 else:
843 candidateDimensions = assignedDimensions
845 # If this is a choice between visit and exposure and
846 # neither was a required part of the dataset type,
847 # (hence in this branch) always prefer exposure over
848 # visit since exposures are always defined and visits
849 # are defined from exposures.
850 if candidateDimensions == {"exposure", "visit"}:
851 candidateDimensions = {"exposure"}
853 # Select the relevant items and get a new restricted
854 # counter.
855 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions}
856 duplicatesCounter: Counter[str] = Counter()
857 duplicatesCounter.update(theseCounts)
859 # Choose the most common. If they are equally common
860 # we will pick the one that was found first.
861 # Returns a list of tuples
862 selected = duplicatesCounter.most_common(1)[0][0]
864 log.debug(
865 "Ambiguous dataId entry '%s' associated with multiple dimensions: %s."
866 " Removed ambiguity by choosing dimension %s.",
867 fieldName,
868 ", ".join(assignedDimensions),
869 selected,
870 )
872 for candidateDimension in assignedDimensions:
873 if candidateDimension != selected:
874 del guessedAssociation[candidateDimension][fieldName]
876 # Update the record look up dict with the new associations
877 for dimensionName, values in guessedAssociation.items():
878 if values: # A dict might now be empty
879 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values)
880 byRecord[dimensionName].update(values)
882 if byRecord:
883 # Some record specifiers were found so we need to convert
884 # them to the Id form
885 for dimensionName, values in byRecord.items():
886 if dimensionName in newDataId:
887 log.debug(
888 "DataId specified explicit %s dimension value of %s in addition to"
889 " general record specifiers for it of %s. Ignoring record information.",
890 dimensionName,
891 newDataId[dimensionName],
892 str(values),
893 )
894 # Get the actual record and compare with these values.
895 try:
896 recs = list(self._registry.queryDimensionRecords(dimensionName, dataId=newDataId))
897 except DataIdError:
898 raise ValueError(
899 f"Could not find dimension '{dimensionName}'"
900 f" with dataId {newDataId} as part of comparing with"
901 f" record values {byRecord[dimensionName]}"
902 ) from None
903 if len(recs) == 1:
904 errmsg: list[str] = []
905 for k, v in values.items():
906 if (recval := getattr(recs[0], k)) != v:
907 errmsg.append(f"{k}({recval} != {v})")
908 if errmsg:
909 raise ValueError(
910 f"Dimension {dimensionName} in dataId has explicit value"
911 " inconsistent with records: " + ", ".join(errmsg)
912 )
913 else:
914 # Multiple matches for an explicit dimension
915 # should never happen but let downstream complain.
916 pass
917 continue
919 # Build up a WHERE expression
920 bind = {k: v for k, v in values.items()}
921 where = " AND ".join(f"{dimensionName}.{k} = {k}" for k in bind)
923 # Hopefully we get a single record that matches
924 records = set(
925 self._registry.queryDimensionRecords(
926 dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs
927 )
928 )
930 if len(records) != 1:
931 if len(records) > 1:
932 # visit can have an ambiguous answer without involving
933 # visit_system. The default visit_system is defined
934 # by the instrument.
935 if (
936 dimensionName == "visit"
937 and "visit_system_membership" in self.dimensions
938 and "visit_system" in self.dimensions["instrument"].metadata
939 ):
940 instrument_records = list(
941 self._registry.queryDimensionRecords(
942 "instrument",
943 dataId=newDataId,
944 **kwargs,
945 )
946 )
947 if len(instrument_records) == 1:
948 visit_system = instrument_records[0].visit_system
949 if visit_system is None:
950 # Set to a value that will never match.
951 visit_system = -1
953 # Look up each visit in the
954 # visit_system_membership records.
955 for rec in records:
956 membership = list(
957 self._registry.queryDimensionRecords(
958 # Use bind to allow zero results.
959 # This is a fully-specified query.
960 "visit_system_membership",
961 where="instrument = inst AND visit_system = system AND visit = v",
962 bind=dict(
963 inst=instrument_records[0].name, system=visit_system, v=rec.id
964 ),
965 )
966 )
967 if membership:
968 # This record is the right answer.
969 records = {rec}
970 break
972 # The ambiguity may have been resolved so check again.
973 if len(records) > 1:
974 log.debug("Received %d records from constraints of %s", len(records), str(values))
975 for r in records:
976 log.debug("- %s", str(r))
977 raise ValueError(
978 f"DataId specification for dimension {dimensionName} is not"
979 f" uniquely constrained to a single dataset by {values}."
980 f" Got {len(records)} results."
981 )
982 else:
983 raise ValueError(
984 f"DataId specification for dimension {dimensionName} matched no"
985 f" records when constrained by {values}"
986 )
988 # Get the primary key from the real dimension object
989 dimension = self.dimensions.getStaticDimensions()[dimensionName]
990 if not isinstance(dimension, Dimension):
991 raise RuntimeError(
992 f"{dimension.name} is not a true dimension, and cannot be used in data IDs."
993 )
994 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name)
996 return newDataId, kwargs
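# Editor's note: an illustration (not part of the module) of the data ID
# rewriting described above, as seen through the public API. Record-style keys
# and alternate string keys are converted to primary key values before the
# registry lookup; the dataset type, instrument and all values are assumptions.
raw = butler.get("raw", {"instrument": "HSC", "exposure.day_obs": 20230801, "exposure.seq_num": 42}, detector=50)
# Equivalent lookup giving the detector by name rather than integer ID:
raw = butler.get("raw", instrument="HSC", exposure=903342, detector="1_53")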
998 def _findDatasetRef(
999 self,
1000 datasetRefOrType: DatasetRef | DatasetType | str,
1001 dataId: DataId | None = None,
1002 *,
1003 collections: Any = None,
1004 predict: bool = False,
1005 run: str | None = None,
1006 **kwargs: Any,
1007 ) -> DatasetRef:
1008 """Shared logic for methods that start with a search for a dataset in
1009 the registry.
1011 Parameters
1012 ----------
1013 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1014 When `DatasetRef` the `dataId` should be `None`.
1015 Otherwise the `DatasetType` or name thereof.
1016 dataId : `dict` or `DataCoordinate`, optional
1017 A `dict` of `Dimension` link name, value pairs that label the
1018 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1019 should be provided as the first argument.
1020 collections : Any, optional
1021 Collections to be searched, overriding ``self.collections``.
1022 Can be any of the types supported by the ``collections`` argument
1023 to butler construction.
1024 predict : `bool`, optional
1025 If `True`, return a newly created `DatasetRef` with a unique
1026 dataset ID if finding a reference in the `Registry` fails.
1027 Defaults to `False`.
1028 run : `str`, optional
1029 Run collection name to use for creating `DatasetRef` for predicted
1030 datasets. Only used if ``predict`` is `True`.
1031 **kwargs
1032 Additional keyword arguments used to augment or construct a
1033 `DataId`. See `DataId` parameters.
1035 Returns
1036 -------
1037 ref : `DatasetRef`
1038 A reference to the dataset identified by the given arguments.
1039 This can be the same dataset reference as given if it was
1040 resolved.
1042 Raises
1043 ------
1044 LookupError
1045 Raised if no matching dataset exists in the `Registry` (and
1046 ``predict`` is `False`).
1047 ValueError
1048 Raised if a resolved `DatasetRef` was passed as an input, but it
1049 differs from the one found in the registry.
1050 TypeError
1051 Raised if no collections were provided.
1052 """
1053 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, for_put=False, **kwargs)
1054 if isinstance(datasetRefOrType, DatasetRef):
1055 if collections is not None:
1056 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
1057 return datasetRefOrType
1058 timespan: Timespan | None = None
1060 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)
1062 if datasetType.isCalibration():
1063 # Because this is a calibration dataset, first try to
1064 # standardize the data ID without restricting the dimensions to
1065 # those of the dataset type requested, because there may be extra
1066 # dimensions that provide temporal information for a validity-range
1067 # lookup.
1068 dataId = DataCoordinate.standardize(
1069 dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs
1070 )
1071 if dataId.graph.temporal:
1072 dataId = self._registry.expandDataId(dataId)
1073 timespan = dataId.timespan
1074 else:
1075 # Standardize the data ID to just the dimensions of the dataset
1076 # type instead of letting registry.findDataset do it, so we get the
1077 # result even if no dataset is found.
1078 dataId = DataCoordinate.standardize(
1079 dataId, graph=datasetType.dimensions, defaults=self._registry.defaults.dataId, **kwargs
1080 )
1081 # Always lookup the DatasetRef, even if one is given, to ensure it is
1082 # present in the current collection.
1083 ref = self._registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan)
1084 if ref is None:
1085 if predict:
1086 if run is None:
1087 run = self.run
1088 if run is None:
1089 raise TypeError("Cannot predict dataset ID/location with run=None.")
1090 return DatasetRef(datasetType, dataId, run=run)
1091 else:
1092 if collections is None:
1093 collections = self._registry.defaults.collections
1094 raise LookupError(
1095 f"Dataset {datasetType.name} with data ID {dataId} "
1096 f"could not be found in collections {collections}."
1097 )
1098 if datasetType != ref.datasetType:
1099 # If they differ it is because the user explicitly specified
1100 # a compatible dataset type to this call rather than using the
1101 # registry definition. The DatasetRef must therefore be recreated
1102 # using the user definition such that the expected type is
1103 # returned.
1104 ref = DatasetRef(datasetType, ref.dataId, run=ref.run, id=ref.id)
1106 return ref
1108 # TODO: remove on DM-40067.
1109 @transactional
1110 @deprecated(
1111 reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
1112 " Please use Butler.put(). Be aware that you may need to adjust your usage if you"
1113 " were relying on the run parameter to determine the run."
1114 " Will be removed after v26.0.",
1115 version="v26.0",
1116 category=FutureWarning,
1117 )
1118 def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
1119 # Docstring inherited.
1120 return self.put(obj, ref)
1122 @transactional
1123 def put(
1124 self,
1125 obj: Any,
1126 datasetRefOrType: DatasetRef | DatasetType | str,
1127 /,
1128 dataId: DataId | None = None,
1129 *,
1130 run: str | None = None,
1131 **kwargs: Any,
1132 ) -> DatasetRef:
1133 """Store and register a dataset.
1135 Parameters
1136 ----------
1137 obj : `object`
1138 The dataset.
1139 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1140 When `DatasetRef` is provided, ``dataId`` should be `None`.
1141 Otherwise the `DatasetType` or name thereof. If a fully resolved
1142 `DatasetRef` is given the run and ID are used directly.
1143 dataId : `dict` or `DataCoordinate`
1144 A `dict` of `Dimension` link name, value pairs that label the
1145 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1146 should be provided as the second argument.
1147 run : `str`, optional
1148 The name of the run the dataset should be added to, overriding
1149 ``self.run``. Not used if a resolved `DatasetRef` is provided.
1150 **kwargs
1151 Additional keyword arguments used to augment or construct a
1152 `DataCoordinate`. See `DataCoordinate.standardize`
1153 parameters. Not used if a resolved `DatasetRef` is provided.
1155 Returns
1156 -------
1157 ref : `DatasetRef`
1158 A reference to the stored dataset, updated with the correct id if
1159 given.
1161 Raises
1162 ------
1163 TypeError
1164 Raised if the butler is read-only or if no run has been provided.
1165 """
1166 if isinstance(datasetRefOrType, DatasetRef):
1167 # This is a direct put of predefined DatasetRef.
1168 log.debug("Butler put direct: %s", datasetRefOrType)
1169 if run is not None:
1170 warnings.warn("Run collection is not used for DatasetRef", stacklevel=3)
1171 # If registry already has a dataset with the same dataset ID,
1172 # dataset type and DataId, then _importDatasets will do nothing and
1173 # just return the original ref. We still have to raise in this case;
1174 # the datastore check below handles that.
1175 self._registry._importDatasets([datasetRefOrType], expand=True)
1176 # Before trying to write to the datastore check that it does not
1177 # know this dataset. This is prone to races, of course.
1178 if self._datastore.knows(datasetRefOrType):
1179 raise ConflictingDefinitionError(f"Datastore already contains dataset: {datasetRefOrType}")
1180 # Try to write dataset to the datastore, if it fails due to a race
1181 # with another write, the content of stored data may be
1182 # unpredictable.
1183 try:
1184 self._datastore.put(obj, datasetRefOrType)
1185 except IntegrityError as e:
1186 raise ConflictingDefinitionError(f"Datastore already contains dataset: {e}") from e
1187 return datasetRefOrType
1189 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run)
1190 if not self.isWriteable():
1191 raise TypeError("Butler is read-only.")
1192 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwargs)
1194 # Handle dimension records in dataId
1195 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)
1197 # Add Registry Dataset entry.
1198 dataId = self._registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs)
1199 (ref,) = self._registry.insertDatasets(datasetType, run=run, dataIds=[dataId])
1200 self._datastore.put(obj, ref)
1202 return ref
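# Editor's note: a sketch (not part of the module) of the two put() forms
# handled above: a dataset type name plus data ID (written to the default
# run), or a fully resolved DatasetRef whose run and dataset ID are used
# directly. The dataset type, data ID values and ``existing_ref`` are
# assumptions.
ref = butler.put(image, "postISRCCD", instrument="HSC", exposure=903342, detector=50)
butler.put(image, existing_ref)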
1204 # TODO: remove on DM-40067.
1205 @deprecated(
1206 reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
1207 " Please use Butler.get(). Will be removed after v26.0.",
1208 version="v26.0",
1209 category=FutureWarning,
1210 )
1211 def getDirect(
1212 self,
1213 ref: DatasetRef,
1214 *,
1215 parameters: dict[str, Any] | None = None,
1216 storageClass: StorageClass | str | None = None,
1217 ) -> Any:
1218 """Retrieve a stored dataset.
1220 Parameters
1221 ----------
1222 ref : `DatasetRef`
1223 Resolved reference to an already stored dataset.
1224 parameters : `dict`
1225 Additional StorageClass-defined options to control reading,
1226 typically used to efficiently read only a subset of the dataset.
1227 storageClass : `StorageClass` or `str`, optional
1228 The storage class to be used to override the Python type
1229 returned by this method. By default the returned type matches
1230 the dataset type definition for this dataset. Specifying a
1231 read `StorageClass` can force a different type to be returned.
1232 This type must be compatible with the original type.
1234 Returns
1235 -------
1236 obj : `object`
1237 The dataset.
1238 """
1239 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
1241 # TODO: remove on DM-40067.
1242 @deprecated(
1243 reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
1244 "Please use Butler.getDeferred(). Will be removed after v26.0.",
1245 version="v26.0",
1246 category=FutureWarning,
1247 )
1248 def getDirectDeferred(
1249 self,
1250 ref: DatasetRef,
1251 *,
1252 parameters: dict | None = None,
1253 storageClass: str | StorageClass | None = None,
1254 ) -> DeferredDatasetHandle:
1255 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
1256 from a resolved `DatasetRef`.
1258 Parameters
1259 ----------
1260 ref : `DatasetRef`
1261 Resolved reference to an already stored dataset.
1262 parameters : `dict`
1263 Additional StorageClass-defined options to control reading,
1264 typically used to efficiently read only a subset of the dataset.
1265 storageClass : `StorageClass` or `str`, optional
1266 The storage class to be used to override the Python type
1267 returned by this method. By default the returned type matches
1268 the dataset type definition for this dataset. Specifying a
1269 read `StorageClass` can force a different type to be returned.
1270 This type must be compatible with the original type.
1272 Returns
1273 -------
1274 obj : `DeferredDatasetHandle`
1275 A handle which can be used to retrieve a dataset at a later time.
1277 Raises
1278 ------
1279 LookupError
1280 Raised if no matching dataset exists in the `Registry`.
1281 """
1282 # Check that dataset is known to the datastore.
1283 if not self._datastore.knows(ref):
1284 raise LookupError(f"Dataset reference {ref} is not known to datastore.")
1285 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
1287 def getDeferred(
1288 self,
1289 datasetRefOrType: DatasetRef | DatasetType | str,
1290 /,
1291 dataId: DataId | None = None,
1292 *,
1293 parameters: dict | None = None,
1294 collections: Any = None,
1295 storageClass: str | StorageClass | None = None,
1296 **kwargs: Any,
1297 ) -> DeferredDatasetHandle:
1298 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
1299 after an immediate registry lookup.
1301 Parameters
1302 ----------
1303 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1304 When `DatasetRef` the `dataId` should be `None`.
1305 Otherwise the `DatasetType` or name thereof.
1306 dataId : `dict` or `DataCoordinate`, optional
1307 A `dict` of `Dimension` link name, value pairs that label the
1308 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1309 should be provided as the first argument.
1310 parameters : `dict`
1311 Additional StorageClass-defined options to control reading,
1312 typically used to efficiently read only a subset of the dataset.
1313 collections : Any, optional
1314 Collections to be searched, overriding ``self.collections``.
1315 Can be any of the types supported by the ``collections`` argument
1316 to butler construction.
1317 storageClass : `StorageClass` or `str`, optional
1318 The storage class to be used to override the Python type
1319 returned by this method. By default the returned type matches
1320 the dataset type definition for this dataset. Specifying a
1321 read `StorageClass` can force a different type to be returned.
1322 This type must be compatible with the original type.
1323 **kwargs
1324 Additional keyword arguments used to augment or construct a
1325 `DataId`. See `DataId` parameters.
1327 Returns
1328 -------
1329 obj : `DeferredDatasetHandle`
1330 A handle which can be used to retrieve a dataset at a later time.
1332 Raises
1333 ------
1334 LookupError
1335 Raised if no matching dataset exists in the `Registry` or
1336 datastore.
1337 ValueError
1338 Raised if a resolved `DatasetRef` was passed as an input, but it
1339 differs from the one found in the registry.
1340 TypeError
1341 Raised if no collections were provided.
1342 """
1343 if isinstance(datasetRefOrType, DatasetRef):
1344 if not self._datastore.knows(datasetRefOrType):
1345 raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
1346 ref = datasetRefOrType
1347 else:
1348 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1349 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
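# Editor's note: a sketch (not part of the module) of deferred reads via the
# method above; the registry/datastore lookup happens immediately, while the
# actual I/O is postponed until the handle's get() is called. Values are
# assumptions.
handle = butler.getDeferred("calexp", instrument="HSC", visit=903342, detector=50)
image = handle.get()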
1351 def get(
1352 self,
1353 datasetRefOrType: DatasetRef | DatasetType | str,
1354 /,
1355 dataId: DataId | None = None,
1356 *,
1357 parameters: dict[str, Any] | None = None,
1358 collections: Any = None,
1359 storageClass: StorageClass | str | None = None,
1360 **kwargs: Any,
1361 ) -> Any:
1362 """Retrieve a stored dataset.
1364 Parameters
1365 ----------
1366 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1367 When `DatasetRef` the `dataId` should be `None`.
1368 Otherwise the `DatasetType` or name thereof.
1369 If a resolved `DatasetRef`, the associated dataset
1370 is returned directly without additional querying.
1371 dataId : `dict` or `DataCoordinate`
1372 A `dict` of `Dimension` link name, value pairs that label the
1373 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1374 should be provided as the first argument.
1375 parameters : `dict`
1376 Additional StorageClass-defined options to control reading,
1377 typically used to efficiently read only a subset of the dataset.
1378 collections : Any, optional
1379 Collections to be searched, overriding ``self.collections``.
1380 Can be any of the types supported by the ``collections`` argument
1381 to butler construction.
1382 storageClass : `StorageClass` or `str`, optional
1383 The storage class to be used to override the Python type
1384 returned by this method. By default the returned type matches
1385 the dataset type definition for this dataset. Specifying a
1386 read `StorageClass` can force a different type to be returned.
1387 This type must be compatible with the original type.
1388 **kwargs
1389 Additional keyword arguments used to augment or construct a
1390 `DataCoordinate`. See `DataCoordinate.standardize`
1391 parameters.
1393 Returns
1394 -------
1395 obj : `object`
1396 The dataset.
1398 Raises
1399 ------
1400 LookupError
1401 Raised if no matching dataset exists in the `Registry`.
1402 TypeError
1403 Raised if no collections were provided.
1405 Notes
1406 -----
1407 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
1408 this method requires that the given data ID include temporal dimensions
1409 beyond the dimensions of the dataset type itself, in order to find the
1410 dataset with the appropriate validity range. For example, a "bias"
1411 dataset with native dimensions ``{instrument, detector}`` could be
1412 fetched with a ``{instrument, detector, exposure}`` data ID, because
1413 ``exposure`` is a temporal dimension.
1414 """
1415 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
1416 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1417 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
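# Editor's note: a sketch (not part of the module) of the calibration lookup
# described in the Notes above: the extra ``exposure`` dimension supplies the
# temporal information used to select the bias with the matching validity
# range. The collection name and data ID values are assumptions.
bias = butler.get("bias", instrument="HSC", detector=50, exposure=903342, collections="HSC/calib")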
1419 def getURIs(
1420 self,
1421 datasetRefOrType: DatasetRef | DatasetType | str,
1422 /,
1423 dataId: DataId | None = None,
1424 *,
1425 predict: bool = False,
1426 collections: Any = None,
1427 run: str | None = None,
1428 **kwargs: Any,
1429 ) -> DatasetRefURIs:
1430 """Return the URIs associated with the dataset.
1432 Parameters
1433 ----------
1434 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1435 When `DatasetRef` the `dataId` should be `None`.
1436 Otherwise the `DatasetType` or name thereof.
1437 dataId : `dict` or `DataCoordinate`
1438 A `dict` of `Dimension` link name, value pairs that label the
1439 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1440 should be provided as the first argument.
1441 predict : `bool`
1442 If `True`, allow URIs to be returned of datasets that have not
1443 been written.
1444 collections : Any, optional
1445 Collections to be searched, overriding ``self.collections``.
1446 Can be any of the types supported by the ``collections`` argument
1447 to butler construction.
1448 run : `str`, optional
1449 Run to use for predictions, overriding ``self.run``.
1450 **kwargs
1451 Additional keyword arguments used to augment or construct a
1452 `DataCoordinate`. See `DataCoordinate.standardize`
1453 parameters.
1455 Returns
1456 -------
1457 uris : `DatasetRefURIs`
1458 The URI to the primary artifact associated with this dataset (if
1459 the dataset was disassembled within the datastore this may be
1460 `None`), and the URIs to any components associated with the dataset
1461 artifact (can be empty if there are no components).
1462 """
1463 ref = self._findDatasetRef(
1464 datasetRefOrType, dataId, predict=predict, run=run, collections=collections, **kwargs
1465 )
1466 return self._datastore.getURIs(ref, predict)
1468 def getURI(
1469 self,
1470 datasetRefOrType: DatasetRef | DatasetType | str,
1471 /,
1472 dataId: DataId | None = None,
1473 *,
1474 predict: bool = False,
1475 collections: Any = None,
1476 run: str | None = None,
1477 **kwargs: Any,
1478 ) -> ResourcePath:
1479 """Return the URI to the Dataset.
1481 Parameters
1482 ----------
1483 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1484 When `DatasetRef` the `dataId` should be `None`.
1485 Otherwise the `DatasetType` or name thereof.
1486 dataId : `dict` or `DataCoordinate`
1487 A `dict` of `Dimension` link name, value pairs that label the
1488 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1489 should be provided as the first argument.
1490 predict : `bool`
1491 If `True`, allow URIs to be returned of datasets that have not
1492 been written.
1493 collections : Any, optional
1494 Collections to be searched, overriding ``self.collections``.
1495 Can be any of the types supported by the ``collections`` argument
1496 to butler construction.
1497 run : `str`, optional
1498 Run to use for predictions, overriding ``self.run``.
1499 **kwargs
1500 Additional keyword arguments used to augment or construct a
1501 `DataCoordinate`. See `DataCoordinate.standardize`
1502 parameters.
1504 Returns
1505 -------
1506 uri : `lsst.resources.ResourcePath`
1507 URI pointing to the Dataset within the datastore. If the
1508 Dataset does not exist in the datastore, and if ``predict`` is
1509 `True`, the URI will be a prediction and will include a URI
1510 fragment "#predicted".
1511 If the datastore does not have entities that relate well
1512 to the concept of a URI, the returned URI string will be
1513 descriptive. The returned URI is not guaranteed to be obtainable.
1515 Raises
1516 ------
1517 LookupError
1518 Raised if a URI has been requested for a dataset that does not
1519 exist and guessing is not allowed.
1520 ValueError
1521 Raised if a resolved `DatasetRef` was passed as an input, but it
1522 differs from the one found in the registry.
1523 TypeError
1524 Raised if no collections were provided.
1525 RuntimeError
1526 Raised if a URI is requested for a dataset that consists of
1527 multiple artifacts.
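Examples
--------
A minimal sketch; the dataset type, data ID and run name are
illustrative. With ``predict=True`` a URI can be obtained for a dataset
that has not been written yet::

    uri = butler.getURI(
        "calexp", instrument="HSC", detector=10, visit=903334,
        predict=True, run="HSC/runs/example",
    )
    print(uri.geturl())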
1528 """
1529 primary, components = self.getURIs(
1530 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
1531 )
1533 if primary is None or components:
1534 raise RuntimeError(
1535 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
1536 "Use Butler.getURIs() instead."
1537 )
1538 return primary
1540 def retrieveArtifacts(
1541 self,
1542 refs: Iterable[DatasetRef],
1543 destination: ResourcePathExpression,
1544 transfer: str = "auto",
1545 preserve_path: bool = True,
1546 overwrite: bool = False,
1547 ) -> list[ResourcePath]:
1548 """Retrieve the artifacts associated with the supplied refs.
1550 Parameters
1551 ----------
1552 refs : iterable of `DatasetRef`
1553 The datasets for which artifacts are to be retrieved.
1554 A single ref can result in multiple artifacts. The refs must
1555 be resolved.
1556 destination : `lsst.resources.ResourcePath` or `str`
1557 Location to write the artifacts.
1558 transfer : `str`, optional
1559 Method to use to transfer the artifacts. Must be one of the options
1560 supported by `~lsst.resources.ResourcePath.transfer_from()`.
1561 "move" is not allowed.
1562 preserve_path : `bool`, optional
1563 If `True` the full path of the artifact within the datastore
1564 is preserved. If `False` the final file component of the path
1565 is used.
1566 overwrite : `bool`, optional
1567 If `True` allow transfers to overwrite existing files at the
1568 destination.
1570 Returns
1571 -------
1572 targets : `list` of `lsst.resources.ResourcePath`
1573 URIs of file artifacts in the destination location. Order is not
1574 preserved.
1576 Notes
1577 -----
1578 For non-file datastores the artifacts written to the destination
1579 may not match the representation inside the datastore. For example
1580 a hierarchical data structure in a NoSQL database may well be stored
1581 as a JSON file.
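Examples
--------
A sketch of copying the file artifacts for some datasets to a local
directory; the dataset type, collection name and destination path are
illustrative::

    refs = butler.registry.queryDatasets("calexp", collections="HSC/runs/example")
    paths = butler.retrieveArtifacts(refs, destination="/tmp/export", transfer="copy")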
1582 """
1583 return self._datastore.retrieveArtifacts(
1584 refs,
1585 ResourcePath(destination),
1586 transfer=transfer,
1587 preserve_path=preserve_path,
1588 overwrite=overwrite,
1589 )
1591 def exists(
1592 self,
1593 dataset_ref_or_type: DatasetRef | DatasetType | str,
1594 /,
1595 data_id: DataId | None = None,
1596 *,
1597 full_check: bool = True,
1598 collections: Any = None,
1599 **kwargs: Any,
1600 ) -> DatasetExistence:
1601 """Indicate whether a dataset is known to Butler registry and
1602 datastore.
1604 Parameters
1605 ----------
1606 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1607 When `DatasetRef` the `dataId` should be `None`.
1608 Otherwise the `DatasetType` or name thereof.
1609 data_id : `dict` or `DataCoordinate`
1610 A `dict` of `Dimension` link name, value pairs that label the
1611 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1612 should be provided as the first argument.
1613 full_check : `bool`, optional
1614 If `True`, an additional check will be made for dataset artifact
1615 existence. This will involve additional overhead due to the need
1616 to query an external system. If `False`, the registry and datastore
1617 will only be asked whether they know about the dataset; no
1618 check for the artifact itself will be performed.
1619 collections : Any, optional
1620 Collections to be searched, overriding ``self.collections``.
1621 Can be any of the types supported by the ``collections`` argument
1622 to butler construction.
1623 **kwargs
1624 Additional keyword arguments used to augment or construct a
1625 `DataCoordinate`. See `DataCoordinate.standardize`
1626 parameters.
1628 Returns
1629 -------
1630 existence : `DatasetExistence`
1631 Object indicating whether the dataset is known to registry and
1632 datastore. Evaluates to `True` if the dataset is present and known
1633 to both.
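Examples
--------
A minimal sketch; the dataset type and data ID values are illustrative.
The returned `DatasetExistence` flag evaluates to `True` only when the
dataset is present and known to both registry and datastore::

    existence = butler.exists("calexp", instrument="HSC", detector=10, visit=903334)
    if not existence:
        print(f"Dataset not fully present: {existence}")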
1634 """
1635 existence = DatasetExistence.UNRECOGNIZED
1637 if isinstance(dataset_ref_or_type, DatasetRef):
1638 if collections is not None:
1639 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=2)
1640 if data_id is not None:
1641 warnings.warn("A DataID should not be specified with DatasetRef", stacklevel=2)
1642 ref = dataset_ref_or_type
1643 registry_ref = self._registry.getDataset(dataset_ref_or_type.id)
1644 if registry_ref is not None:
1645 existence |= DatasetExistence.RECORDED
1647 if dataset_ref_or_type != registry_ref:
1648 # This could mean that storage classes differ, so we should
1649 # check for that but use the registry ref for the rest of
1650 # the method.
1651 if registry_ref.is_compatible_with(dataset_ref_or_type):
1652 # Use the registry version from now on.
1653 ref = registry_ref
1654 else:
1655 raise ValueError(
1656 f"The ref given to exists() ({ref}) has the same dataset ID as one "
1657 f"in registry but has different incompatible values ({registry_ref})."
1658 )
1659 else:
1660 try:
1661 ref = self._findDatasetRef(dataset_ref_or_type, data_id, collections=collections, **kwargs)
1662 except (LookupError, TypeError, NoDefaultCollectionError):
1663 return existence
1664 existence |= DatasetExistence.RECORDED
1666 if self._datastore.knows(ref):
1667 existence |= DatasetExistence.DATASTORE
1669 if full_check:
1670 if self._datastore.exists(ref):
1671 existence |= DatasetExistence._ARTIFACT
1672 elif existence.value != DatasetExistence.UNRECOGNIZED.value:
1673 # Do not add this flag if we have no other idea about a dataset.
1674 existence |= DatasetExistence(DatasetExistence._ASSUMED)
1676 return existence
1678 def _exists_many(
1679 self,
1680 refs: Iterable[DatasetRef],
1681 /,
1682 *,
1683 full_check: bool = True,
1684 ) -> dict[DatasetRef, DatasetExistence]:
1685 """Indicate whether multiple datasets are known to Butler registry and
1686 datastore.
1688 This is an experimental API that may change at any moment.
1690 Parameters
1691 ----------
1692 refs : iterable of `DatasetRef`
1693 The datasets to be checked.
1694 full_check : `bool`, optional
1695 If `True`, an additional check will be made for dataset artifact
1696 existence. This will involve additional overhead due to the need
1697 to query an external system. If `False`, the registry and datastore
1698 will only be asked whether they know about the dataset; no
1699 check for the artifact itself will be performed.
1701 Returns
1702 -------
1703 existence : dict of [`DatasetRef`, `DatasetExistence`]
1704 Mapping from the given dataset refs to an enum indicating the
1705 status of the dataset in registry and datastore.
1706 Each value evaluates to `True` if the dataset is present and known
1707 to both.
1708 """
1709 existence = {ref: DatasetExistence.UNRECOGNIZED for ref in refs}
1711 # Registry does not have a bulk API to check for a ref.
1712 for ref in refs:
1713 registry_ref = self._registry.getDataset(ref.id)
1714 if registry_ref is not None:
1715 # It is possible, albeit unlikely, that the given ref does
1716 # not match the one in registry even though the UUID matches.
1717 # When checking a single ref we raise, but it's impolite to
1718 # do that when potentially hundreds of refs are being checked.
1719 # We could change the API to only accept UUIDs and that would
1720 # remove the ability to even check and remove the worry
1721 # about differing storage classes. Given the ongoing discussion
1722 # on refs vs UUIDs and whether to raise or have a new
1723 # private flag, treat this as a private API for now.
1724 existence[ref] |= DatasetExistence.RECORDED
1726 # Ask datastore if it knows about these refs.
1727 knows = self._datastore.knows_these(refs)
1728 for ref, known in knows.items():
1729 if known:
1730 existence[ref] |= DatasetExistence.DATASTORE
1732 if full_check:
1733 mexists = self._datastore.mexists(refs)
1734 for ref, exists in mexists.items():
1735 if exists:
1736 existence[ref] |= DatasetExistence._ARTIFACT
1737 else:
1738 # Do not set this flag if nothing is known about the dataset.
1739 for ref in existence:
1740 if existence[ref] != DatasetExistence.UNRECOGNIZED:
1741 existence[ref] |= DatasetExistence._ASSUMED
1743 return existence
1745 # TODO: remove on DM-40079.
1746 @deprecated(
1747 reason="Butler.datasetExists() has been replaced by Butler.exists(). Will be removed after v26.0.",
1748 version="v26.0",
1749 category=FutureWarning,
1750 )
1751 def datasetExists(
1752 self,
1753 datasetRefOrType: DatasetRef | DatasetType | str,
1754 dataId: DataId | None = None,
1755 *,
1756 collections: Any = None,
1757 **kwargs: Any,
1758 ) -> bool:
1759 """Return True if the Dataset is actually present in the Datastore.
1761 Parameters
1762 ----------
1763 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1764 When `DatasetRef` the `dataId` should be `None`.
1765 Otherwise the `DatasetType` or name thereof.
1766 dataId : `dict` or `DataCoordinate`
1767 A `dict` of `Dimension` link name, value pairs that label the
1768 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1769 should be provided as the first argument.
1770 collections : Any, optional
1771 Collections to be searched, overriding ``self.collections``.
1772 Can be any of the types supported by the ``collections`` argument
1773 to butler construction.
1774 **kwargs
1775 Additional keyword arguments used to augment or construct a
1776 `DataCoordinate`. See `DataCoordinate.standardize`
1777 parameters.
1779 Raises
1780 ------
1781 LookupError
1782 Raised if the dataset is not even present in the Registry.
1783 ValueError
1784 Raised if a resolved `DatasetRef` was passed as an input, but it
1785 differs from the one found in the registry.
1786 NoDefaultCollectionError
1787 Raised if no collections were provided.
1788 """
1789 # A resolved ref may be given that is not known to this butler.
1790 if isinstance(datasetRefOrType, DatasetRef):
1791 ref = self._registry.getDataset(datasetRefOrType.id)
1792 if ref is None:
1793 raise LookupError(
1794 f"Resolved DatasetRef with id {datasetRefOrType.id} is not known to registry."
1795 )
1796 else:
1797 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1798 return self._datastore.exists(ref)
1800 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1801 """Remove one or more `~CollectionType.RUN` collections and the
1802 datasets within them.
1804 Parameters
1805 ----------
1806 names : `~collections.abc.Iterable` [ `str` ]
1807 The names of the collections to remove.
1808 unstore : `bool`, optional
1809 If `True` (default), delete datasets from all datastores in which
1810 they are present, and attempt to roll back the registry deletions if
1811 datastore deletions fail (which may not always be possible). If
1812 `False`, datastore records for these datasets are still removed,
1813 but any artifacts (e.g. files) will not be.
1815 Raises
1816 ------
1817 TypeError
1818 Raised if one or more collections are not of type
1819 `~CollectionType.RUN`.
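Examples
--------
A sketch of removing a scratch `~CollectionType.RUN` collection together
with the file artifacts of its datasets; the collection name is
illustrative and the butler must be writeable::

    butler.removeRuns(["u/someone/scratch"], unstore=True)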
1820 """
1821 if not self.isWriteable():
1822 raise TypeError("Butler is read-only.")
1823 names = list(names)
1824 refs: list[DatasetRef] = []
1825 for name in names:
1826 collectionType = self._registry.getCollectionType(name)
1827 if collectionType is not CollectionType.RUN:
1828 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.")
1829 refs.extend(self._registry.queryDatasets(..., collections=name, findFirst=True))
1830 with self._datastore.transaction(), self._registry.transaction():
1831 if unstore:
1832 self._datastore.trash(refs)
1833 else:
1834 self._datastore.forget(refs)
1835 for name in names:
1836 self._registry.removeCollection(name)
1837 if unstore:
1838 # Point of no return for removing artifacts
1839 self._datastore.emptyTrash()
1841 def pruneDatasets(
1842 self,
1843 refs: Iterable[DatasetRef],
1844 *,
1845 disassociate: bool = True,
1846 unstore: bool = False,
1847 tags: Iterable[str] = (),
1848 purge: bool = False,
1849 ) -> None:
1850 # docstring inherited from LimitedButler
1852 if not self.isWriteable():
1853 raise TypeError("Butler is read-only.")
1854 if purge:
1855 if not disassociate:
1856 raise TypeError("Cannot pass purge=True without disassociate=True.")
1857 if not unstore:
1858 raise TypeError("Cannot pass purge=True without unstore=True.")
1859 elif disassociate:
1860 tags = tuple(tags)
1861 if not tags:
1862 raise TypeError("No tags provided but disassociate=True.")
1863 for tag in tags:
1864 collectionType = self._registry.getCollectionType(tag)
1865 if collectionType is not CollectionType.TAGGED:
1866 raise TypeError(
1867 f"Cannot disassociate from collection '{tag}' "
1868 f"of non-TAGGED type {collectionType.name}."
1869 )
1870 # Transform possibly-single-pass iterable into something we can iterate
1871 # over multiple times.
1872 refs = list(refs)
1873 # Pruning a component of a DatasetRef makes no sense since registry
1874 # doesn't know about components and datastore might not store
1875 components in a separate file.
1876 for ref in refs:
1877 if ref.datasetType.component():
1878 raise ValueError(f"Can not prune a component of a dataset (ref={ref})")
1879 # We don't need an unreliable Datastore transaction for this, because
1880 # we've been extra careful to ensure that Datastore.trash only involves
1881 # mutating the Registry (it can _look_ at Datastore-specific things,
1882 # but shouldn't change them), and hence all operations here are
1883 # Registry operations.
1884 with self._datastore.transaction(), self._registry.transaction():
1885 if unstore:
1886 self._datastore.trash(refs)
1887 if purge:
1888 self._registry.removeDatasets(refs)
1889 elif disassociate:
1890 assert tags, "Guaranteed by earlier logic in this function."
1891 for tag in tags:
1892 self._registry.disassociate(tag, refs)
1893 # We've exited the Registry transaction, and apparently committed.
1894 # (if there was an exception, everything rolled back, and it's as if
1895 # nothing happened - and we never get here).
1896 # Datastore artifacts are not yet gone, but they're clearly marked
1897 # as trash, so if we fail to delete now because of (e.g.) filesystem
1898 # problems we can try again later, and if manual administrative
1899 # intervention is required, it's pretty clear what that should entail:
1900 # deleting everything on disk and in private Datastore tables that is
1901 # in the dataset_location_trash table.
1902 if unstore:
1903 # Point of no return for removing artifacts
1904 self._datastore.emptyTrash()
1906 @transactional
1907 def ingest(
1908 self,
1909 *datasets: FileDataset,
1910 transfer: str | None = "auto",
1911 run: str | None = None,
1912 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
1913 record_validation_info: bool = True,
1914 ) -> None:
1915 """Store and register one or more datasets that already exist on disk.
1917 Parameters
1918 ----------
1919 datasets : `FileDataset`
1920 Each positional argument is a struct containing information about
1921 a file to be ingested, including its URI (either absolute or
1922 relative to the datastore root, if applicable), a resolved
1923 `DatasetRef`, and optionally a formatter class or its
1924 fully-qualified string name. If a formatter is not provided, the
1925 formatter that would be used for `put` is assumed. On successful
1926 ingest all `FileDataset.formatter` attributes will be set to the
1927 formatter class used. `FileDataset.path` attributes may be modified
1928 to put paths in whatever the datastore considers a standardized
1929 form.
1930 transfer : `str`, optional
1931 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1932 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1933 transfer the file.
1934 run : `str`, optional
1935 The name of the run ingested datasets should be added to,
1936 overriding ``self.run``. This parameter is now deprecated since
1937 the run is encoded in the ``FileDataset``.
1938 idGenerationMode : `DatasetIdGenEnum`, optional
1939 Specifies option for generating dataset IDs. By default unique IDs
1940 are generated for each inserted dataset.
1941 record_validation_info : `bool`, optional
1942 If `True`, the default, the datastore can record validation
1943 information associated with the file. If `False` the datastore
1944 will not attempt to track any information such as checksums
1945 or file sizes. This can be useful if such information is tracked
1946 in an external system or if the file is to be compressed in place.
1947 It is up to the datastore whether this parameter is relevant.
1949 Raises
1950 ------
1951 TypeError
1952 Raised if the butler is read-only or if no run was provided.
1953 NotImplementedError
1954 Raised if the `Datastore` does not support the given transfer mode.
1955 DatasetTypeNotSupportedError
1956 Raised if one or more files to be ingested have a dataset type that
1957 is not supported by the `Datastore`.
1958 FileNotFoundError
1959 Raised if one of the given files does not exist.
1960 FileExistsError
1961 Raised if transfer is not `None` but the (internal) location the
1962 file would be moved to is already occupied.
1964 Notes
1965 -----
1966 This operation is not fully exception safe: if a database operation
1967 fails, the given `FileDataset` instances may be only partially updated.
1969 It is atomic in terms of database operations (they will either all
1970 succeed or all fail) providing the database engine implements
1971 transactions correctly. It will attempt to be atomic in terms of
1972 filesystem operations as well, but this cannot be implemented
1973 rigorously for most datastores.
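Examples
--------
A sketch of ingesting a single file in place; the dataset type, data ID,
run and file path are illustrative, and the dataset type must already be
registered::

    dataset_type = butler.registry.getDatasetType("raw")
    data_id = butler.registry.expandDataId(
        instrument="HSC", detector=10, exposure=903334
    )
    ref = DatasetRef(dataset_type, data_id, run="HSC/raw/example")
    butler.ingest(FileDataset(path="/data/raw.fits", refs=[ref]), transfer="direct")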
1974 """
1975 if not self.isWriteable():
1976 raise TypeError("Butler is read-only.")
1978 log.verbose("Ingesting %d file dataset%s.", len(datasets), "" if len(datasets) == 1 else "s")
1979 if not datasets:
1980 return
1982 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG)
1984 # We need to reorganize all the inputs so that they are grouped
1985 # by dataset type and run. Multiple refs in a single FileDataset
1986 # are required to share the run and dataset type.
1987 GroupedData = MutableMapping[tuple[DatasetType, str], list[FileDataset]]
1988 groupedData: GroupedData = defaultdict(list)
1990 # Track DataIDs that are being ingested so we can spot issues early
1991 # with duplication. Retain previous FileDataset so we can report it.
1992 groupedDataIds: MutableMapping[
1993 tuple[DatasetType, str], dict[DataCoordinate, FileDataset]
1994 ] = defaultdict(dict)
1996 used_run = False
1998 # And the nested loop that populates it:
1999 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"):
2000 # Somewhere to store pre-existing refs if we have an
2001 # execution butler.
2002 existingRefs: list[DatasetRef] = []
2004 for ref in dataset.refs:
2005 assert ref.run is not None # For mypy
2006 group_key = (ref.datasetType, ref.run)
2008 if ref.dataId in groupedDataIds[group_key]:
2009 raise ConflictingDefinitionError(
2010 f"Ingest conflict. Dataset {dataset.path} has same"
2011 " DataId as other ingest dataset"
2012 f" {groupedDataIds[group_key][ref.dataId].path} "
2013 f" ({ref.dataId})"
2014 )
2016 groupedDataIds[group_key][ref.dataId] = dataset
2018 if existingRefs:
2019 if len(dataset.refs) != len(existingRefs):
2020 # Keeping track of partially pre-existing datasets is hard
2021 # and should generally never happen. For now don't allow
2022 # it.
2023 raise ConflictingDefinitionError(
2024 f"For dataset {dataset.path} some dataIds already exist"
2025 " in registry but others do not. This is not supported."
2026 )
2028 # Store expanded form in the original FileDataset.
2029 dataset.refs = existingRefs
2030 else:
2031 groupedData[group_key].append(dataset)
2033 if not used_run and run is not None:
2034 warnings.warn(
2035 "All DatasetRefs to be ingested had resolved dataset IDs. The value given to the "
2036 f"'run' parameter ({run!r}) was not used and the parameter will be removed in the future.",
2037 category=FutureWarning,
2038 stacklevel=3, # Take into account the @transactional decorator.
2039 )
2041 # Now we can bulk-insert into Registry for each DatasetType.
2042 for (datasetType, this_run), grouped_datasets in progress.iter_item_chunks(
2043 groupedData.items(), desc="Bulk-inserting datasets by type"
2044 ):
2045 refs_to_import = []
2046 for dataset in grouped_datasets:
2047 refs_to_import.extend(dataset.refs)
2049 n_refs = len(refs_to_import)
2050 log.verbose(
2051 "Importing %d ref%s of dataset type %r into run %r",
2052 n_refs,
2053 "" if n_refs == 1 else "s",
2054 datasetType.name,
2055 this_run,
2056 )
2058 # Import the refs and expand the DataCoordinates since we can't
2059 # guarantee that they are expanded and Datastore will need
2060 # the records.
2061 imported_refs = self._registry._importDatasets(refs_to_import, expand=True)
2062 assert set(imported_refs) == set(refs_to_import)
2064 # Replace all the refs in the FileDataset with expanded versions.
2065 # Pull them off in the order we put them on the list.
2066 for dataset in grouped_datasets:
2067 n_dataset_refs = len(dataset.refs)
2068 dataset.refs = imported_refs[:n_dataset_refs]
2069 del imported_refs[:n_dataset_refs]
2071 # Bulk-insert everything into Datastore.
2072 # We do not know if any of the registry entries already existed
2073 # (_importDatasets only complains if they exist but differ) so
2074 # we have to catch IntegrityError explicitly.
2075 try:
2076 self._datastore.ingest(
2077 *datasets, transfer=transfer, record_validation_info=record_validation_info
2078 )
2079 except IntegrityError as e:
2080 raise ConflictingDefinitionError(f"Datastore already contains one or more datasets: {e}") from e
2082 @contextlib.contextmanager
2083 def export(
2084 self,
2085 *,
2086 directory: str | None = None,
2087 filename: str | None = None,
2088 format: str | None = None,
2089 transfer: str | None = None,
2090 ) -> Iterator[RepoExportContext]:
2091 """Export datasets from the repository represented by this `Butler`.
2093 This method is a context manager that returns a helper object
2094 (`RepoExportContext`) that is used to indicate what information from
2095 the repository should be exported.
2097 Parameters
2098 ----------
2099 directory : `str`, optional
2100 Directory dataset files should be written to if ``transfer`` is not
2101 `None`.
2102 filename : `str`, optional
2103 Name for the file that will include database information associated
2104 with the exported datasets. If this is not an absolute path and
2105 ``directory`` is not `None`, it will be written to ``directory``
2106 instead of the current working directory. Defaults to
2107 "export.{format}".
2108 format : `str`, optional
2109 File format for the database information file. If `None`, the
2110 extension of ``filename`` will be used.
2111 transfer : `str`, optional
2112 Transfer mode passed to `Datastore.export`.
2114 Raises
2115 ------
2116 TypeError
2117 Raised if the set of arguments passed is inconsistent.
2119 Examples
2120 --------
2121 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
2122 methods are used to provide the iterables over data IDs and/or datasets
2123 to be exported::
2125 with butler.export(filename="exports.yaml") as export:
2126 # Export all flats, but none of the dimension element rows
2127 # (i.e. data ID information) associated with them.
2128 export.saveDatasets(butler.registry.queryDatasets("flat"),
2129 elements=())
2130 # Export all datasets that start with "deepCoadd_" and all of
2131 # their associated data ID information.
2132 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
2133 """
2134 if directory is None and transfer is not None:
2135 raise TypeError("Cannot transfer without providing a directory.")
2136 if transfer == "move":
2137 raise TypeError("Transfer may not be 'move': export is read-only")
2138 if format is None:
2139 if filename is None:
2140 raise TypeError("At least one of 'filename' or 'format' must be provided.")
2141 else:
2142 _, format = os.path.splitext(filename)
2143 if not format:
2144 raise ValueError("Please specify a file extension to determine export format.")
2145 format = format[1:] # Strip leading "."
2146 elif filename is None:
2147 filename = f"export.{format}"
2148 if directory is not None:
2149 filename = os.path.join(directory, filename)
2150 formats = self._config["repo_transfer_formats"]
2151 if format not in formats:
2152 raise ValueError(f"Unknown export format {format!r}, allowed: {','.join(formats.keys())}")
2153 BackendClass = get_class_of(formats[format, "export"])
2154 with open(filename, "w") as stream:
2155 backend = BackendClass(stream, universe=self.dimensions)
2156 try:
2157 helper = RepoExportContext(
2158 self._registry, self._datastore, backend=backend, directory=directory, transfer=transfer
2159 )
2160 yield helper
2161 except BaseException:
2162 raise
2163 else:
2164 helper._finish()
2166 def import_(
2167 self,
2168 *,
2169 directory: ResourcePathExpression | None = None,
2170 filename: ResourcePathExpression | TextIO | None = None,
2171 format: str | None = None,
2172 transfer: str | None = None,
2173 skip_dimensions: set | None = None,
2174 ) -> None:
2175 """Import datasets into this repository that were exported from a
2176 different butler repository via `~lsst.daf.butler.Butler.export`.
2178 Parameters
2179 ----------
2180 directory : `~lsst.resources.ResourcePathExpression`, optional
2181 Directory containing dataset files to import from. If `None`,
2182 ``filename`` and all dataset file paths specified therein must
2183 be absolute.
2184 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
2185 A stream or name of file that contains database information
2186 associated with the exported datasets, typically generated by
2187 `~lsst.daf.butler.Butler.export`. If this is a string (name) or
2188 `~lsst.resources.ResourcePath` and is not an absolute path,
2189 it will first be looked for relative to ``directory`` and if not
2190 found there it will be looked for in the current working
2191 directory. Defaults to "export.{format}".
2192 format : `str`, optional
2193 File format for ``filename``. If `None`, the extension of
2194 ``filename`` will be used.
2195 transfer : `str`, optional
2196 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
2197 skip_dimensions : `set`, optional
2198 Names of dimensions that should be skipped and not imported.
2200 Raises
2201 ------
2202 TypeError
2203 Raised if the set of arguments passed is inconsistent, or if the
2204 butler is read-only.
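Examples
--------
A sketch of importing a previously exported repository subset; the
directory and file names are illustrative and should match what was
produced by `~lsst.daf.butler.Butler.export`::

    butler.import_(
        directory="/path/to/exported/files",
        filename="export.yaml",
        transfer="symlink",
    )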
2205 """
2206 if not self.isWriteable():
2207 raise TypeError("Butler is read-only.")
2208 if format is None:
2209 if filename is None:
2210 raise TypeError("At least one of 'filename' or 'format' must be provided.")
2211 else:
2212 _, format = os.path.splitext(filename) # type: ignore
2213 elif filename is None:
2214 filename = ResourcePath(f"export.{format}", forceAbsolute=False)
2215 if directory is not None:
2216 directory = ResourcePath(directory, forceDirectory=True)
2217 # mypy doesn't think this will work but it does in python >= 3.10.
2218 if isinstance(filename, ResourcePathExpression): # type: ignore
2219 filename = ResourcePath(filename, forceAbsolute=False) # type: ignore
2220 if not filename.isabs() and directory is not None:
2221 potential = directory.join(filename)
2222 exists_in_cwd = filename.exists()
2223 exists_in_dir = potential.exists()
2224 if exists_in_cwd and exists_in_dir:
2225 log.warning(
2226 "A relative path for filename was specified (%s) which exists relative to cwd. "
2227 "Additionally, the file exists relative to the given search directory (%s). "
2228 "Using the export file in the given directory.",
2229 filename,
2230 potential,
2231 )
2232 # Given they specified an explicit directory and that
2233 # directory has the export file in it, assume that that
2234 # is what was meant despite the file in cwd.
2235 filename = potential
2236 elif exists_in_dir:
2237 filename = potential
2238 elif not exists_in_cwd and not exists_in_dir:
2239 # Raise early.
2240 raise FileNotFoundError(
2241 f"Export file could not be found in {filename.abspath()} or {potential.abspath()}."
2242 )
2243 BackendClass: type[RepoImportBackend] = get_class_of(
2244 self._config["repo_transfer_formats"][format]["import"]
2245 )
2247 def doImport(importStream: TextIO | ResourceHandleProtocol) -> None:
2248 backend = BackendClass(importStream, self._registry) # type: ignore[call-arg]
2249 backend.register()
2250 with self.transaction():
2251 backend.load(
2252 self._datastore,
2253 directory=directory,
2254 transfer=transfer,
2255 skip_dimensions=skip_dimensions,
2256 )
2258 if isinstance(filename, ResourcePath):
2259 # We cannot use open() here at the moment because of
2260 # DM-38589 since yaml does stream.read(8192) in a loop.
2261 stream = io.StringIO(filename.read().decode())
2262 doImport(stream)
2263 else:
2264 doImport(filename) # type: ignore
2266 def transfer_from(
2267 self,
2268 source_butler: LimitedButler,
2269 source_refs: Iterable[DatasetRef],
2270 transfer: str = "auto",
2271 skip_missing: bool = True,
2272 register_dataset_types: bool = False,
2273 transfer_dimensions: bool = False,
2274 ) -> collections.abc.Collection[DatasetRef]:
2275 """Transfer datasets to this Butler from a run in another Butler.
2277 Parameters
2278 ----------
2279 source_butler : `LimitedButler`
2280 Butler from which the datasets are to be transferred. If data IDs
2281 in ``source_refs`` are not expanded then this has to be a full
2282 `Butler` whose registry will be used to expand data IDs.
2283 source_refs : iterable of `DatasetRef`
2284 Datasets defined in the source butler that should be transferred to
2285 this butler.
2286 transfer : `str`, optional
2287 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
2288 skip_missing : `bool`
2289 If `True`, datasets with no datastore artifact associated with
2290 them are not transferred. If `False` a registry entry will be
2291 created even if no datastore record is created (and so will
2292 look equivalent to the dataset being unstored).
2293 register_dataset_types : `bool`
2294 If `True` any missing dataset types are registered. Otherwise
2295 an exception is raised.
2296 transfer_dimensions : `bool`, optional
2297 If `True`, dimension record data associated with the new datasets
2298 will be transferred.
2300 Returns
2301 -------
2302 refs : `list` of `DatasetRef`
2303 The refs added to this Butler.
2305 Notes
2306 -----
2307 The datastore artifact has to exist for a transfer
2308 to be made, but non-existence is not an error.
2310 Datasets that already exist in this run will be skipped.
2312 The datasets are imported as part of a transaction, although
2313 dataset types are registered before the transaction is started.
2314 This means that it is possible for a dataset type to be registered
2315 even though transfer has failed.
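Examples
--------
A sketch of transferring all datasets from one run of a source butler;
the repository paths, collection name and dataset type expression are
illustrative::

    source = Butler("/path/to/source/repo")
    refs = source.registry.queryDatasets(..., collections="HSC/runs/example")
    dest = Butler("/path/to/dest/repo", writeable=True)
    dest.transfer_from(
        source, refs, transfer="copy", register_dataset_types=True
    )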
2316 """
2317 if not self.isWriteable():
2318 raise TypeError("Butler is read-only.")
2319 progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE)
2321 # Will iterate through the refs multiple times so need to convert
2322 # to a list if this isn't a collection.
2323 if not isinstance(source_refs, collections.abc.Collection):
2324 source_refs = list(source_refs)
2326 original_count = len(source_refs)
2327 log.info("Transferring %d datasets into %s", original_count, str(self))
2329 # In some situations the datastore artifact may be missing
2330 # and we do not want that registry entry to be imported.
2331 # Asking datastore is not sufficient, the records may have been
2332 # purged, we have to ask for the (predicted) URI and check
2333 # existence explicitly. Execution butler is set up exactly like
2334 # this with no datastore records.
2335 artifact_existence: dict[ResourcePath, bool] = {}
2336 if skip_missing:
2337 dataset_existence = source_butler._datastore.mexists(
2338 source_refs, artifact_existence=artifact_existence
2339 )
2340 source_refs = [ref for ref, exists in dataset_existence.items() if exists]
2341 filtered_count = len(source_refs)
2342 n_missing = original_count - filtered_count
2343 log.verbose(
2344 "%d dataset%s removed because the artifact does not exist. Now have %d.",
2345 n_missing,
2346 "" if n_missing == 1 else "s",
2347 filtered_count,
2348 )
2350 # Importing requires that we group the refs by dataset type and run
2351 # before doing the import.
2352 source_dataset_types = set()
2353 grouped_refs = defaultdict(list)
2354 for ref in source_refs:
2355 grouped_refs[ref.datasetType, ref.run].append(ref)
2356 source_dataset_types.add(ref.datasetType)
2358 # Check to see if the dataset type in the source butler has
2359 # the same definition in the target butler and register missing
2360 # ones if requested. Registration must happen outside a transaction.
2361 newly_registered_dataset_types = set()
2362 for datasetType in source_dataset_types:
2363 if register_dataset_types:
2364 # Let this raise immediately if inconsistent. Continuing
2365 # on to find additional inconsistent dataset types
2366 # might result in additional unwanted dataset types being
2367 # registered.
2368 if self._registry.registerDatasetType(datasetType):
2369 newly_registered_dataset_types.add(datasetType)
2370 else:
2371 # If the dataset type is missing, let it fail immediately.
2372 target_dataset_type = self._registry.getDatasetType(datasetType.name)
2373 if target_dataset_type != datasetType:
2374 raise ConflictingDefinitionError(
2375 "Source butler dataset type differs from definition"
2376 f" in target butler: {datasetType} !="
2377 f" {target_dataset_type}"
2378 )
2379 if newly_registered_dataset_types:
2380 # We may have registered some even if there were inconsistencies
2381 # but should let people know (or else remove them again).
2382 log.log(
2383 VERBOSE,
2384 "Registered the following dataset types in the target Butler: %s",
2385 ", ".join(d.name for d in newly_registered_dataset_types),
2386 )
2387 else:
2388 log.log(VERBOSE, "All required dataset types are known to the target Butler")
2390 dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)
2391 if transfer_dimensions:
2392 # Collect all the dimension records for these refs.
2393 # All dimensions are to be copied but the list of valid dimensions
2394 # come from this butler's universe.
2395 elements = frozenset(
2396 element
2397 for element in self.dimensions.getStaticElements()
2398 if element.hasTable() and element.viewOf is None
2399 )
2400 dataIds = {ref.dataId for ref in source_refs}
2401 # This logic comes from saveDataIds.
2402 for dataId in dataIds:
2403 # Need an expanded record, if not expanded that we need a full
2404 # butler with registry (allow mocks with registry too).
2405 if not dataId.hasRecords():
2406 if registry := getattr(source_butler, "registry", None):
2407 dataId = registry.expandDataId(dataId)
2408 else:
2409 raise TypeError("Input butler needs to be a full butler to expand DataId.")
2410 # If this butler doesn't know about a dimension in the source
2411 # butler things will break later.
2412 for record in dataId.records.values():
2413 if record is not None and record.definition in elements:
2414 dimension_records[record.definition].setdefault(record.dataId, record)
2416 handled_collections: set[str] = set()
2418 # Do all the importing in a single transaction.
2419 with self.transaction():
2420 if dimension_records:
2421 log.verbose("Ensuring that dimension records exist for transferred datasets.")
2422 for element, r in dimension_records.items():
2423 records = [r[dataId] for dataId in r]
2424 # Assume that if the record is already present that we can
2425 # use it without having to check that the record metadata
2426 # is consistent.
2427 self._registry.insertDimensionData(element, *records, skip_existing=True)
2429 n_imported = 0
2430 for (datasetType, run), refs_to_import in progress.iter_item_chunks(
2431 grouped_refs.items(), desc="Importing to registry by run and dataset type"
2432 ):
2433 if run not in handled_collections:
2434 # May need to create output collection. If source butler
2435 # has a registry, ask for documentation string.
2436 run_doc = None
2437 if registry := getattr(source_butler, "registry", None):
2438 run_doc = registry.getCollectionDocumentation(run)
2439 registered = self._registry.registerRun(run, doc=run_doc)
2440 handled_collections.add(run)
2441 if registered:
2442 log.log(VERBOSE, "Creating output run %s", run)
2444 n_refs = len(refs_to_import)
2445 log.verbose(
2446 "Importing %d ref%s of dataset type %s into run %s",
2447 n_refs,
2448 "" if n_refs == 1 else "s",
2449 datasetType.name,
2450 run,
2451 )
2453 # Assume we are using UUIDs and the source refs will match
2454 # those imported.
2455 imported_refs = self._registry._importDatasets(refs_to_import, expand=False)
2456 assert set(imported_refs) == set(refs_to_import)
2457 n_imported += len(imported_refs)
2459 assert len(source_refs) == n_imported
2460 log.verbose("Imported %d datasets into destination butler", n_imported)
2462 # Ask the datastore to transfer. The datastore has to check that
2463 # the source datastore is compatible with the target datastore.
2464 accepted, rejected = self._datastore.transfer_from(
2465 source_butler._datastore,
2466 source_refs,
2467 transfer=transfer,
2468 artifact_existence=artifact_existence,
2469 )
2470 if rejected:
2471 # For now, accept the registry entries but not the files.
2472 log.warning(
2473 "%d datasets were rejected and %d accepted for dataset type %s in run %r.",
2474 len(rejected),
2475 len(accepted),
2476 datasetType,
2477 run,
2478 )
2480 return source_refs
2482 def validateConfiguration(
2483 self,
2484 logFailures: bool = False,
2485 datasetTypeNames: Iterable[str] | None = None,
2486 ignore: Iterable[str] | None = None,
2487 ) -> None:
2488 """Validate butler configuration.
2490 Checks that each `DatasetType` can be stored in the `Datastore`.
2492 Parameters
2493 ----------
2494 logFailures : `bool`, optional
2495 If `True`, output a log message for every validation error
2496 detected.
2497 datasetTypeNames : iterable of `str`, optional
2498 The `DatasetType` names that should be checked. This allows
2499 only a subset to be selected.
2500 ignore : iterable of `str`, optional
2501 Names of DatasetTypes to skip over. This can be used to skip
2502 known problems. If a named `DatasetType` corresponds to a
2503 composite, all components of that `DatasetType` will also be
2504 ignored.
2506 Raises
2507 ------
2508 ButlerValidationError
2509 Raised if there is some inconsistency with how this Butler
2510 is configured.
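Examples
--------
A sketch of validating every registered dataset type against the
datastore configuration, logging each problem found; the ignored dataset
type name is illustrative::

    butler.validateConfiguration(logFailures=True, ignore=["raw"])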
2511 """
2512 if datasetTypeNames:
2513 datasetTypes = [self._registry.getDatasetType(name) for name in datasetTypeNames]
2514 else:
2515 datasetTypes = list(self._registry.queryDatasetTypes())
2517 # filter out anything from the ignore list
2518 if ignore:
2519 ignore = set(ignore)
2520 datasetTypes = [
2521 e for e in datasetTypes if e.name not in ignore and e.nameAndComponent()[0] not in ignore
2522 ]
2523 else:
2524 ignore = set()
2526 # For each datasetType that has an instrument dimension, create
2527 # a DatasetRef for each defined instrument
2528 datasetRefs = []
2530 # Find all the registered instruments (if "instrument" is in the
2531 # universe).
2532 if "instrument" in self.dimensions:
2533 instruments = {record.name for record in self._registry.queryDimensionRecords("instrument")}
2535 for datasetType in datasetTypes:
2536 if "instrument" in datasetType.dimensions:
2537 # In order to create a conforming dataset ref, create
2538 # fake DataCoordinate values for the non-instrument
2539 # dimensions. The type of the value does not matter here.
2540 dataId = {dim.name: 1 for dim in datasetType.dimensions if dim.name != "instrument"}
2542 for instrument in instruments:
2543 datasetRef = DatasetRef(
2544 datasetType,
2545 DataCoordinate.standardize(
2546 dataId, instrument=instrument, graph=datasetType.dimensions
2547 ),
2548 run="validate",
2549 )
2550 datasetRefs.append(datasetRef)
2552 entities: list[DatasetType | DatasetRef] = []
2553 entities.extend(datasetTypes)
2554 entities.extend(datasetRefs)
2556 datastoreErrorStr = None
2557 try:
2558 self._datastore.validateConfiguration(entities, logFailures=logFailures)
2559 except ValidationError as e:
2560 datastoreErrorStr = str(e)
2562 # Also check that the LookupKeys used by the datastores match
2563 # registry and storage class definitions
2564 keys = self._datastore.getLookupKeys()
2566 failedNames = set()
2567 failedDataId = set()
2568 for key in keys:
2569 if key.name is not None:
2570 if key.name in ignore:
2571 continue
2573 # skip if specific datasetType names were requested and this
2574 # name does not match
2575 if datasetTypeNames and key.name not in datasetTypeNames:
2576 continue
2578 # See if it is a StorageClass or a DatasetType
2579 if key.name in self.storageClasses:
2580 pass
2581 else:
2582 try:
2583 self._registry.getDatasetType(key.name)
2584 except KeyError:
2585 if logFailures:
2586 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key)
2587 failedNames.add(key)
2588 else:
2589 # Dimensions are checked for consistency when the Butler
2590 # is created and rendezvoused with a universe.
2591 pass
2593 # Check that the instrument is a valid instrument
2594 # Currently only support instrument so check for that
2595 if key.dataId:
2596 dataIdKeys = set(key.dataId)
2597 if {"instrument"} != dataIdKeys:
2598 if logFailures:
2599 log.critical("Key '%s' has unsupported DataId override", key)
2600 failedDataId.add(key)
2601 elif key.dataId["instrument"] not in instruments:
2602 if logFailures:
2603 log.critical("Key '%s' has unknown instrument", key)
2604 failedDataId.add(key)
2606 messages = []
2608 if datastoreErrorStr:
2609 messages.append(datastoreErrorStr)
2611 for failed, msg in (
2612 (failedNames, "Keys without corresponding DatasetType or StorageClass entry: "),
2613 (failedDataId, "Keys with bad DataId entries: "),
2614 ):
2615 if failed:
2616 msg += ", ".join(str(k) for k in failed)
2617 messages.append(msg)
2619 if messages:
2620 raise ValidationError(";\n".join(messages))
2622 @property
2623 def collections(self) -> Sequence[str]:
2624 """The collections to search by default, in order
2625 (`~collections.abc.Sequence` [ `str` ]).
2627 This is an alias for ``self.registry.defaults.collections``. It cannot
2628 be set directly in isolation, but all defaults may be changed together
2629 by assigning a new `RegistryDefaults` instance to
2630 ``self.registry.defaults``.
2631 """
2632 return self._registry.defaults.collections
2634 @property
2635 def run(self) -> str | None:
2636 """Name of the run this butler writes outputs to by default (`str` or
2637 `None`).
2639 This is an alias for ``self.registry.defaults.run``. It cannot be set
2640 directly in isolation, but all defaults may be changed together by
2641 assigning a new `RegistryDefaults` instance to
2642 ``self.registry.defaults``.
2643 """
2644 return self._registry.defaults.run
2646 @property
2647 def registry(self) -> Registry:
2648 """The object that manages dataset metadata and relationships
2649 (`Registry`).
2651 Many operations that don't involve reading or writing butler datasets
2652 are accessible only via `Registry` methods. Eventually these methods
2653 will be replaced by equivalent `Butler` methods.
2654 """
2655 return self._registry_shim
2657 @property
2658 def dimensions(self) -> DimensionUniverse:
2659 # Docstring inherited.
2660 return self._registry.dimensions
2662 _registry: _ButlerRegistry
2663 """The object that manages dataset metadata and relationships
2664 (`_ButlerRegistry`).
2666 Most operations that don't involve reading or writing butler datasets are
2667 accessible only via `Registry` methods.
2668 """
2670 datastore: Datastore
2671 """The object that manages actual dataset storage (`Datastore`).
2673 Direct user access to the datastore should rarely be necessary; the primary
2674 exception is the case where a `Datastore` implementation provides extra
2675 functionality beyond what the base class defines.
2676 """
2678 storageClasses: StorageClassFactory
2679 """An object that maps known storage class names to objects that fully
2680 describe them (`StorageClassFactory`).
2681 """