Coverage for python/lsst/daf/butler/_butler.py: 11%
723 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Butler top level classes.
23"""
24from __future__ import annotations
26__all__ = (
27 "Butler",
28 "ButlerValidationError",
29)
31import collections.abc
32import contextlib
33import io
34import logging
35import numbers
36import os
37import warnings
38from collections import Counter, defaultdict
39from collections.abc import Iterable, Iterator, MutableMapping, Sequence
40from typing import TYPE_CHECKING, Any, ClassVar, TextIO
42from deprecated.sphinx import deprecated
43from lsst.resources import ResourcePath, ResourcePathExpression
44from lsst.utils import doImportType
45from lsst.utils.introspection import get_class_of
46from lsst.utils.logging import VERBOSE, getLogger
47from sqlalchemy.exc import IntegrityError
49from ._butlerConfig import ButlerConfig
50from ._butlerRepoIndex import ButlerRepoIndex
51from ._dataset_existence import DatasetExistence
52from ._deferredDatasetHandle import DeferredDatasetHandle
53from ._limited_butler import LimitedButler
54from ._registry_shim import RegistryShim
55from .core import (
56 Config,
57 ConfigSubset,
58 DataCoordinate,
59 DataId,
60 DataIdValue,
61 DatasetIdGenEnum,
62 DatasetRef,
63 DatasetRefURIs,
64 DatasetType,
65 Datastore,
66 Dimension,
67 DimensionConfig,
68 DimensionElement,
69 DimensionRecord,
70 DimensionUniverse,
71 FileDataset,
72 Progress,
73 StorageClass,
74 StorageClassFactory,
75 Timespan,
76 ValidationError,
77)
78from .core.repoRelocation import BUTLER_ROOT_TAG
79from .core.utils import transactional
80from .registry import (
81 CollectionType,
82 ConflictingDefinitionError,
83 DataIdError,
84 MissingDatasetTypeError,
85 NoDefaultCollectionError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 _ButlerRegistry,
90 _RegistryFactory,
91)
92from .transfers import RepoExportContext
94if TYPE_CHECKING:
95 from lsst.resources import ResourceHandleProtocol
97 from .transfers import RepoImportBackend
99log = getLogger(__name__)
102class ButlerValidationError(ValidationError):
103 """There is a problem with the Butler configuration."""
105 pass
108class Butler(LimitedButler):
109 """Main entry point for the data access system.
111 Parameters
112 ----------
113 config : `ButlerConfig`, `Config` or `str`, optional.
114 Configuration. Anything acceptable to the
115 `ButlerConfig` constructor. If a directory path
116 is given the configuration will be read from a ``butler.yaml`` file in
117 that location. If `None` is given default values will be used.
118 butler : `Butler`, optional.
119 If provided, construct a new Butler that uses the same registry and
120 datastore as the given one, but with the given collection and run.
121 Incompatible with the ``config``, ``searchPaths``, and ``writeable``
122 arguments.
123 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
124 An expression specifying the collections to be searched (in order) when
125 reading datasets.
126 This may be a `str` collection name or an iterable thereof.
127 See :ref:`daf_butler_collection_expressions` for more information.
128 These collections are not registered automatically and must be
129 manually registered before they are used by any method, but they may be
130 manually registered after the `Butler` is initialized.
131 run : `str`, optional
132 Name of the `~CollectionType.RUN` collection new datasets should be
133 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
134 ``collections`` will be set to ``[run]``. If not `None`, this
135 collection will automatically be registered. If this is not set (and
136 ``writeable`` is not set either), a read-only butler will be created.
137 searchPaths : `list` of `str`, optional
138 Directory paths to search when calculating the full Butler
139 configuration. Not used if the supplied config is already a
140 `ButlerConfig`.
141 writeable : `bool`, optional
142 Explicitly sets whether the butler supports write operations. If not
143 provided, a read-write butler is created if any of ``run``, ``tags``,
144 or ``chains`` is non-empty.
145 inferDefaults : `bool`, optional
146 If `True` (default) infer default data ID values from the values
147 present in the datasets in ``collections``: if all collections have the
148 same value (or no value) for a governor dimension, that value will be
149 the default for that dimension. Nonexistent collections are ignored.
150 If a default value is provided explicitly for a governor dimension via
151 ``**kwargs``, no default will be inferred for that dimension.
152 **kwargs : `str`
153 Default data ID key-value pairs. These may only identify "governor"
154 dimensions like ``instrument`` and ``skymap``.
156 Examples
157 --------
158 While there are many ways to control exactly how a `Butler` interacts with
159 the collections in its `Registry`, the most common cases are still simple.
161 For a read-only `Butler` that searches one collection, do::
163 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"])
165 For a read-write `Butler` that writes to and reads from a
166 `~CollectionType.RUN` collection::
168 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a")
170 The `Butler` passed to a ``PipelineTask`` is often much more complex,
171 because we want to write to one `~CollectionType.RUN` collection but read
172 from several others (as well)::
174 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a",
175 collections=["u/alice/DM-50000/a",
176 "u/bob/DM-49998",
177 "HSC/defaults"])
179 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
180 Datasets will be read first from that run (since it appears first in the
181 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``.
183 Finally, one can always create a `Butler` with no collections::
185 butler = Butler("/path/to/repo", writeable=True)
187 This can be extremely useful when you just want to use ``butler.registry``,
188 e.g. for inserting dimension data or managing collections, or when the
189 collections you want to use with the butler are not consistent.
190 Passing ``writeable`` explicitly here is only necessary if you want to be
191 able to make changes to the repo - usually the value for ``writeable`` can
192 be guessed from the collection arguments provided, but it defaults to
193 `False` when there are not collection arguments.
194 """
196 def __init__(
197 self,
198 config: Config | ResourcePathExpression | None = None,
199 *,
200 butler: Butler | None = None,
201 collections: Any = None,
202 run: str | None = None,
203 searchPaths: Sequence[ResourcePathExpression] | None = None,
204 writeable: bool | None = None,
205 inferDefaults: bool = True,
206 **kwargs: str,
207 ):
208 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
209 # Load registry, datastore, etc. from config or existing butler.
210 if butler is not None:
211 if config is not None or searchPaths is not None or writeable is not None:
212 raise TypeError(
213 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
214 )
215 self._registry = butler._registry.copy(defaults)
216 self._datastore = butler._datastore
217 self.storageClasses = butler.storageClasses
218 self._config: ButlerConfig = butler._config
219 else:
220 self._config = ButlerConfig(config, searchPaths=searchPaths)
221 try:
222 if "root" in self._config:
223 butlerRoot = self._config["root"]
224 else:
225 butlerRoot = self._config.configDir
226 if writeable is None:
227 writeable = run is not None
228 self._registry = _RegistryFactory(self._config).from_config(
229 butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
230 )
231 self._datastore = Datastore.fromConfig(
232 self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
233 )
234 self.storageClasses = StorageClassFactory()
235 self.storageClasses.addFromConfig(self._config)
236 except Exception:
237 # Failures here usually mean that configuration is incomplete,
238 # just issue an error message which includes config file URI.
239 log.error(f"Failed to instantiate Butler from config {self._config.configFile}.")
240 raise
242 # For execution butler the datastore needs a special
243 # dependency-inversion trick. This is not used by regular butler,
244 # but we do not have a way to distinguish regular butler from execution
245 # butler.
246 self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)
248 if "run" in self._config or "collection" in self._config:
249 raise ValueError("Passing a run or collection via configuration is no longer supported.")
251 self._registry_shim = RegistryShim(self)
253 GENERATION: ClassVar[int] = 3
254 """This is a Generation 3 Butler.
256 This attribute may be removed in the future, once the Generation 2 Butler
257 interface has been fully retired; it should only be used in transitional
258 code.
259 """
261 def _retrieve_dataset_type(self, name: str) -> DatasetType | None:
262 """Return DatasetType defined in registry given dataset type name."""
263 try:
264 return self._registry.getDatasetType(name)
265 except MissingDatasetTypeError:
266 return None
268 @classmethod
269 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
270 """Look up the label in a butler repository index.
272 Parameters
273 ----------
274 label : `str`
275 Label of the Butler repository to look up.
276 return_label : `bool`, optional
277 If ``label`` cannot be found in the repository index (either
278 because index is not defined or ``label`` is not in the index) and
279 ``return_label`` is `True` then return ``ResourcePath(label)``.
280 If ``return_label`` is `False` (default) then an exception will be
281 raised instead.
283 Returns
284 -------
285 uri : `lsst.resources.ResourcePath`
286 URI to the Butler repository associated with the given label or
287 default value if it is provided.
289 Raises
290 ------
291 KeyError
292 Raised if the label is not found in the index, or if an index
293 is not defined, and ``return_label`` is `False`.
295 Notes
296 -----
297 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
298 information is discovered.
299 """
300 return ButlerRepoIndex.get_repo_uri(label, return_label)
302 @classmethod
303 def get_known_repos(cls) -> set[str]:
304 """Retrieve the list of known repository labels.
306 Returns
307 -------
308 repos : `set` of `str`
309 All the known labels. Can be empty if no index can be found.
311 Notes
312 -----
313 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
314 information is discovered.
315 """
316 return ButlerRepoIndex.get_known_repos()
318 @staticmethod
319 def makeRepo(
320 root: ResourcePathExpression,
321 config: Config | str | None = None,
322 dimensionConfig: Config | str | None = None,
323 standalone: bool = False,
324 searchPaths: list[str] | None = None,
325 forceConfigRoot: bool = True,
326 outfile: ResourcePathExpression | None = None,
327 overwrite: bool = False,
328 ) -> Config:
329 """Create an empty data repository by adding a butler.yaml config
330 to a repository root directory.
332 Parameters
333 ----------
334 root : `lsst.resources.ResourcePathExpression`
335 Path or URI to the root location of the new repository. Will be
336 created if it does not exist.
337 config : `Config` or `str`, optional
338 Configuration to write to the repository, after setting any
339 root-dependent Registry or Datastore config options. Can not
340 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
341 configuration will be used. Root-dependent config options
342 specified in this config are overwritten if ``forceConfigRoot``
343 is `True`.
344 dimensionConfig : `Config` or `str`, optional
345 Configuration for dimensions, will be used to initialize registry
346 database.
347 standalone : `bool`
348 If True, write all expanded defaults, not just customized or
349 repository-specific settings.
350 This (mostly) decouples the repository from the default
351 configuration, insulating it from changes to the defaults (which
352 may be good or bad, depending on the nature of the changes).
353 Future *additions* to the defaults will still be picked up when
354 initializing `Butlers` to repos created with ``standalone=True``.
355 searchPaths : `list` of `str`, optional
356 Directory paths to search when calculating the full butler
357 configuration.
358 forceConfigRoot : `bool`, optional
359 If `False`, any values present in the supplied ``config`` that
360 would normally be reset are not overridden and will appear
361 directly in the output config. This allows non-standard overrides
362 of the root directory for a datastore or registry to be given.
363 If this parameter is `True` the values for ``root`` will be
364 forced into the resulting config if appropriate.
365 outfile : `lss.resources.ResourcePathExpression`, optional
366 If not-`None`, the output configuration will be written to this
367 location rather than into the repository itself. Can be a URI
368 string. Can refer to a directory that will be used to write
369 ``butler.yaml``.
370 overwrite : `bool`, optional
371 Create a new configuration file even if one already exists
372 in the specified output location. Default is to raise
373 an exception.
375 Returns
376 -------
377 config : `Config`
378 The updated `Config` instance written to the repo.
380 Raises
381 ------
382 ValueError
383 Raised if a ButlerConfig or ConfigSubset is passed instead of a
384 regular Config (as these subclasses would make it impossible to
385 support ``standalone=False``).
386 FileExistsError
387 Raised if the output config file already exists.
388 os.error
389 Raised if the directory does not exist, exists but is not a
390 directory, or cannot be created.
392 Notes
393 -----
394 Note that when ``standalone=False`` (the default), the configuration
395 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
396 construct the repository should also be used to construct any Butlers
397 to avoid configuration inconsistencies.
398 """
399 if isinstance(config, (ButlerConfig, ConfigSubset)):
400 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
402 # Ensure that the root of the repository exists or can be made
403 root_uri = ResourcePath(root, forceDirectory=True)
404 root_uri.mkdir()
406 config = Config(config)
408 # If we are creating a new repo from scratch with relative roots,
409 # do not propagate an explicit root from the config file
410 if "root" in config:
411 del config["root"]
413 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
414 imported_class = doImportType(full["datastore", "cls"])
415 if not issubclass(imported_class, Datastore):
416 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
417 datastoreClass: type[Datastore] = imported_class
418 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
420 # if key exists in given config, parse it, otherwise parse the defaults
421 # in the expanded config
422 if config.get(("registry", "db")):
423 registryConfig = RegistryConfig(config)
424 else:
425 registryConfig = RegistryConfig(full)
426 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
427 if defaultDatabaseUri is not None:
428 Config.updateParameters(
429 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
430 )
431 else:
432 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
434 if standalone:
435 config.merge(full)
436 else:
437 # Always expand the registry.managers section into the per-repo
438 # config, because after the database schema is created, it's not
439 # allowed to change anymore. Note that in the standalone=True
440 # branch, _everything_ in the config is expanded, so there's no
441 # need to special case this.
442 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
443 configURI: ResourcePathExpression
444 if outfile is not None:
445 # When writing to a separate location we must include
446 # the root of the butler repo in the config else it won't know
447 # where to look.
448 config["root"] = root_uri.geturl()
449 configURI = outfile
450 else:
451 configURI = root_uri
452 # Strip obscore configuration, if it is present, before writing config
453 # to a file, obscore config will be stored in registry.
454 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
455 config_to_write = config.copy()
456 del config_to_write[obscore_config_key]
457 config_to_write.dumpToUri(configURI, overwrite=overwrite)
458 # configFile attribute is updated, need to copy it to original.
459 config.configFile = config_to_write.configFile
460 else:
461 config.dumpToUri(configURI, overwrite=overwrite)
463 # Create Registry and populate tables
464 registryConfig = RegistryConfig(config.get("registry"))
465 dimensionConfig = DimensionConfig(dimensionConfig)
466 _RegistryFactory(registryConfig).create_from_config(
467 dimensionConfig=dimensionConfig, butlerRoot=root_uri
468 )
470 log.verbose("Wrote new Butler configuration file to %s", configURI)
472 return config
474 @classmethod
475 def _unpickle(
476 cls,
477 config: ButlerConfig,
478 collections: tuple[str, ...] | None,
479 run: str | None,
480 defaultDataId: dict[str, str],
481 writeable: bool,
482 ) -> Butler:
483 """Callable used to unpickle a Butler.
485 We prefer not to use ``Butler.__init__`` directly so we can force some
486 of its many arguments to be keyword-only (note that ``__reduce__``
487 can only invoke callables with positional arguments).
489 Parameters
490 ----------
491 config : `ButlerConfig`
492 Butler configuration, already coerced into a true `ButlerConfig`
493 instance (and hence after any search paths for overrides have been
494 utilized).
495 collections : `tuple` [ `str` ]
496 Names of the default collections to read from.
497 run : `str`, optional
498 Name of the default `~CollectionType.RUN` collection to write to.
499 defaultDataId : `dict` [ `str`, `str` ]
500 Default data ID values.
501 writeable : `bool`
502 Whether the Butler should support write operations.
504 Returns
505 -------
506 butler : `Butler`
507 A new `Butler` instance.
508 """
509 # MyPy doesn't recognize that the kwargs below are totally valid; it
510 # seems to think '**defaultDataId* is a _positional_ argument!
511 return cls(
512 config=config,
513 collections=collections,
514 run=run,
515 writeable=writeable,
516 **defaultDataId, # type: ignore
517 )
519 def __reduce__(self) -> tuple:
520 """Support pickling."""
521 return (
522 Butler._unpickle,
523 (
524 self._config,
525 self.collections,
526 self.run,
527 self._registry.defaults.dataId.byName(),
528 self._registry.isWriteable(),
529 ),
530 )
532 def __str__(self) -> str:
533 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format(
534 self.collections, self.run, self._datastore, self._registry
535 )
537 def isWriteable(self) -> bool:
538 """Return `True` if this `Butler` supports write operations."""
539 return self._registry.isWriteable()
541 @contextlib.contextmanager
542 def transaction(self) -> Iterator[None]:
543 """Context manager supporting `Butler` transactions.
545 Transactions can be nested.
546 """
547 with self._registry.transaction():
548 with self._datastore.transaction():
549 yield
551 def _standardizeArgs(
552 self,
553 datasetRefOrType: DatasetRef | DatasetType | str,
554 dataId: DataId | None = None,
555 for_put: bool = True,
556 **kwargs: Any,
557 ) -> tuple[DatasetType, DataId | None]:
558 """Standardize the arguments passed to several Butler APIs.
560 Parameters
561 ----------
562 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
563 When `DatasetRef` the `dataId` should be `None`.
564 Otherwise the `DatasetType` or name thereof.
565 dataId : `dict` or `DataCoordinate`
566 A `dict` of `Dimension` link name, value pairs that label the
567 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
568 should be provided as the second argument.
569 for_put : `bool`, optional
570 If `True` this call is invoked as part of a `Butler.put()`.
571 Otherwise it is assumed to be part of a `Butler.get()`. This
572 parameter is only relevant if there is dataset type
573 inconsistency.
574 **kwargs
575 Additional keyword arguments used to augment or construct a
576 `DataCoordinate`. See `DataCoordinate.standardize`
577 parameters.
579 Returns
580 -------
581 datasetType : `DatasetType`
582 A `DatasetType` instance extracted from ``datasetRefOrType``.
583 dataId : `dict` or `DataId`, optional
584 Argument that can be used (along with ``kwargs``) to construct a
585 `DataId`.
587 Notes
588 -----
589 Butler APIs that conceptually need a DatasetRef also allow passing a
590 `DatasetType` (or the name of one) and a `DataId` (or a dict and
591 keyword arguments that can be used to construct one) separately. This
592 method accepts those arguments and always returns a true `DatasetType`
593 and a `DataId` or `dict`.
595 Standardization of `dict` vs `DataId` is best handled by passing the
596 returned ``dataId`` (and ``kwargs``) to `Registry` APIs, which are
597 generally similarly flexible.
598 """
599 externalDatasetType: DatasetType | None = None
600 internalDatasetType: DatasetType | None = None
601 if isinstance(datasetRefOrType, DatasetRef):
602 if dataId is not None or kwargs:
603 raise ValueError("DatasetRef given, cannot use dataId as well")
604 externalDatasetType = datasetRefOrType.datasetType
605 dataId = datasetRefOrType.dataId
606 else:
607 # Don't check whether DataId is provided, because Registry APIs
608 # can usually construct a better error message when it wasn't.
609 if isinstance(datasetRefOrType, DatasetType):
610 externalDatasetType = datasetRefOrType
611 else:
612 internalDatasetType = self._registry.getDatasetType(datasetRefOrType)
614 # Check that they are self-consistent
615 if externalDatasetType is not None:
616 internalDatasetType = self._registry.getDatasetType(externalDatasetType.name)
617 if externalDatasetType != internalDatasetType:
618 # We can allow differences if they are compatible, depending
619 # on whether this is a get or a put. A get requires that
620 # the python type associated with the datastore can be
621 # converted to the user type. A put requires that the user
622 # supplied python type can be converted to the internal
623 # type expected by registry.
624 relevantDatasetType = internalDatasetType
625 if for_put:
626 is_compatible = internalDatasetType.is_compatible_with(externalDatasetType)
627 else:
628 is_compatible = externalDatasetType.is_compatible_with(internalDatasetType)
629 relevantDatasetType = externalDatasetType
630 if not is_compatible:
631 raise ValueError(
632 f"Supplied dataset type ({externalDatasetType}) inconsistent with "
633 f"registry definition ({internalDatasetType})"
634 )
635 # Override the internal definition.
636 internalDatasetType = relevantDatasetType
638 assert internalDatasetType is not None
639 return internalDatasetType, dataId
641 def _rewrite_data_id(
642 self, dataId: DataId | None, datasetType: DatasetType, **kwargs: Any
643 ) -> tuple[DataId | None, dict[str, Any]]:
644 """Rewrite a data ID taking into account dimension records.
646 Take a Data ID and keyword args and rewrite it if necessary to
647 allow the user to specify dimension records rather than dimension
648 primary values.
650 This allows a user to include a dataId dict with keys of
651 ``exposure.day_obs`` and ``exposure.seq_num`` instead of giving
652 the integer exposure ID. It also allows a string to be given
653 for a dimension value rather than the integer ID if that is more
654 convenient. For example, rather than having to specifying the
655 detector with ``detector.full_name``, a string given for ``detector``
656 will be interpreted as the full name and converted to the integer
657 value.
659 Keyword arguments can also use strings for dimensions like detector
660 and exposure but python does not allow them to include ``.`` and
661 so the ``exposure.day_obs`` syntax can not be used in a keyword
662 argument.
664 Parameters
665 ----------
666 dataId : `dict` or `DataCoordinate`
667 A `dict` of `Dimension` link name, value pairs that will label the
668 `DatasetRef` within a Collection.
669 datasetType : `DatasetType`
670 The dataset type associated with this dataId. Required to
671 determine the relevant dimensions.
672 **kwargs
673 Additional keyword arguments used to augment or construct a
674 `DataId`. See `DataId` parameters.
676 Returns
677 -------
678 dataId : `dict` or `DataCoordinate`
679 The, possibly rewritten, dataId. If given a `DataCoordinate` and
680 no keyword arguments, the original dataId will be returned
681 unchanged.
682 **kwargs : `dict`
683 Any unused keyword arguments (would normally be empty dict).
684 """
685 # Do nothing if we have a standalone DataCoordinate.
686 if isinstance(dataId, DataCoordinate) and not kwargs:
687 return dataId, kwargs
689 # Process dimension records that are using record information
690 # rather than ids
691 newDataId: dict[str, DataIdValue] = {}
692 byRecord: dict[str, dict[str, Any]] = defaultdict(dict)
694 # if all the dataId comes from keyword parameters we do not need
695 # to do anything here because they can't be of the form
696 # exposure.obs_id because a "." is not allowed in a keyword parameter.
697 if dataId:
698 for k, v in dataId.items():
699 # If we have a Dimension we do not need to do anything
700 # because it cannot be a compound key.
701 if isinstance(k, str) and "." in k:
702 # Someone is using a more human-readable dataId
703 dimensionName, record = k.split(".", 1)
704 byRecord[dimensionName][record] = v
705 elif isinstance(k, Dimension):
706 newDataId[k.name] = v
707 else:
708 newDataId[k] = v
710 # Go through the updated dataId and check the type in case someone is
711 # using an alternate key. We have already filtered out the compound
712 # keys dimensions.record format.
713 not_dimensions = {}
715 # Will need to look in the dataId and the keyword arguments
716 # and will remove them if they need to be fixed or are unrecognized.
717 for dataIdDict in (newDataId, kwargs):
718 # Use a list so we can adjust the dict safely in the loop
719 for dimensionName in list(dataIdDict):
720 value = dataIdDict[dimensionName]
721 try:
722 dimension = self.dimensions.getStaticDimensions()[dimensionName]
723 except KeyError:
724 # This is not a real dimension
725 not_dimensions[dimensionName] = value
726 del dataIdDict[dimensionName]
727 continue
729 # Convert an integral type to an explicit int to simplify
730 # comparisons here
731 if isinstance(value, numbers.Integral):
732 value = int(value)
734 if not isinstance(value, dimension.primaryKey.getPythonType()):
735 for alternate in dimension.alternateKeys:
736 if isinstance(value, alternate.getPythonType()):
737 byRecord[dimensionName][alternate.name] = value
738 del dataIdDict[dimensionName]
739 log.debug(
740 "Converting dimension %s to %s.%s=%s",
741 dimensionName,
742 dimensionName,
743 alternate.name,
744 value,
745 )
746 break
747 else:
748 log.warning(
749 "Type mismatch found for value '%r' provided for dimension %s. "
750 "Could not find matching alternative (primary key has type %s) "
751 "so attempting to use as-is.",
752 value,
753 dimensionName,
754 dimension.primaryKey.getPythonType(),
755 )
757 # By this point kwargs and newDataId should only include valid
758 # dimensions. Merge kwargs in to the new dataId and log if there
759 # are dimensions in both (rather than calling update).
760 for k, v in kwargs.items():
761 if k in newDataId and newDataId[k] != v:
762 log.debug(
763 "Keyword arg %s overriding explicit value in dataId of %s with %s", k, newDataId[k], v
764 )
765 newDataId[k] = v
766 # No need to retain any values in kwargs now.
767 kwargs = {}
769 # If we have some unrecognized dimensions we have to try to connect
770 # them to records in other dimensions. This is made more complicated
771 # by some dimensions having records with clashing names. A mitigation
772 # is that we can tell by this point which dimensions are missing
773 # for the DatasetType but this does not work for calibrations
774 # where additional dimensions can be used to constrain the temporal
775 # axis.
776 if not_dimensions:
777 # Search for all dimensions even if we have been given a value
778 # explicitly. In some cases records are given as well as the
779 # actually dimension and this should not be an error if they
780 # match.
781 mandatoryDimensions = datasetType.dimensions.names # - provided
783 candidateDimensions: set[str] = set()
784 candidateDimensions.update(mandatoryDimensions)
786 # For calibrations we may well be needing temporal dimensions
787 # so rather than always including all dimensions in the scan
788 # restrict things a little. It is still possible for there
789 # to be confusion over day_obs in visit vs exposure for example.
790 # If we are not searching calibration collections things may
791 # fail but they are going to fail anyway because of the
792 # ambiguousness of the dataId...
793 if datasetType.isCalibration():
794 for dim in self.dimensions.getStaticDimensions():
795 if dim.temporal:
796 candidateDimensions.add(str(dim))
798 # Look up table for the first association with a dimension
799 guessedAssociation: dict[str, dict[str, Any]] = defaultdict(dict)
801 # Keep track of whether an item is associated with multiple
802 # dimensions.
803 counter: Counter[str] = Counter()
804 assigned: dict[str, set[str]] = defaultdict(set)
806 # Go through the missing dimensions and associate the
807 # given names with records within those dimensions
808 matched_dims = set()
809 for dimensionName in candidateDimensions:
810 dimension = self.dimensions.getStaticDimensions()[dimensionName]
811 fields = dimension.metadata.names | dimension.uniqueKeys.names
812 for field in not_dimensions:
813 if field in fields:
814 guessedAssociation[dimensionName][field] = not_dimensions[field]
815 counter[dimensionName] += 1
816 assigned[field].add(dimensionName)
817 matched_dims.add(field)
819 # Calculate the fields that matched nothing.
820 never_found = set(not_dimensions) - matched_dims
822 if never_found:
823 raise ValueError(f"Unrecognized keyword args given: {never_found}")
825 # There is a chance we have allocated a single dataId item
826 # to multiple dimensions. Need to decide which should be retained.
827 # For now assume that the most popular alternative wins.
828 # This means that day_obs with seq_num will result in
829 # exposure.day_obs and not visit.day_obs
830 # Also prefer an explicitly missing dimension over an inferred
831 # temporal dimension.
832 for fieldName, assignedDimensions in assigned.items():
833 if len(assignedDimensions) > 1:
834 # Pick the most popular (preferring mandatory dimensions)
835 requiredButMissing = assignedDimensions.intersection(mandatoryDimensions)
836 if requiredButMissing:
837 candidateDimensions = requiredButMissing
838 else:
839 candidateDimensions = assignedDimensions
841 # If this is a choice between visit and exposure and
842 # neither was a required part of the dataset type,
843 # (hence in this branch) always prefer exposure over
844 # visit since exposures are always defined and visits
845 # are defined from exposures.
846 if candidateDimensions == {"exposure", "visit"}:
847 candidateDimensions = {"exposure"}
849 # Select the relevant items and get a new restricted
850 # counter.
851 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions}
852 duplicatesCounter: Counter[str] = Counter()
853 duplicatesCounter.update(theseCounts)
855 # Choose the most common. If they are equally common
856 # we will pick the one that was found first.
857 # Returns a list of tuples
858 selected = duplicatesCounter.most_common(1)[0][0]
860 log.debug(
861 "Ambiguous dataId entry '%s' associated with multiple dimensions: %s."
862 " Removed ambiguity by choosing dimension %s.",
863 fieldName,
864 ", ".join(assignedDimensions),
865 selected,
866 )
868 for candidateDimension in assignedDimensions:
869 if candidateDimension != selected:
870 del guessedAssociation[candidateDimension][fieldName]
872 # Update the record look up dict with the new associations
873 for dimensionName, values in guessedAssociation.items():
874 if values: # A dict might now be empty
875 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values)
876 byRecord[dimensionName].update(values)
878 if byRecord:
879 # Some record specifiers were found so we need to convert
880 # them to the Id form
881 for dimensionName, values in byRecord.items():
882 if dimensionName in newDataId:
883 log.debug(
884 "DataId specified explicit %s dimension value of %s in addition to"
885 " general record specifiers for it of %s. Ignoring record information.",
886 dimensionName,
887 newDataId[dimensionName],
888 str(values),
889 )
890 # Get the actual record and compare with these values.
891 try:
892 recs = list(self._registry.queryDimensionRecords(dimensionName, dataId=newDataId))
893 except DataIdError:
894 raise ValueError(
895 f"Could not find dimension '{dimensionName}'"
896 f" with dataId {newDataId} as part of comparing with"
897 f" record values {byRecord[dimensionName]}"
898 ) from None
899 if len(recs) == 1:
900 errmsg: list[str] = []
901 for k, v in values.items():
902 if (recval := getattr(recs[0], k)) != v:
903 errmsg.append(f"{k}({recval} != {v})")
904 if errmsg:
905 raise ValueError(
906 f"Dimension {dimensionName} in dataId has explicit value"
907 " inconsistent with records: " + ", ".join(errmsg)
908 )
909 else:
910 # Multiple matches for an explicit dimension
911 # should never happen but let downstream complain.
912 pass
913 continue
915 # Build up a WHERE expression
916 bind = {k: v for k, v in values.items()}
917 where = " AND ".join(f"{dimensionName}.{k} = {k}" for k in bind)
919 # Hopefully we get a single record that matches
920 records = set(
921 self._registry.queryDimensionRecords(
922 dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs
923 )
924 )
926 if len(records) != 1:
927 if len(records) > 1:
928 # visit can have an ambiguous answer without involving
929 # visit_system. The default visit_system is defined
930 # by the instrument.
931 if (
932 dimensionName == "visit"
933 and "visit_system_membership" in self.dimensions
934 and "visit_system" in self.dimensions["instrument"].metadata
935 ):
936 instrument_records = list(
937 self._registry.queryDimensionRecords(
938 "instrument",
939 dataId=newDataId,
940 **kwargs,
941 )
942 )
943 if len(instrument_records) == 1:
944 visit_system = instrument_records[0].visit_system
945 if visit_system is None:
946 # Set to a value that will never match.
947 visit_system = -1
949 # Look up each visit in the
950 # visit_system_membership records.
951 for rec in records:
952 membership = list(
953 self._registry.queryDimensionRecords(
954 # Use bind to allow zero results.
955 # This is a fully-specified query.
956 "visit_system_membership",
957 where="instrument = inst AND visit_system = system AND visit = v",
958 bind=dict(
959 inst=instrument_records[0].name, system=visit_system, v=rec.id
960 ),
961 )
962 )
963 if membership:
964 # This record is the right answer.
965 records = {rec}
966 break
968 # The ambiguity may have been resolved so check again.
969 if len(records) > 1:
970 log.debug("Received %d records from constraints of %s", len(records), str(values))
971 for r in records:
972 log.debug("- %s", str(r))
973 raise ValueError(
974 f"DataId specification for dimension {dimensionName} is not"
975 f" uniquely constrained to a single dataset by {values}."
976 f" Got {len(records)} results."
977 )
978 else:
979 raise ValueError(
980 f"DataId specification for dimension {dimensionName} matched no"
981 f" records when constrained by {values}"
982 )
984 # Get the primary key from the real dimension object
985 dimension = self.dimensions.getStaticDimensions()[dimensionName]
986 if not isinstance(dimension, Dimension):
987 raise RuntimeError(
988 f"{dimension.name} is not a true dimension, and cannot be used in data IDs."
989 )
990 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name)
992 return newDataId, kwargs
994 def _findDatasetRef(
995 self,
996 datasetRefOrType: DatasetRef | DatasetType | str,
997 dataId: DataId | None = None,
998 *,
999 collections: Any = None,
1000 predict: bool = False,
1001 run: str | None = None,
1002 **kwargs: Any,
1003 ) -> DatasetRef:
1004 """Shared logic for methods that start with a search for a dataset in
1005 the registry.
1007 Parameters
1008 ----------
1009 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1010 When `DatasetRef` the `dataId` should be `None`.
1011 Otherwise the `DatasetType` or name thereof.
1012 dataId : `dict` or `DataCoordinate`, optional
1013 A `dict` of `Dimension` link name, value pairs that label the
1014 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1015 should be provided as the first argument.
1016 collections : Any, optional
1017 Collections to be searched, overriding ``self.collections``.
1018 Can be any of the types supported by the ``collections`` argument
1019 to butler construction.
1020 predict : `bool`, optional
1021 If `True`, return a newly created `DatasetRef` with a unique
1022 dataset ID if finding a reference in the `Registry` fails.
1023 Defaults to `False`.
1024 run : `str`, optional
1025 Run collection name to use for creating `DatasetRef` for predicted
1026 datasets. Only used if ``predict`` is `True`.
1027 **kwargs
1028 Additional keyword arguments used to augment or construct a
1029 `DataId`. See `DataId` parameters.
1031 Returns
1032 -------
1033 ref : `DatasetRef`
1034 A reference to the dataset identified by the given arguments.
1035 This can be the same dataset reference as given if it was
1036 resolved.
1038 Raises
1039 ------
1040 LookupError
1041 Raised if no matching dataset exists in the `Registry` (and
1042 ``predict`` is `False`).
1043 ValueError
1044 Raised if a resolved `DatasetRef` was passed as an input, but it
1045 differs from the one found in the registry.
1046 TypeError
1047 Raised if no collections were provided.
1048 """
1049 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, for_put=False, **kwargs)
1050 if isinstance(datasetRefOrType, DatasetRef):
1051 if collections is not None:
1052 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3)
1053 return datasetRefOrType
1054 timespan: Timespan | None = None
1056 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)
1058 if datasetType.isCalibration():
1059 # Because this is a calibration dataset, first try to make a
1060 # standardize the data ID without restricting the dimensions to
1061 # those of the dataset type requested, because there may be extra
1062 # dimensions that provide temporal information for a validity-range
1063 # lookup.
1064 dataId = DataCoordinate.standardize(
1065 dataId, universe=self.dimensions, defaults=self._registry.defaults.dataId, **kwargs
1066 )
1067 if dataId.graph.temporal:
1068 dataId = self._registry.expandDataId(dataId)
1069 timespan = dataId.timespan
1070 else:
1071 # Standardize the data ID to just the dimensions of the dataset
1072 # type instead of letting registry.findDataset do it, so we get the
1073 # result even if no dataset is found.
1074 dataId = DataCoordinate.standardize(
1075 dataId, graph=datasetType.dimensions, defaults=self._registry.defaults.dataId, **kwargs
1076 )
1077 # Always lookup the DatasetRef, even if one is given, to ensure it is
1078 # present in the current collection.
1079 ref = self._registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan)
1080 if ref is None:
1081 if predict:
1082 if run is None:
1083 run = self.run
1084 if run is None:
1085 raise TypeError("Cannot predict dataset ID/location with run=None.")
1086 return DatasetRef(datasetType, dataId, run=run)
1087 else:
1088 if collections is None:
1089 collections = self._registry.defaults.collections
1090 raise LookupError(
1091 f"Dataset {datasetType.name} with data ID {dataId} "
1092 f"could not be found in collections {collections}."
1093 )
1094 if datasetType != ref.datasetType:
1095 # If they differ it is because the user explicitly specified
1096 # a compatible dataset type to this call rather than using the
1097 # registry definition. The DatasetRef must therefore be recreated
1098 # using the user definition such that the expected type is
1099 # returned.
1100 ref = DatasetRef(datasetType, ref.dataId, run=ref.run, id=ref.id)
1102 return ref
1104 @transactional
1105 @deprecated(
1106 reason="Butler.put() now behaves like Butler.putDirect() when given a DatasetRef."
1107 " Please use Butler.put(). Be aware that you may need to adjust your usage if you"
1108 " were relying on the run parameter to determine the run."
1109 " Will be removed after v27.0.",
1110 version="v26.0",
1111 category=FutureWarning,
1112 )
1113 def putDirect(self, obj: Any, ref: DatasetRef, /) -> DatasetRef:
1114 # Docstring inherited.
1115 return self.put(obj, ref)
1117 @transactional
1118 def put(
1119 self,
1120 obj: Any,
1121 datasetRefOrType: DatasetRef | DatasetType | str,
1122 /,
1123 dataId: DataId | None = None,
1124 *,
1125 run: str | None = None,
1126 **kwargs: Any,
1127 ) -> DatasetRef:
1128 """Store and register a dataset.
1130 Parameters
1131 ----------
1132 obj : `object`
1133 The dataset.
1134 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1135 When `DatasetRef` is provided, ``dataId`` should be `None`.
1136 Otherwise the `DatasetType` or name thereof. If a fully resolved
1137 `DatasetRef` is given the run and ID are used directly.
1138 dataId : `dict` or `DataCoordinate`
1139 A `dict` of `Dimension` link name, value pairs that label the
1140 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1141 should be provided as the second argument.
1142 run : `str`, optional
1143 The name of the run the dataset should be added to, overriding
1144 ``self.run``. Not used if a resolved `DatasetRef` is provided.
1145 **kwargs
1146 Additional keyword arguments used to augment or construct a
1147 `DataCoordinate`. See `DataCoordinate.standardize`
1148 parameters. Not used if a resolve `DatasetRef` is provided.
1150 Returns
1151 -------
1152 ref : `DatasetRef`
1153 A reference to the stored dataset, updated with the correct id if
1154 given.
1156 Raises
1157 ------
1158 TypeError
1159 Raised if the butler is read-only or if no run has been provided.
1160 """
1161 if isinstance(datasetRefOrType, DatasetRef):
1162 # This is a direct put of predefined DatasetRef.
1163 log.debug("Butler put direct: %s", datasetRefOrType)
1164 if run is not None:
1165 warnings.warn("Run collection is not used for DatasetRef", stacklevel=3)
1166 # If registry already has a dataset with the same dataset ID,
1167 # dataset type and DataId, then _importDatasets will do nothing and
1168 # just return an original ref. We have to raise in this case, there
1169 # is a datastore check below for that.
1170 self._registry._importDatasets([datasetRefOrType], expand=True)
1171 # Before trying to write to the datastore check that it does not
1172 # know this dataset. This is prone to races, of course.
1173 if self._datastore.knows(datasetRefOrType):
1174 raise ConflictingDefinitionError(f"Datastore already contains dataset: {datasetRefOrType}")
1175 # Try to write dataset to the datastore, if it fails due to a race
1176 # with another write, the content of stored data may be
1177 # unpredictable.
1178 try:
1179 self._datastore.put(obj, datasetRefOrType)
1180 except IntegrityError as e:
1181 raise ConflictingDefinitionError(f"Datastore already contains dataset: {e}")
1182 return datasetRefOrType
1184 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run)
1185 if not self.isWriteable():
1186 raise TypeError("Butler is read-only.")
1187 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwargs)
1189 # Handle dimension records in dataId
1190 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs)
1192 # Add Registry Dataset entry.
1193 dataId = self._registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs)
1194 (ref,) = self._registry.insertDatasets(datasetType, run=run, dataIds=[dataId])
1195 self._datastore.put(obj, ref)
1197 return ref
1199 @deprecated(
1200 reason="Butler.get() now behaves like Butler.getDirect() when given a DatasetRef."
1201 " Please use Butler.get(). Will be removed after v27.0.",
1202 version="v26.0",
1203 category=FutureWarning,
1204 )
1205 def getDirect(
1206 self,
1207 ref: DatasetRef,
1208 *,
1209 parameters: dict[str, Any] | None = None,
1210 storageClass: StorageClass | str | None = None,
1211 ) -> Any:
1212 """Retrieve a stored dataset.
1214 Parameters
1215 ----------
1216 ref : `DatasetRef`
1217 Resolved reference to an already stored dataset.
1218 parameters : `dict`
1219 Additional StorageClass-defined options to control reading,
1220 typically used to efficiently read only a subset of the dataset.
1221 storageClass : `StorageClass` or `str`, optional
1222 The storage class to be used to override the Python type
1223 returned by this method. By default the returned type matches
1224 the dataset type definition for this dataset. Specifying a
1225 read `StorageClass` can force a different type to be returned.
1226 This type must be compatible with the original type.
1228 Returns
1229 -------
1230 obj : `object`
1231 The dataset.
1232 """
1233 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
1235 @deprecated(
1236 reason="Butler.getDeferred() now behaves like getDirectDeferred() when given a DatasetRef. "
1237 "Please use Butler.getDeferred(). Will be removed after v27.0.",
1238 version="v26.0",
1239 category=FutureWarning,
1240 )
1241 def getDirectDeferred(
1242 self,
1243 ref: DatasetRef,
1244 *,
1245 parameters: dict | None = None,
1246 storageClass: str | StorageClass | None = None,
1247 ) -> DeferredDatasetHandle:
1248 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
1249 from a resolved `DatasetRef`.
1251 Parameters
1252 ----------
1253 ref : `DatasetRef`
1254 Resolved reference to an already stored dataset.
1255 parameters : `dict`
1256 Additional StorageClass-defined options to control reading,
1257 typically used to efficiently read only a subset of the dataset.
1258 storageClass : `StorageClass` or `str`, optional
1259 The storage class to be used to override the Python type
1260 returned by this method. By default the returned type matches
1261 the dataset type definition for this dataset. Specifying a
1262 read `StorageClass` can force a different type to be returned.
1263 This type must be compatible with the original type.
1265 Returns
1266 -------
1267 obj : `DeferredDatasetHandle`
1268 A handle which can be used to retrieve a dataset at a later time.
1270 Raises
1271 ------
1272 LookupError
1273 Raised if no matching dataset exists in the `Registry`.
1274 """
1275 # Check that dataset actually exists.
1276 if not self._datastore.exists(ref):
1277 raise LookupError(f"Dataset reference {ref} does not exist.")
1278 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
1280 def getDeferred(
1281 self,
1282 datasetRefOrType: DatasetRef | DatasetType | str,
1283 /,
1284 dataId: DataId | None = None,
1285 *,
1286 parameters: dict | None = None,
1287 collections: Any = None,
1288 storageClass: str | StorageClass | None = None,
1289 **kwargs: Any,
1290 ) -> DeferredDatasetHandle:
1291 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
1292 after an immediate registry lookup.
1294 Parameters
1295 ----------
1296 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1297 When `DatasetRef` the `dataId` should be `None`.
1298 Otherwise the `DatasetType` or name thereof.
1299 dataId : `dict` or `DataCoordinate`, optional
1300 A `dict` of `Dimension` link name, value pairs that label the
1301 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1302 should be provided as the first argument.
1303 parameters : `dict`
1304 Additional StorageClass-defined options to control reading,
1305 typically used to efficiently read only a subset of the dataset.
1306 collections : Any, optional
1307 Collections to be searched, overriding ``self.collections``.
1308 Can be any of the types supported by the ``collections`` argument
1309 to butler construction.
1310 storageClass : `StorageClass` or `str`, optional
1311 The storage class to be used to override the Python type
1312 returned by this method. By default the returned type matches
1313 the dataset type definition for this dataset. Specifying a
1314 read `StorageClass` can force a different type to be returned.
1315 This type must be compatible with the original type.
1316 **kwargs
1317 Additional keyword arguments used to augment or construct a
1318 `DataId`. See `DataId` parameters.
1320 Returns
1321 -------
1322 obj : `DeferredDatasetHandle`
1323 A handle which can be used to retrieve a dataset at a later time.
1325 Raises
1326 ------
1327 LookupError
1328 Raised if no matching dataset exists in the `Registry`.
1329 ValueError
1330 Raised if a resolved `DatasetRef` was passed as an input, but it
1331 differs from the one found in the registry.
1332 TypeError
1333 Raised if no collections were provided.
1334 """
1335 if isinstance(datasetRefOrType, DatasetRef) and not self._datastore.exists(datasetRefOrType):
1336 raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
1337 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1338 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
1340 def get(
1341 self,
1342 datasetRefOrType: DatasetRef | DatasetType | str,
1343 /,
1344 dataId: DataId | None = None,
1345 *,
1346 parameters: dict[str, Any] | None = None,
1347 collections: Any = None,
1348 storageClass: StorageClass | str | None = None,
1349 **kwargs: Any,
1350 ) -> Any:
1351 """Retrieve a stored dataset.
1353 Parameters
1354 ----------
1355 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1356 When `DatasetRef` the `dataId` should be `None`.
1357 Otherwise the `DatasetType` or name thereof.
1358 If a resolved `DatasetRef`, the associated dataset
1359 is returned directly without additional querying.
1360 dataId : `dict` or `DataCoordinate`
1361 A `dict` of `Dimension` link name, value pairs that label the
1362 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1363 should be provided as the first argument.
1364 parameters : `dict`
1365 Additional StorageClass-defined options to control reading,
1366 typically used to efficiently read only a subset of the dataset.
1367 collections : Any, optional
1368 Collections to be searched, overriding ``self.collections``.
1369 Can be any of the types supported by the ``collections`` argument
1370 to butler construction.
1371 storageClass : `StorageClass` or `str`, optional
1372 The storage class to be used to override the Python type
1373 returned by this method. By default the returned type matches
1374 the dataset type definition for this dataset. Specifying a
1375 read `StorageClass` can force a different type to be returned.
1376 This type must be compatible with the original type.
1377 **kwargs
1378 Additional keyword arguments used to augment or construct a
1379 `DataCoordinate`. See `DataCoordinate.standardize`
1380 parameters.
1382 Returns
1383 -------
1384 obj : `object`
1385 The dataset.
1387 Raises
1388 ------
1389 LookupError
1390 Raised if no matching dataset exists in the `Registry`.
1391 TypeError
1392 Raised if no collections were provided.
1394 Notes
1395 -----
1396 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
1397 this method requires that the given data ID include temporal dimensions
1398 beyond the dimensions of the dataset type itself, in order to find the
1399 dataset with the appropriate validity range. For example, a "bias"
1400 dataset with native dimensions ``{instrument, detector}`` could be
1401 fetched with a ``{instrument, detector, exposure}`` data ID, because
1402 ``exposure`` is a temporal dimension.
1403 """
1404 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
1405 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1406 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)
1408 def getURIs(
1409 self,
1410 datasetRefOrType: DatasetRef | DatasetType | str,
1411 /,
1412 dataId: DataId | None = None,
1413 *,
1414 predict: bool = False,
1415 collections: Any = None,
1416 run: str | None = None,
1417 **kwargs: Any,
1418 ) -> DatasetRefURIs:
1419 """Return the URIs associated with the dataset.
1421 Parameters
1422 ----------
1423 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1424 When `DatasetRef` the `dataId` should be `None`.
1425 Otherwise the `DatasetType` or name thereof.
1426 dataId : `dict` or `DataCoordinate`
1427 A `dict` of `Dimension` link name, value pairs that label the
1428 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1429 should be provided as the first argument.
1430 predict : `bool`
1431 If `True`, allow URIs to be returned of datasets that have not
1432 been written.
1433 collections : Any, optional
1434 Collections to be searched, overriding ``self.collections``.
1435 Can be any of the types supported by the ``collections`` argument
1436 to butler construction.
1437 run : `str`, optional
1438 Run to use for predictions, overriding ``self.run``.
1439 **kwargs
1440 Additional keyword arguments used to augment or construct a
1441 `DataCoordinate`. See `DataCoordinate.standardize`
1442 parameters.
1444 Returns
1445 -------
1446 uris : `DatasetRefURIs`
1447 The URI to the primary artifact associated with this dataset (if
1448 the dataset was disassembled within the datastore this may be
1449 `None`), and the URIs to any components associated with the dataset
1450 artifact. (can be empty if there are no components).
1451 """
1452 ref = self._findDatasetRef(
1453 datasetRefOrType, dataId, predict=predict, run=run, collections=collections, **kwargs
1454 )
1455 return self._datastore.getURIs(ref, predict)
1457 def getURI(
1458 self,
1459 datasetRefOrType: DatasetRef | DatasetType | str,
1460 /,
1461 dataId: DataId | None = None,
1462 *,
1463 predict: bool = False,
1464 collections: Any = None,
1465 run: str | None = None,
1466 **kwargs: Any,
1467 ) -> ResourcePath:
1468 """Return the URI to the Dataset.
1470 Parameters
1471 ----------
1472 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1473 When `DatasetRef` the `dataId` should be `None`.
1474 Otherwise the `DatasetType` or name thereof.
1475 dataId : `dict` or `DataCoordinate`
1476 A `dict` of `Dimension` link name, value pairs that label the
1477 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1478 should be provided as the first argument.
1479 predict : `bool`
1480 If `True`, allow URIs to be returned of datasets that have not
1481 been written.
1482 collections : Any, optional
1483 Collections to be searched, overriding ``self.collections``.
1484 Can be any of the types supported by the ``collections`` argument
1485 to butler construction.
1486 run : `str`, optional
1487 Run to use for predictions, overriding ``self.run``.
1488 **kwargs
1489 Additional keyword arguments used to augment or construct a
1490 `DataCoordinate`. See `DataCoordinate.standardize`
1491 parameters.
1493 Returns
1494 -------
1495 uri : `lsst.resources.ResourcePath`
1496 URI pointing to the Dataset within the datastore. If the
1497 Dataset does not exist in the datastore, and if ``predict`` is
1498 `True`, the URI will be a prediction and will include a URI
1499 fragment "#predicted".
1500 If the datastore does not have entities that relate well
1501 to the concept of a URI the returned URI string will be
1502 descriptive. The returned URI is not guaranteed to be obtainable.
1504 Raises
1505 ------
1506 LookupError
1507 A URI has been requested for a dataset that does not exist and
1508 guessing is not allowed.
1509 ValueError
1510 Raised if a resolved `DatasetRef` was passed as an input, but it
1511 differs from the one found in the registry.
1512 TypeError
1513 Raised if no collections were provided.
1514 RuntimeError
1515 Raised if a URI is requested for a dataset that consists of
1516 multiple artifacts.
1517 """
1518 primary, components = self.getURIs(
1519 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
1520 )
1522 if primary is None or components:
1523 raise RuntimeError(
1524 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
1525 "Use Butler.getURIs() instead."
1526 )
1527 return primary
1529 def retrieveArtifacts(
1530 self,
1531 refs: Iterable[DatasetRef],
1532 destination: ResourcePathExpression,
1533 transfer: str = "auto",
1534 preserve_path: bool = True,
1535 overwrite: bool = False,
1536 ) -> list[ResourcePath]:
1537 """Retrieve the artifacts associated with the supplied refs.
1539 Parameters
1540 ----------
1541 refs : iterable of `DatasetRef`
1542 The datasets for which artifacts are to be retrieved.
1543 A single ref can result in multiple artifacts. The refs must
1544 be resolved.
1545 destination : `lsst.resources.ResourcePath` or `str`
1546 Location to write the artifacts.
1547 transfer : `str`, optional
1548 Method to use to transfer the artifacts. Must be one of the options
1549 supported by `~lsst.resources.ResourcePath.transfer_from()`.
1550 "move" is not allowed.
1551 preserve_path : `bool`, optional
1552 If `True` the full path of the artifact within the datastore
1553 is preserved. If `False` the final file component of the path
1554 is used.
1555 overwrite : `bool`, optional
1556 If `True` allow transfers to overwrite existing files at the
1557 destination.
1559 Returns
1560 -------
1561 targets : `list` of `lsst.resources.ResourcePath`
1562 URIs of file artifacts in destination location. Order is not
1563 preserved.
1565 Notes
1566 -----
1567 For non-file datastores the artifacts written to the destination
1568 may not match the representation inside the datastore. For example
1569 a hierarchical data structure in a NoSQL database may well be stored
1570 as a JSON file.
1571 """
1572 return self._datastore.retrieveArtifacts(
1573 refs,
1574 ResourcePath(destination),
1575 transfer=transfer,
1576 preserve_path=preserve_path,
1577 overwrite=overwrite,
1578 )
1580 def exists(
1581 self,
1582 dataset_ref_or_type: DatasetRef | DatasetType | str,
1583 /,
1584 data_id: DataId | None = None,
1585 *,
1586 full_check: bool = True,
1587 collections: Any = None,
1588 **kwargs: Any,
1589 ) -> DatasetExistence:
1590 """Indicate whether a dataset is known to Butler registry and
1591 datastore.
1593 Parameters
1594 ----------
1595 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1596 When `DatasetRef` the `dataId` should be `None`.
1597 Otherwise the `DatasetType` or name thereof.
1598 data_id : `dict` or `DataCoordinate`
1599 A `dict` of `Dimension` link name, value pairs that label the
1600 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1601 should be provided as the first argument.
1602 full_check : `bool`, optional
1603 If `True`, an additional check will be made for dataset artifact
1604 existence. This will involve additional overhead due to the need
1605 to query an external system. If `False` registry and datastore
1606 will solely be asked if they know about the dataset but no
1607 check for the artifact will be performed.
1608 collections : Any, optional
1609 Collections to be searched, overriding ``self.collections``.
1610 Can be any of the types supported by the ``collections`` argument
1611 to butler construction.
1612 **kwargs
1613 Additional keyword arguments used to augment or construct a
1614 `DataCoordinate`. See `DataCoordinate.standardize`
1615 parameters.
1617 Returns
1618 -------
1619 existence : `DatasetExistence`
1620 Object indicating whether the dataset is known to registry and
1621 datastore. Evaluates to `True` if the dataset is present and known
1622 to both.
1623 """
1624 existence = DatasetExistence.UNRECOGNIZED
1626 if isinstance(dataset_ref_or_type, DatasetRef):
1627 if collections is not None:
1628 warnings.warn("Collections should not be specified with DatasetRef", stacklevel=2)
1629 if data_id is not None:
1630 warnings.warn("A DataID should not be specified with DatasetRef", stacklevel=2)
1631 ref = dataset_ref_or_type
1632 registry_ref = self._registry.getDataset(dataset_ref_or_type.id)
1633 if registry_ref is not None:
1634 existence |= DatasetExistence.RECORDED
1636 if dataset_ref_or_type != registry_ref:
1637 # This could mean that storage classes differ, so we should
1638 # check for that but use the registry ref for the rest of
1639 # the method.
1640 if registry_ref.is_compatible_with(dataset_ref_or_type):
1641 # Use the registry version from now on.
1642 ref = registry_ref
1643 else:
1644 raise ValueError(
1645 f"The ref given to exists() ({ref}) has the same dataset ID as one "
1646 f"in registry but has different incompatible values ({registry_ref})."
1647 )
1648 else:
1649 try:
1650 ref = self._findDatasetRef(dataset_ref_or_type, data_id, collections=collections, **kwargs)
1651 except (LookupError, TypeError, NoDefaultCollectionError):
1652 return existence
1653 existence |= DatasetExistence.RECORDED
1655 if self._datastore.knows(ref):
1656 existence |= DatasetExistence.DATASTORE
1658 if full_check:
1659 if self._datastore.exists(ref):
1660 existence |= DatasetExistence._ARTIFACT
1661 elif existence.value != DatasetExistence.UNRECOGNIZED.value:
1662 # Do not add this flag if we have no other idea about a dataset.
1663 existence |= DatasetExistence(DatasetExistence._ASSUMED)
1665 return existence
1667 def _exists_many(
1668 self,
1669 refs: Iterable[DatasetRef],
1670 /,
1671 *,
1672 full_check: bool = True,
1673 ) -> dict[DatasetRef, DatasetExistence]:
1674 """Indicate whether multiple datasets are known to Butler registry and
1675 datastore.
1677 This is an experimental API that may change at any moment.
1679 Parameters
1680 ----------
1681 refs : iterable of `DatasetRef`
1682 The datasets to be checked.
1683 full_check : `bool`, optional
1684 If `True`, an additional check will be made for dataset artifact
1685 existence. This will involve additional overhead due to the need
1686 to query an external system. If `False` registry and datastore
1687 will solely be asked if they know about the dataset but no
1688 check for the artifact will be performed.
1690 Returns
1691 -------
1692 existence : dict of [`DatasetRef`, `DatasetExistence`]
1693 Mapping from the given dataset refs to an enum indicating the
1694 status of the dataset in registry and datastore.
1695 Each value evaluates to `True` if the dataset is present and known
1696 to both.
1697 """
1698 existence = {ref: DatasetExistence.UNRECOGNIZED for ref in refs}
1700 # Registry does not have a bulk API to check for a ref.
1701 for ref in refs:
1702 registry_ref = self._registry.getDataset(ref.id)
1703 if registry_ref is not None:
1704 # It is possible, albeit unlikely, that the given ref does
1705 # not match the one in registry even though the UUID matches.
1706 # When checking a single ref we raise, but it's impolite to
1707 # do that when potentially hundreds of refs are being checked.
1708 # We could change the API to only accept UUIDs and that would
1709 # remove the ability to even check and remove the worry
1710 # about differing storage classes. Given the ongoing discussion
1711 # on refs vs UUIDs and whether to raise or have a new
1712 # private flag, treat this as a private API for now.
1713 existence[ref] |= DatasetExistence.RECORDED
1715 # Ask datastore if it knows about these refs.
1716 knows = self._datastore.knows_these(refs)
1717 for ref, known in knows.items():
1718 if known:
1719 existence[ref] |= DatasetExistence.DATASTORE
1721 if full_check:
1722 mexists = self._datastore.mexists(refs)
1723 for ref, exists in mexists.items():
1724 if exists:
1725 existence[ref] |= DatasetExistence._ARTIFACT
1726 else:
1727 # Do not set this flag if nothing is known about the dataset.
1728 for ref in existence.keys():
1729 if existence[ref] != DatasetExistence.UNRECOGNIZED:
1730 existence[ref] |= DatasetExistence._ASSUMED
1732 return existence
1734 @deprecated(
1735 reason="Butler.datasetExists() has been replaced by Butler.exists(). Will be removed after v27.0.",
1736 version="v26.0",
1737 category=FutureWarning,
1738 )
1739 def datasetExists(
1740 self,
1741 datasetRefOrType: DatasetRef | DatasetType | str,
1742 dataId: DataId | None = None,
1743 *,
1744 collections: Any = None,
1745 **kwargs: Any,
1746 ) -> bool:
1747 """Return True if the Dataset is actually present in the Datastore.
1749 Parameters
1750 ----------
1751 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
1752 When `DatasetRef` the `dataId` should be `None`.
1753 Otherwise the `DatasetType` or name thereof.
1754 dataId : `dict` or `DataCoordinate`
1755 A `dict` of `Dimension` link name, value pairs that label the
1756 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1757 should be provided as the first argument.
1758 collections : Any, optional
1759 Collections to be searched, overriding ``self.collections``.
1760 Can be any of the types supported by the ``collections`` argument
1761 to butler construction.
1762 **kwargs
1763 Additional keyword arguments used to augment or construct a
1764 `DataCoordinate`. See `DataCoordinate.standardize`
1765 parameters.
1767 Raises
1768 ------
1769 LookupError
1770 Raised if the dataset is not even present in the Registry.
1771 ValueError
1772 Raised if a resolved `DatasetRef` was passed as an input, but it
1773 differs from the one found in the registry.
1774 NoDefaultCollectionError
1775 Raised if no collections were provided.
1776 """
1777 # A resolved ref may be given that is not known to this butler.
1778 if isinstance(datasetRefOrType, DatasetRef):
1779 ref = self._registry.getDataset(datasetRefOrType.id)
1780 if ref is None:
1781 raise LookupError(
1782 f"Resolved DatasetRef with id {datasetRefOrType.id} is not known to registry."
1783 )
1784 else:
1785 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
1786 return self._datastore.exists(ref)
1788 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1789 """Remove one or more `~CollectionType.RUN` collections and the
1790 datasets within them.
1792 Parameters
1793 ----------
1794 names : `~collections.abc.Iterable` [ `str` ]
1795 The names of the collections to remove.
1796 unstore : `bool`, optional
1797 If `True` (default), delete datasets from all datastores in which
1798 they are present, and attempt to rollback the registry deletions if
1799 datastore deletions fail (which may not always be possible). If
1800 `False`, datastore records for these datasets are still removed,
1801 but any artifacts (e.g. files) will not be.
1803 Raises
1804 ------
1805 TypeError
1806 Raised if one or more collections are not of type
1807 `~CollectionType.RUN`.
1808 """
1809 if not self.isWriteable():
1810 raise TypeError("Butler is read-only.")
1811 names = list(names)
1812 refs: list[DatasetRef] = []
1813 for name in names:
1814 collectionType = self._registry.getCollectionType(name)
1815 if collectionType is not CollectionType.RUN:
1816 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.")
1817 refs.extend(self._registry.queryDatasets(..., collections=name, findFirst=True))
1818 with self._datastore.transaction():
1819 with self._registry.transaction():
1820 if unstore:
1821 self._datastore.trash(refs)
1822 else:
1823 self._datastore.forget(refs)
1824 for name in names:
1825 self._registry.removeCollection(name)
1826 if unstore:
1827 # Point of no return for removing artifacts
1828 self._datastore.emptyTrash()
1830 def pruneDatasets(
1831 self,
1832 refs: Iterable[DatasetRef],
1833 *,
1834 disassociate: bool = True,
1835 unstore: bool = False,
1836 tags: Iterable[str] = (),
1837 purge: bool = False,
1838 ) -> None:
1839 # docstring inherited from LimitedButler
1841 if not self.isWriteable():
1842 raise TypeError("Butler is read-only.")
1843 if purge:
1844 if not disassociate:
1845 raise TypeError("Cannot pass purge=True without disassociate=True.")
1846 if not unstore:
1847 raise TypeError("Cannot pass purge=True without unstore=True.")
1848 elif disassociate:
1849 tags = tuple(tags)
1850 if not tags:
1851 raise TypeError("No tags provided but disassociate=True.")
1852 for tag in tags:
1853 collectionType = self._registry.getCollectionType(tag)
1854 if collectionType is not CollectionType.TAGGED:
1855 raise TypeError(
1856 f"Cannot disassociate from collection '{tag}' "
1857 f"of non-TAGGED type {collectionType.name}."
1858 )
1859 # Transform possibly-single-pass iterable into something we can iterate
1860 # over multiple times.
1861 refs = list(refs)
1862 # Pruning a component of a DatasetRef makes no sense since registry
1863 # doesn't know about components and datastore might not store
1864 # components in a separate file
1865 for ref in refs:
1866 if ref.datasetType.component():
1867 raise ValueError(f"Can not prune a component of a dataset (ref={ref})")
1868 # We don't need an unreliable Datastore transaction for this, because
1869 # we've been extra careful to ensure that Datastore.trash only involves
1870 # mutating the Registry (it can _look_ at Datastore-specific things,
1871 # but shouldn't change them), and hence all operations here are
1872 # Registry operations.
1873 with self._datastore.transaction():
1874 with self._registry.transaction():
1875 if unstore:
1876 self._datastore.trash(refs)
1877 if purge:
1878 self._registry.removeDatasets(refs)
1879 elif disassociate:
1880 assert tags, "Guaranteed by earlier logic in this function."
1881 for tag in tags:
1882 self._registry.disassociate(tag, refs)
1883 # We've exited the Registry transaction, and apparently committed.
1884 # (if there was an exception, everything rolled back, and it's as if
1885 # nothing happened - and we never get here).
1886 # Datastore artifacts are not yet gone, but they're clearly marked
1887 # as trash, so if we fail to delete now because of (e.g.) filesystem
1888 # problems we can try again later, and if manual administrative
1889 # intervention is required, it's pretty clear what that should entail:
1890 # deleting everything on disk and in private Datastore tables that is
1891 # in the dataset_location_trash table.
1892 if unstore:
1893 # Point of no return for removing artifacts
1894 self._datastore.emptyTrash()
1896 @transactional
1897 def ingest(
1898 self,
1899 *datasets: FileDataset,
1900 transfer: str | None = "auto",
1901 run: str | None = None,
1902 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
1903 record_validation_info: bool = True,
1904 ) -> None:
1905 """Store and register one or more datasets that already exist on disk.
1907 Parameters
1908 ----------
1909 datasets : `FileDataset`
1910 Each positional argument is a struct containing information about
1911 a file to be ingested, including its URI (either absolute or
1912 relative to the datastore root, if applicable), a resolved
1913 `DatasetRef`, and optionally a formatter class or its
1914 fully-qualified string name. If a formatter is not provided, the
1915 formatter that would be used for `put` is assumed. On successful
1916 ingest all `FileDataset.formatter` attributes will be set to the
1917 formatter class used. `FileDataset.path` attributes may be modified
1918 to put paths in whatever the datastore considers a standardized
1919 form.
1920 transfer : `str`, optional
1921 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1922 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1923 transfer the file.
1924 run : `str`, optional
1925 The name of the run ingested datasets should be added to,
1926 overriding ``self.run``. This parameter is now deprecated since
1927 the run is encoded in the ``FileDataset``.
1928 idGenerationMode : `DatasetIdGenEnum`, optional
1929 Specifies option for generating dataset IDs. By default unique IDs
1930 are generated for each inserted dataset.
1931 record_validation_info : `bool`, optional
1932 If `True`, the default, the datastore can record validation
1933 information associated with the file. If `False` the datastore
1934 will not attempt to track any information such as checksums
1935 or file sizes. This can be useful if such information is tracked
1936 in an external system or if the file is to be compressed in place.
1937 It is up to the datastore whether this parameter is relevant.
1939 Raises
1940 ------
1941 TypeError
1942 Raised if the butler is read-only or if no run was provided.
1943 NotImplementedError
1944 Raised if the `Datastore` does not support the given transfer mode.
1945 DatasetTypeNotSupportedError
1946 Raised if one or more files to be ingested have a dataset type that
1947 is not supported by the `Datastore`..
1948 FileNotFoundError
1949 Raised if one of the given files does not exist.
1950 FileExistsError
1951 Raised if transfer is not `None` but the (internal) location the
1952 file would be moved to is already occupied.
1954 Notes
1955 -----
1956 This operation is not fully exception safe: if a database operation
1957 fails, the given `FileDataset` instances may be only partially updated.
1959 It is atomic in terms of database operations (they will either all
1960 succeed or all fail) providing the database engine implements
1961 transactions correctly. It will attempt to be atomic in terms of
1962 filesystem operations as well, but this cannot be implemented
1963 rigorously for most datastores.
1964 """
1965 if not self.isWriteable():
1966 raise TypeError("Butler is read-only.")
1968 log.verbose("Ingesting %d file dataset%s.", len(datasets), "" if len(datasets) == 1 else "s")
1969 if not datasets:
1970 return
1972 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG)
1974 # We need to reorganize all the inputs so that they are grouped
1975 # by dataset type and run. Multiple refs in a single FileDataset
1976 # are required to share the run and dataset type.
1977 GroupedData = MutableMapping[tuple[DatasetType, str], list[FileDataset]]
1978 groupedData: GroupedData = defaultdict(list)
1980 # Track DataIDs that are being ingested so we can spot issues early
1981 # with duplication. Retain previous FileDataset so we can report it.
1982 groupedDataIds: MutableMapping[
1983 tuple[DatasetType, str], dict[DataCoordinate, FileDataset]
1984 ] = defaultdict(dict)
1986 used_run = False
1988 # And the nested loop that populates it:
1989 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"):
1990 # Somewhere to store pre-existing refs if we have an
1991 # execution butler.
1992 existingRefs: list[DatasetRef] = []
1994 for ref in dataset.refs:
1995 assert ref.run is not None # For mypy
1996 group_key = (ref.datasetType, ref.run)
1998 if ref.dataId in groupedDataIds[group_key]:
1999 raise ConflictingDefinitionError(
2000 f"Ingest conflict. Dataset {dataset.path} has same"
2001 " DataId as other ingest dataset"
2002 f" {groupedDataIds[group_key][ref.dataId].path} "
2003 f" ({ref.dataId})"
2004 )
2006 groupedDataIds[group_key][ref.dataId] = dataset
2008 if existingRefs:
2009 if len(dataset.refs) != len(existingRefs):
2010 # Keeping track of partially pre-existing datasets is hard
2011 # and should generally never happen. For now don't allow
2012 # it.
2013 raise ConflictingDefinitionError(
2014 f"For dataset {dataset.path} some dataIds already exist"
2015 " in registry but others do not. This is not supported."
2016 )
2018 # Store expanded form in the original FileDataset.
2019 dataset.refs = existingRefs
2020 else:
2021 groupedData[group_key].append(dataset)
2023 if not used_run and run is not None:
2024 warnings.warn(
2025 "All DatasetRefs to be ingested had resolved dataset IDs. The value given to the "
2026 f"'run' parameter ({run!r}) was not used and the parameter will be removed in the future.",
2027 category=FutureWarning,
2028 stacklevel=3, # Take into account the @transactional decorator.
2029 )
2031 # Now we can bulk-insert into Registry for each DatasetType.
2032 for (datasetType, this_run), grouped_datasets in progress.iter_item_chunks(
2033 groupedData.items(), desc="Bulk-inserting datasets by type"
2034 ):
2035 refs_to_import = []
2036 for dataset in grouped_datasets:
2037 refs_to_import.extend(dataset.refs)
2039 n_refs = len(refs_to_import)
2040 log.verbose(
2041 "Importing %d ref%s of dataset type %r into run %r",
2042 n_refs,
2043 "" if n_refs == 1 else "s",
2044 datasetType.name,
2045 this_run,
2046 )
2048 # Import the refs and expand the DataCoordinates since we can't
2049 # guarantee that they are expanded and Datastore will need
2050 # the records.
2051 imported_refs = self._registry._importDatasets(refs_to_import, expand=True)
2052 assert set(imported_refs) == set(refs_to_import)
2054 # Replace all the refs in the FileDataset with expanded versions.
2055 # Pull them off in the order we put them on the list.
2056 for dataset in grouped_datasets:
2057 n_dataset_refs = len(dataset.refs)
2058 dataset.refs = imported_refs[:n_dataset_refs]
2059 del imported_refs[:n_dataset_refs]
2061 # Bulk-insert everything into Datastore.
2062 # We do not know if any of the registry entries already existed
2063 # (_importDatasets only complains if they exist but differ) so
2064 # we have to catch IntegrityError explicitly.
2065 try:
2066 self._datastore.ingest(
2067 *datasets, transfer=transfer, record_validation_info=record_validation_info
2068 )
2069 except IntegrityError as e:
2070 raise ConflictingDefinitionError(f"Datastore already contains one or more datasets: {e}")
2072 @contextlib.contextmanager
2073 def export(
2074 self,
2075 *,
2076 directory: str | None = None,
2077 filename: str | None = None,
2078 format: str | None = None,
2079 transfer: str | None = None,
2080 ) -> Iterator[RepoExportContext]:
2081 """Export datasets from the repository represented by this `Butler`.
2083 This method is a context manager that returns a helper object
2084 (`RepoExportContext`) that is used to indicate what information from
2085 the repository should be exported.
2087 Parameters
2088 ----------
2089 directory : `str`, optional
2090 Directory dataset files should be written to if ``transfer`` is not
2091 `None`.
2092 filename : `str`, optional
2093 Name for the file that will include database information associated
2094 with the exported datasets. If this is not an absolute path and
2095 ``directory`` is not `None`, it will be written to ``directory``
2096 instead of the current working directory. Defaults to
2097 "export.{format}".
2098 format : `str`, optional
2099 File format for the database information file. If `None`, the
2100 extension of ``filename`` will be used.
2101 transfer : `str`, optional
2102 Transfer mode passed to `Datastore.export`.
2104 Raises
2105 ------
2106 TypeError
2107 Raised if the set of arguments passed is inconsistent.
2109 Examples
2110 --------
2111 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
2112 methods are used to provide the iterables over data IDs and/or datasets
2113 to be exported::
2115 with butler.export("exports.yaml") as export:
2116 # Export all flats, but none of the dimension element rows
2117 # (i.e. data ID information) associated with them.
2118 export.saveDatasets(butler.registry.queryDatasets("flat"),
2119 elements=())
2120 # Export all datasets that start with "deepCoadd_" and all of
2121 # their associated data ID information.
2122 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
2123 """
2124 if directory is None and transfer is not None:
2125 raise TypeError("Cannot transfer without providing a directory.")
2126 if transfer == "move":
2127 raise TypeError("Transfer may not be 'move': export is read-only")
2128 if format is None:
2129 if filename is None:
2130 raise TypeError("At least one of 'filename' or 'format' must be provided.")
2131 else:
2132 _, format = os.path.splitext(filename)
2133 if not format:
2134 raise ValueError("Please specify a file extension to determine export format.")
2135 format = format[1:] # Strip leading ".""
2136 elif filename is None:
2137 filename = f"export.{format}"
2138 if directory is not None:
2139 filename = os.path.join(directory, filename)
2140 formats = self._config["repo_transfer_formats"]
2141 if format not in formats:
2142 raise ValueError(f"Unknown export format {format!r}, allowed: {','.join(formats.keys())}")
2143 BackendClass = get_class_of(formats[format, "export"])
2144 with open(filename, "w") as stream:
2145 backend = BackendClass(stream, universe=self.dimensions)
2146 try:
2147 helper = RepoExportContext(
2148 self._registry, self._datastore, backend=backend, directory=directory, transfer=transfer
2149 )
2150 yield helper
2151 except BaseException:
2152 raise
2153 else:
2154 helper._finish()
2156 def import_(
2157 self,
2158 *,
2159 directory: ResourcePathExpression | None = None,
2160 filename: ResourcePathExpression | TextIO | None = None,
2161 format: str | None = None,
2162 transfer: str | None = None,
2163 skip_dimensions: set | None = None,
2164 ) -> None:
2165 """Import datasets into this repository that were exported from a
2166 different butler repository via `~lsst.daf.butler.Butler.export`.
2168 Parameters
2169 ----------
2170 directory : `~lsst.resources.ResourcePathExpression`, optional
2171 Directory containing dataset files to import from. If `None`,
2172 ``filename`` and all dataset file paths specified therein must
2173 be absolute.
2174 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
2175 A stream or name of file that contains database information
2176 associated with the exported datasets, typically generated by
2177 `~lsst.daf.butler.Butler.export`. If this a string (name) or
2178 `~lsst.resources.ResourcePath` and is not an absolute path,
2179 it will first be looked for relative to ``directory`` and if not
2180 found there it will be looked for in the current working
2181 directory. Defaults to "export.{format}".
2182 format : `str`, optional
2183 File format for ``filename``. If `None`, the extension of
2184 ``filename`` will be used.
2185 transfer : `str`, optional
2186 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
2187 skip_dimensions : `set`, optional
2188 Names of dimensions that should be skipped and not imported.
2190 Raises
2191 ------
2192 TypeError
2193 Raised if the set of arguments passed is inconsistent, or if the
2194 butler is read-only.
2195 """
2196 if not self.isWriteable():
2197 raise TypeError("Butler is read-only.")
2198 if format is None:
2199 if filename is None:
2200 raise TypeError("At least one of 'filename' or 'format' must be provided.")
2201 else:
2202 _, format = os.path.splitext(filename) # type: ignore
2203 elif filename is None:
2204 filename = ResourcePath(f"export.{format}", forceAbsolute=False)
2205 if directory is not None:
2206 directory = ResourcePath(directory, forceDirectory=True)
2207 # mypy doesn't think this will work but it does in python >= 3.10.
2208 if isinstance(filename, ResourcePathExpression): # type: ignore
2209 filename = ResourcePath(filename, forceAbsolute=False) # type: ignore
2210 if not filename.isabs() and directory is not None:
2211 potential = directory.join(filename)
2212 exists_in_cwd = filename.exists()
2213 exists_in_dir = potential.exists()
2214 if exists_in_cwd and exists_in_dir:
2215 log.warning(
2216 "A relative path for filename was specified (%s) which exists relative to cwd. "
2217 "Additionally, the file exists relative to the given search directory (%s). "
2218 "Using the export file in the given directory.",
2219 filename,
2220 potential,
2221 )
2222 # Given they specified an explicit directory and that
2223 # directory has the export file in it, assume that that
2224 # is what was meant despite the file in cwd.
2225 filename = potential
2226 elif exists_in_dir:
2227 filename = potential
2228 elif not exists_in_cwd and not exists_in_dir:
2229 # Raise early.
2230 raise FileNotFoundError(
2231 f"Export file could not be found in {filename.abspath()} or {potential.abspath()}."
2232 )
2233 BackendClass: type[RepoImportBackend] = get_class_of(
2234 self._config["repo_transfer_formats"][format]["import"]
2235 )
2237 def doImport(importStream: TextIO | ResourceHandleProtocol) -> None:
2238 backend = BackendClass(importStream, self._registry) # type: ignore[call-arg]
2239 backend.register()
2240 with self.transaction():
2241 backend.load(
2242 self._datastore,
2243 directory=directory,
2244 transfer=transfer,
2245 skip_dimensions=skip_dimensions,
2246 )
2248 if isinstance(filename, ResourcePath):
2249 # We can not use open() here at the moment because of
2250 # DM-38589 since yaml does stream.read(8192) in a loop.
2251 stream = io.StringIO(filename.read().decode())
2252 doImport(stream)
2253 else:
2254 doImport(filename) # type: ignore
2256 def transfer_from(
2257 self,
2258 source_butler: LimitedButler,
2259 source_refs: Iterable[DatasetRef],
2260 transfer: str = "auto",
2261 skip_missing: bool = True,
2262 register_dataset_types: bool = False,
2263 transfer_dimensions: bool = False,
2264 ) -> collections.abc.Collection[DatasetRef]:
2265 """Transfer datasets to this Butler from a run in another Butler.
2267 Parameters
2268 ----------
2269 source_butler : `LimitedButler`
2270 Butler from which the datasets are to be transferred. If data IDs
2271 in ``source_refs`` are not expanded then this has to be a full
2272 `Butler` whose registry will be used to expand data IDs.
2273 source_refs : iterable of `DatasetRef`
2274 Datasets defined in the source butler that should be transferred to
2275 this butler.
2276 transfer : `str`, optional
2277 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
2278 skip_missing : `bool`
2279 If `True`, datasets with no datastore artifact associated with
2280 them are not transferred. If `False` a registry entry will be
2281 created even if no datastore record is created (and so will
2282 look equivalent to the dataset being unstored).
2283 register_dataset_types : `bool`
2284 If `True` any missing dataset types are registered. Otherwise
2285 an exception is raised.
2286 transfer_dimensions : `bool`, optional
2287 If `True`, dimension record data associated with the new datasets
2288 will be transferred.
2290 Returns
2291 -------
2292 refs : `list` of `DatasetRef`
2293 The refs added to this Butler.
2295 Notes
2296 -----
2297 The datastore artifact has to exist for a transfer
2298 to be made but non-existence is not an error.
2300 Datasets that already exist in this run will be skipped.
2302 The datasets are imported as part of a transaction, although
2303 dataset types are registered before the transaction is started.
2304 This means that it is possible for a dataset type to be registered
2305 even though transfer has failed.
2306 """
2307 if not self.isWriteable():
2308 raise TypeError("Butler is read-only.")
2309 progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE)
2311 # Will iterate through the refs multiple times so need to convert
2312 # to a list if this isn't a collection.
2313 if not isinstance(source_refs, collections.abc.Collection):
2314 source_refs = list(source_refs)
2316 original_count = len(source_refs)
2317 log.info("Transferring %d datasets into %s", original_count, str(self))
2319 # In some situations the datastore artifact may be missing
2320 # and we do not want that registry entry to be imported.
2321 # Asking datastore is not sufficient, the records may have been
2322 # purged, we have to ask for the (predicted) URI and check
2323 # existence explicitly. Execution butler is set up exactly like
2324 # this with no datastore records.
2325 artifact_existence: dict[ResourcePath, bool] = {}
2326 if skip_missing:
2327 dataset_existence = source_butler._datastore.mexists(
2328 source_refs, artifact_existence=artifact_existence
2329 )
2330 source_refs = [ref for ref, exists in dataset_existence.items() if exists]
2331 filtered_count = len(source_refs)
2332 n_missing = original_count - filtered_count
2333 log.verbose(
2334 "%d dataset%s removed because the artifact does not exist. Now have %d.",
2335 n_missing,
2336 "" if n_missing == 1 else "s",
2337 filtered_count,
2338 )
2340 # Importing requires that we group the refs by dataset type and run
2341 # before doing the import.
2342 source_dataset_types = set()
2343 grouped_refs = defaultdict(list)
2344 for ref in source_refs:
2345 grouped_refs[ref.datasetType, ref.run].append(ref)
2346 source_dataset_types.add(ref.datasetType)
2348 # Check to see if the dataset type in the source butler has
2349 # the same definition in the target butler and register missing
2350 # ones if requested. Registration must happen outside a transaction.
2351 newly_registered_dataset_types = set()
2352 for datasetType in source_dataset_types:
2353 if register_dataset_types:
2354 # Let this raise immediately if inconsistent. Continuing
2355 # on to find additional inconsistent dataset types
2356 # might result in additional unwanted dataset types being
2357 # registered.
2358 if self._registry.registerDatasetType(datasetType):
2359 newly_registered_dataset_types.add(datasetType)
2360 else:
2361 # If the dataset type is missing, let it fail immediately.
2362 target_dataset_type = self._registry.getDatasetType(datasetType.name)
2363 if target_dataset_type != datasetType:
2364 raise ConflictingDefinitionError(
2365 "Source butler dataset type differs from definition"
2366 f" in target butler: {datasetType} !="
2367 f" {target_dataset_type}"
2368 )
2369 if newly_registered_dataset_types:
2370 # We may have registered some even if there were inconsistencies
2371 # but should let people know (or else remove them again).
2372 log.log(
2373 VERBOSE,
2374 "Registered the following dataset types in the target Butler: %s",
2375 ", ".join(d.name for d in newly_registered_dataset_types),
2376 )
2377 else:
2378 log.log(VERBOSE, "All required dataset types are known to the target Butler")
2380 dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict)
2381 if transfer_dimensions:
2382 # Collect all the dimension records for these refs.
2383 # All dimensions are to be copied but the list of valid dimensions
2384 # come from this butler's universe.
2385 elements = frozenset(
2386 element
2387 for element in self.dimensions.getStaticElements()
2388 if element.hasTable() and element.viewOf is None
2389 )
2390 dataIds = {ref.dataId for ref in source_refs}
2391 # This logic comes from saveDataIds.
2392 for dataId in dataIds:
2393 # Need an expanded record, if not expanded that we need a full
2394 # butler with registry (allow mocks with registry too).
2395 if not dataId.hasRecords():
2396 if registry := getattr(source_butler, "registry", None):
2397 dataId = registry.expandDataId(dataId)
2398 else:
2399 raise TypeError("Input butler needs to be a full butler to expand DataId.")
2400 # If this butler doesn't know about a dimension in the source
2401 # butler things will break later.
2402 for record in dataId.records.values():
2403 if record is not None and record.definition in elements:
2404 dimension_records[record.definition].setdefault(record.dataId, record)
2406 handled_collections: set[str] = set()
2408 # Do all the importing in a single transaction.
2409 with self.transaction():
2410 if dimension_records:
2411 log.verbose("Ensuring that dimension records exist for transferred datasets.")
2412 for element, r in dimension_records.items():
2413 records = [r[dataId] for dataId in r]
2414 # Assume that if the record is already present that we can
2415 # use it without having to check that the record metadata
2416 # is consistent.
2417 self._registry.insertDimensionData(element, *records, skip_existing=True)
2419 n_imported = 0
2420 for (datasetType, run), refs_to_import in progress.iter_item_chunks(
2421 grouped_refs.items(), desc="Importing to registry by run and dataset type"
2422 ):
2423 if run not in handled_collections:
2424 # May need to create output collection. If source butler
2425 # has a registry, ask for documentation string.
2426 run_doc = None
2427 if registry := getattr(source_butler, "registry", None):
2428 run_doc = registry.getCollectionDocumentation(run)
2429 registered = self._registry.registerRun(run, doc=run_doc)
2430 handled_collections.add(run)
2431 if registered:
2432 log.log(VERBOSE, "Creating output run %s", run)
2434 n_refs = len(refs_to_import)
2435 log.verbose(
2436 "Importing %d ref%s of dataset type %s into run %s",
2437 n_refs,
2438 "" if n_refs == 1 else "s",
2439 datasetType.name,
2440 run,
2441 )
2443 # Assume we are using UUIDs and the source refs will match
2444 # those imported.
2445 imported_refs = self._registry._importDatasets(refs_to_import, expand=False)
2446 assert set(imported_refs) == set(refs_to_import)
2447 n_imported += len(imported_refs)
2449 assert len(source_refs) == n_imported
2450 log.verbose("Imported %d datasets into destination butler", n_imported)
2452 # Ask the datastore to transfer. The datastore has to check that
2453 # the source datastore is compatible with the target datastore.
2454 accepted, rejected = self._datastore.transfer_from(
2455 source_butler._datastore,
2456 source_refs,
2457 transfer=transfer,
2458 artifact_existence=artifact_existence,
2459 )
2460 if rejected:
2461 # For now, accept the registry entries but not the files.
2462 log.warning(
2463 "%d datasets were rejected and %d accepted for dataset type %s in run %r.",
2464 len(rejected),
2465 len(accepted),
2466 datasetType,
2467 run,
2468 )
2470 return source_refs
2472 def validateConfiguration(
2473 self,
2474 logFailures: bool = False,
2475 datasetTypeNames: Iterable[str] | None = None,
2476 ignore: Iterable[str] | None = None,
2477 ) -> None:
2478 """Validate butler configuration.
2480 Checks that each `DatasetType` can be stored in the `Datastore`.
2482 Parameters
2483 ----------
2484 logFailures : `bool`, optional
2485 If `True`, output a log message for every validation error
2486 detected.
2487 datasetTypeNames : iterable of `str`, optional
2488 The `DatasetType` names that should be checked. This allows
2489 only a subset to be selected.
2490 ignore : iterable of `str`, optional
2491 Names of DatasetTypes to skip over. This can be used to skip
2492 known problems. If a named `DatasetType` corresponds to a
2493 composite, all components of that `DatasetType` will also be
2494 ignored.
2496 Raises
2497 ------
2498 ButlerValidationError
2499 Raised if there is some inconsistency with how this Butler
2500 is configured.
2501 """
2502 if datasetTypeNames:
2503 datasetTypes = [self._registry.getDatasetType(name) for name in datasetTypeNames]
2504 else:
2505 datasetTypes = list(self._registry.queryDatasetTypes())
2507 # filter out anything from the ignore list
2508 if ignore:
2509 ignore = set(ignore)
2510 datasetTypes = [
2511 e for e in datasetTypes if e.name not in ignore and e.nameAndComponent()[0] not in ignore
2512 ]
2513 else:
2514 ignore = set()
2516 # For each datasetType that has an instrument dimension, create
2517 # a DatasetRef for each defined instrument
2518 datasetRefs = []
2520 # Find all the registered instruments (if "instrument" is in the
2521 # universe).
2522 if "instrument" in self.dimensions:
2523 instruments = {record.name for record in self._registry.queryDimensionRecords("instrument")}
2525 for datasetType in datasetTypes:
2526 if "instrument" in datasetType.dimensions:
2527 # In order to create a conforming dataset ref, create
2528 # fake DataCoordinate values for the non-instrument
2529 # dimensions. The type of the value does not matter here.
2530 dataId = {dim.name: 1 for dim in datasetType.dimensions if dim.name != "instrument"}
2532 for instrument in instruments:
2533 datasetRef = DatasetRef(
2534 datasetType,
2535 DataCoordinate.standardize(
2536 dataId, instrument=instrument, graph=datasetType.dimensions
2537 ),
2538 run="validate",
2539 )
2540 datasetRefs.append(datasetRef)
2542 entities: list[DatasetType | DatasetRef] = []
2543 entities.extend(datasetTypes)
2544 entities.extend(datasetRefs)
2546 datastoreErrorStr = None
2547 try:
2548 self._datastore.validateConfiguration(entities, logFailures=logFailures)
2549 except ValidationError as e:
2550 datastoreErrorStr = str(e)
2552 # Also check that the LookupKeys used by the datastores match
2553 # registry and storage class definitions
2554 keys = self._datastore.getLookupKeys()
2556 failedNames = set()
2557 failedDataId = set()
2558 for key in keys:
2559 if key.name is not None:
2560 if key.name in ignore:
2561 continue
2563 # skip if specific datasetType names were requested and this
2564 # name does not match
2565 if datasetTypeNames and key.name not in datasetTypeNames:
2566 continue
2568 # See if it is a StorageClass or a DatasetType
2569 if key.name in self.storageClasses:
2570 pass
2571 else:
2572 try:
2573 self._registry.getDatasetType(key.name)
2574 except KeyError:
2575 if logFailures:
2576 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key)
2577 failedNames.add(key)
2578 else:
2579 # Dimensions are checked for consistency when the Butler
2580 # is created and rendezvoused with a universe.
2581 pass
2583 # Check that the instrument is a valid instrument
2584 # Currently only support instrument so check for that
2585 if key.dataId:
2586 dataIdKeys = set(key.dataId)
2587 if {"instrument"} != dataIdKeys:
2588 if logFailures:
2589 log.critical("Key '%s' has unsupported DataId override", key)
2590 failedDataId.add(key)
2591 elif key.dataId["instrument"] not in instruments:
2592 if logFailures:
2593 log.critical("Key '%s' has unknown instrument", key)
2594 failedDataId.add(key)
2596 messages = []
2598 if datastoreErrorStr:
2599 messages.append(datastoreErrorStr)
2601 for failed, msg in (
2602 (failedNames, "Keys without corresponding DatasetType or StorageClass entry: "),
2603 (failedDataId, "Keys with bad DataId entries: "),
2604 ):
2605 if failed:
2606 msg += ", ".join(str(k) for k in failed)
2607 messages.append(msg)
2609 if messages:
2610 raise ValidationError(";\n".join(messages))
2612 @property
2613 def collections(self) -> Sequence[str]:
2614 """The collections to search by default, in order
2615 (`~collections.abc.Sequence` [ `str` ]).
2617 This is an alias for ``self.registry.defaults.collections``. It cannot
2618 be set directly in isolation, but all defaults may be changed together
2619 by assigning a new `RegistryDefaults` instance to
2620 ``self.registry.defaults``.
2621 """
2622 return self._registry.defaults.collections
2624 @property
2625 def run(self) -> str | None:
2626 """Name of the run this butler writes outputs to by default (`str` or
2627 `None`).
2629 This is an alias for ``self.registry.defaults.run``. It cannot be set
2630 directly in isolation, but all defaults may be changed together by
2631 assigning a new `RegistryDefaults` instance to
2632 ``self.registry.defaults``.
2633 """
2634 return self._registry.defaults.run
2636 @property
2637 def registry(self) -> Registry:
2638 """The object that manages dataset metadata and relationships
2639 (`Registry`).
2641 Many operations that don't involve reading or writing butler datasets
2642 are accessible only via `Registry` methods. Eventually these methods
2643 will be replaced by equivalent `Butler` methods.
2644 """
2645 return self._registry_shim
2647 @property
2648 def dimensions(self) -> DimensionUniverse:
2649 # Docstring inherited.
2650 return self._registry.dimensions
2652 _registry: _ButlerRegistry
2653 """The object that manages dataset metadata and relationships
2654 (`_ButlerRegistry`).
2656 Most operations that don't involve reading or writing butler datasets are
2657 accessible only via `Registry` methods.
2658 """
2660 datastore: Datastore
2661 """The object that manages actual dataset storage (`Datastore`).
2663 Direct user access to the datastore should rarely be necessary; the primary
2664 exception is the case where a `Datastore` implementation provides extra
2665 functionality beyond what the base class defines.
2666 """
2668 storageClasses: StorageClassFactory
2669 """An object that maps known storage class names to objects that fully
2670 describe them (`StorageClassFactory`).
2671 """