Coverage for python/lsst/daf/butler/_butler.py: 51%
181 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:51 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-30 02:51 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["Butler"]
32from abc import abstractmethod
33from collections.abc import Collection, Iterable, Mapping, Sequence
34from contextlib import AbstractContextManager
35from typing import TYPE_CHECKING, Any, TextIO
37from lsst.resources import ResourcePath, ResourcePathExpression
38from lsst.utils import doImportType
39from lsst.utils.iteration import ensure_iterable
40from lsst.utils.logging import getLogger
42from ._butler_config import ButlerConfig, ButlerType
43from ._butler_instance_options import ButlerInstanceOptions
44from ._butler_repo_index import ButlerRepoIndex
45from ._config import Config, ConfigSubset
46from ._exceptions import EmptyQueryResultError
47from ._limited_butler import LimitedButler
48from .datastore import Datastore
49from .dimensions import DimensionConfig
50from .registry import RegistryConfig, _RegistryFactory
51from .repo_relocation import BUTLER_ROOT_TAG
53if TYPE_CHECKING:
54 from ._dataset_existence import DatasetExistence
55 from ._dataset_ref import DatasetId, DatasetRef
56 from ._dataset_type import DatasetType
57 from ._deferredDatasetHandle import DeferredDatasetHandle
58 from ._file_dataset import FileDataset
59 from ._storage_class import StorageClass
60 from ._timespan import Timespan
61 from .datastore import DatasetRefURIs
62 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
63 from .queries import Query
64 from .registry import Registry
65 from .transfers import RepoExportContext
67_LOG = getLogger(__name__)
70class Butler(LimitedButler): # numpydoc ignore=PR02
71 """Interface for data butler and factory for Butler instances.
73 Parameters
74 ----------
75 config : `ButlerConfig`, `Config` or `str`, optional
76 Configuration. Anything acceptable to the `ButlerConfig` constructor.
77 If a directory path is given the configuration will be read from a
78 ``butler.yaml`` file in that location. If `None` is given default
79 values will be used. If ``config`` contains "cls" key then its value is
80 used as a name of butler class and it must be a sub-class of this
81 class, otherwise `DirectButler` is instantiated.
82 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
83 An expression specifying the collections to be searched (in order) when
84 reading datasets.
85 This may be a `str` collection name or an iterable thereof.
86 See :ref:`daf_butler_collection_expressions` for more information.
87 These collections are not registered automatically and must be
88 manually registered before they are used by any method, but they may be
89 manually registered after the `Butler` is initialized.
90 run : `str`, optional
91 Name of the `~CollectionType.RUN` collection new datasets should be
92 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
93 ``collections`` will be set to ``[run]``. If not `None`, this
94 collection will automatically be registered. If this is not set (and
95 ``writeable`` is not set either), a read-only butler will be created.
96 searchPaths : `list` of `str`, optional
97 Directory paths to search when calculating the full Butler
98 configuration. Not used if the supplied config is already a
99 `ButlerConfig`.
100 writeable : `bool`, optional
101 Explicitly sets whether the butler supports write operations. If not
102 provided, a read-write butler is created if any of ``run``, ``tags``,
103 or ``chains`` is non-empty.
104 inferDefaults : `bool`, optional
105 If `True` (default) infer default data ID values from the values
106 present in the datasets in ``collections``: if all collections have the
107 same value (or no value) for a governor dimension, that value will be
108 the default for that dimension. Nonexistent collections are ignored.
109 If a default value is provided explicitly for a governor dimension via
110 ``**kwargs``, no default will be inferred for that dimension.
111 without_datastore : `bool`, optional
112 If `True` do not attach a datastore to this butler. Any attempts
113 to use a datastore will fail.
114 **kwargs : `Any`
115 Additional keyword arguments passed to a constructor of actual butler
116 class.
118 Notes
119 -----
120 The preferred way to instantiate Butler is via the `from_config` method.
121 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
122 but ``mypy`` will complain about the former.
123 """
125 def __new__(
126 cls,
127 config: Config | ResourcePathExpression | None = None,
128 *,
129 collections: Any = None,
130 run: str | None = None,
131 searchPaths: Sequence[ResourcePathExpression] | None = None,
132 writeable: bool | None = None,
133 inferDefaults: bool = True,
134 without_datastore: bool = False,
135 **kwargs: Any,
136 ) -> Butler:
137 if cls is Butler:
138 return Butler.from_config(
139 config=config,
140 collections=collections,
141 run=run,
142 searchPaths=searchPaths,
143 writeable=writeable,
144 inferDefaults=inferDefaults,
145 without_datastore=without_datastore,
146 **kwargs,
147 )
149 # Note: we do not pass any parameters to __new__, Python will pass them
150 # to __init__ after __new__ returns sub-class instance.
151 return super().__new__(cls)
153 @classmethod
154 def from_config(
155 cls,
156 config: Config | ResourcePathExpression | None = None,
157 *,
158 collections: Any = None,
159 run: str | None = None,
160 searchPaths: Sequence[ResourcePathExpression] | None = None,
161 writeable: bool | None = None,
162 inferDefaults: bool = True,
163 without_datastore: bool = False,
164 **kwargs: Any,
165 ) -> Butler:
166 """Create butler instance from configuration.
168 Parameters
169 ----------
170 config : `ButlerConfig`, `Config` or `str`, optional
171 Configuration. Anything acceptable to the `ButlerConfig`
172 constructor. If a directory path is given the configuration will be
173 read from a ``butler.yaml`` file in that location. If `None` is
174 given default values will be used. If ``config`` contains "cls" key
175 then its value is used as a name of butler class and it must be a
176 sub-class of this class, otherwise `DirectButler` is instantiated.
177 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
178 An expression specifying the collections to be searched (in order)
179 when reading datasets.
180 This may be a `str` collection name or an iterable thereof.
181 See :ref:`daf_butler_collection_expressions` for more information.
182 These collections are not registered automatically and must be
183 manually registered before they are used by any method, but they
184 may be manually registered after the `Butler` is initialized.
185 run : `str`, optional
186 Name of the `~CollectionType.RUN` collection new datasets should be
187 inserted into. If ``collections`` is `None` and ``run`` is not
188 `None`, ``collections`` will be set to ``[run]``. If not `None`,
189 this collection will automatically be registered. If this is not
190 set (and ``writeable`` is not set either), a read-only butler will
191 be created.
192 searchPaths : `list` of `str`, optional
193 Directory paths to search when calculating the full Butler
194 configuration. Not used if the supplied config is already a
195 `ButlerConfig`.
196 writeable : `bool`, optional
197 Explicitly sets whether the butler supports write operations. If
198 not provided, a read-write butler is created if any of ``run``,
199 ``tags``, or ``chains`` is non-empty.
200 inferDefaults : `bool`, optional
201 If `True` (default) infer default data ID values from the values
202 present in the datasets in ``collections``: if all collections have
203 the same value (or no value) for a governor dimension, that value
204 will be the default for that dimension. Nonexistent collections
205 are ignored. If a default value is provided explicitly for a
206 governor dimension via ``**kwargs``, no default will be inferred
207 for that dimension.
208 without_datastore : `bool`, optional
209 If `True` do not attach a datastore to this butler. Any attempts
210 to use a datastore will fail.
211 **kwargs : `Any`
212 Default data ID key-value pairs. These may only identify
213 "governor" dimensions like ``instrument`` and ``skymap``.
215 Returns
216 -------
217 butler : `Butler`
218 A `Butler` constructed from the given configuration.
220 Notes
221 -----
222 Calling this factory method is identical to calling
223 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
224 complains about ``Butler()`` call.
226 Examples
227 --------
228 While there are many ways to control exactly how a `Butler` interacts
229 with the collections in its `Registry`, the most common cases are still
230 simple.
232 For a read-only `Butler` that searches one collection, do::
234 butler = Butler.from_config(
235 "/path/to/repo", collections=["u/alice/DM-50000"]
236 )
238 For a read-write `Butler` that writes to and reads from a
239 `~CollectionType.RUN` collection::
241 butler = Butler.from_config(
242 "/path/to/repo", run="u/alice/DM-50000/a"
243 )
245 The `Butler` passed to a ``PipelineTask`` is often much more complex,
246 because we want to write to one `~CollectionType.RUN` collection but
247 read from several others (as well)::
249 butler = Butler.from_config(
250 "/path/to/repo",
251 run="u/alice/DM-50000/a",
252 collections=[
253 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
254 ]
255 )
257 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
258 Datasets will be read first from that run (since it appears first in
259 the chain), and then from ``u/bob/DM-49998`` and finally
260 ``HSC/defaults``.
262 Finally, one can always create a `Butler` with no collections::
264 butler = Butler.from_config("/path/to/repo", writeable=True)
266 This can be extremely useful when you just want to use
267 ``butler.registry``, e.g. for inserting dimension data or managing
268 collections, or when the collections you want to use with the butler
269 are not consistent. Passing ``writeable`` explicitly here is only
270 necessary if you want to be able to make changes to the repo - usually
271 the value for ``writeable`` can be guessed from the collection
272 arguments provided, but it defaults to `False` when there are not
273 collection arguments.
274 """
275 # DirectButler used to have a way to specify a "copy constructor" by
276 # passing the "butler" parameter to its constructor. This
277 # functionality has been moved out of the constructor into
278 # Butler._clone(), but the new interface is not public yet.
279 butler = kwargs.pop("butler", None)
280 if butler is not None:
281 if not isinstance(butler, Butler):
282 raise TypeError("'butler' parameter must be a Butler instance")
283 if config is not None or searchPaths is not None or writeable is not None:
284 raise TypeError(
285 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
286 )
287 return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
289 options = ButlerInstanceOptions(
290 collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
291 )
293 # Load the Butler configuration. This may involve searching the
294 # environment to locate a configuration file.
295 butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
296 butler_type = butler_config.get_butler_type()
298 # Make DirectButler if class is not specified.
299 match butler_type:
300 case ButlerType.DIRECT:
301 from .direct_butler import DirectButler
303 return DirectButler.create_from_config(
304 butler_config,
305 options=options,
306 without_datastore=without_datastore,
307 )
308 case ButlerType.REMOTE:
309 from .remote_butler import RemoteButlerFactory
311 factory = RemoteButlerFactory.create_factory_from_config(butler_config)
312 return factory.create_butler_with_credentials_from_environment(butler_options=options)
313 case _:
314 raise TypeError(f"Unknown Butler type '{butler_type}'")
316 @staticmethod
317 def makeRepo(
318 root: ResourcePathExpression,
319 config: Config | str | None = None,
320 dimensionConfig: Config | str | None = None,
321 standalone: bool = False,
322 searchPaths: list[str] | None = None,
323 forceConfigRoot: bool = True,
324 outfile: ResourcePathExpression | None = None,
325 overwrite: bool = False,
326 ) -> Config:
327 """Create an empty data repository by adding a butler.yaml config
328 to a repository root directory.
330 Parameters
331 ----------
332 root : `lsst.resources.ResourcePathExpression`
333 Path or URI to the root location of the new repository. Will be
334 created if it does not exist.
335 config : `Config` or `str`, optional
336 Configuration to write to the repository, after setting any
337 root-dependent Registry or Datastore config options. Can not
338 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
339 configuration will be used. Root-dependent config options
340 specified in this config are overwritten if ``forceConfigRoot``
341 is `True`.
342 dimensionConfig : `Config` or `str`, optional
343 Configuration for dimensions, will be used to initialize registry
344 database.
345 standalone : `bool`
346 If True, write all expanded defaults, not just customized or
347 repository-specific settings.
348 This (mostly) decouples the repository from the default
349 configuration, insulating it from changes to the defaults (which
350 may be good or bad, depending on the nature of the changes).
351 Future *additions* to the defaults will still be picked up when
352 initializing `Butlers` to repos created with ``standalone=True``.
353 searchPaths : `list` of `str`, optional
354 Directory paths to search when calculating the full butler
355 configuration.
356 forceConfigRoot : `bool`, optional
357 If `False`, any values present in the supplied ``config`` that
358 would normally be reset are not overridden and will appear
359 directly in the output config. This allows non-standard overrides
360 of the root directory for a datastore or registry to be given.
361 If this parameter is `True` the values for ``root`` will be
362 forced into the resulting config if appropriate.
363 outfile : `lss.resources.ResourcePathExpression`, optional
364 If not-`None`, the output configuration will be written to this
365 location rather than into the repository itself. Can be a URI
366 string. Can refer to a directory that will be used to write
367 ``butler.yaml``.
368 overwrite : `bool`, optional
369 Create a new configuration file even if one already exists
370 in the specified output location. Default is to raise
371 an exception.
373 Returns
374 -------
375 config : `Config`
376 The updated `Config` instance written to the repo.
378 Raises
379 ------
380 ValueError
381 Raised if a ButlerConfig or ConfigSubset is passed instead of a
382 regular Config (as these subclasses would make it impossible to
383 support ``standalone=False``).
384 FileExistsError
385 Raised if the output config file already exists.
386 os.error
387 Raised if the directory does not exist, exists but is not a
388 directory, or cannot be created.
390 Notes
391 -----
392 Note that when ``standalone=False`` (the default), the configuration
393 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
394 construct the repository should also be used to construct any Butlers
395 to avoid configuration inconsistencies.
396 """
397 if isinstance(config, ButlerConfig | ConfigSubset):
398 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
400 # Ensure that the root of the repository exists or can be made
401 root_uri = ResourcePath(root, forceDirectory=True)
402 root_uri.mkdir()
404 config = Config(config)
406 # If we are creating a new repo from scratch with relative roots,
407 # do not propagate an explicit root from the config file
408 if "root" in config:
409 del config["root"]
411 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
412 imported_class = doImportType(full["datastore", "cls"])
413 if not issubclass(imported_class, Datastore):
414 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
415 datastoreClass: type[Datastore] = imported_class
416 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
418 # if key exists in given config, parse it, otherwise parse the defaults
419 # in the expanded config
420 if config.get(("registry", "db")):
421 registryConfig = RegistryConfig(config)
422 else:
423 registryConfig = RegistryConfig(full)
424 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
425 if defaultDatabaseUri is not None:
426 Config.updateParameters(
427 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
428 )
429 else:
430 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
432 if standalone:
433 config.merge(full)
434 else:
435 # Always expand the registry.managers section into the per-repo
436 # config, because after the database schema is created, it's not
437 # allowed to change anymore. Note that in the standalone=True
438 # branch, _everything_ in the config is expanded, so there's no
439 # need to special case this.
440 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
441 configURI: ResourcePathExpression
442 if outfile is not None:
443 # When writing to a separate location we must include
444 # the root of the butler repo in the config else it won't know
445 # where to look.
446 config["root"] = root_uri.geturl()
447 configURI = outfile
448 else:
449 configURI = root_uri
450 # Strip obscore configuration, if it is present, before writing config
451 # to a file, obscore config will be stored in registry.
452 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
453 config_to_write = config.copy()
454 del config_to_write[obscore_config_key]
455 config_to_write.dumpToUri(configURI, overwrite=overwrite)
456 # configFile attribute is updated, need to copy it to original.
457 config.configFile = config_to_write.configFile
458 else:
459 config.dumpToUri(configURI, overwrite=overwrite)
461 # Create Registry and populate tables
462 registryConfig = RegistryConfig(config.get("registry"))
463 dimensionConfig = DimensionConfig(dimensionConfig)
464 _RegistryFactory(registryConfig).create_from_config(
465 dimensionConfig=dimensionConfig, butlerRoot=root_uri
466 )
468 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
470 return config
472 @classmethod
473 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
474 """Look up the label in a butler repository index.
476 Parameters
477 ----------
478 label : `str`
479 Label of the Butler repository to look up.
480 return_label : `bool`, optional
481 If ``label`` cannot be found in the repository index (either
482 because index is not defined or ``label`` is not in the index) and
483 ``return_label`` is `True` then return ``ResourcePath(label)``.
484 If ``return_label`` is `False` (default) then an exception will be
485 raised instead.
487 Returns
488 -------
489 uri : `lsst.resources.ResourcePath`
490 URI to the Butler repository associated with the given label or
491 default value if it is provided.
493 Raises
494 ------
495 KeyError
496 Raised if the label is not found in the index, or if an index
497 is not defined, and ``return_label`` is `False`.
499 Notes
500 -----
501 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
502 information is discovered.
503 """
504 return ButlerRepoIndex.get_repo_uri(label, return_label)
506 @classmethod
507 def get_known_repos(cls) -> set[str]:
508 """Retrieve the list of known repository labels.
510 Returns
511 -------
512 repos : `set` of `str`
513 All the known labels. Can be empty if no index can be found.
515 Notes
516 -----
517 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
518 information is discovered.
519 """
520 return ButlerRepoIndex.get_known_repos()
522 @abstractmethod
523 def _caching_context(self) -> AbstractContextManager[None]:
524 """Context manager that enables caching."""
525 raise NotImplementedError()
527 @abstractmethod
528 def transaction(self) -> AbstractContextManager[None]:
529 """Context manager supporting `Butler` transactions.
531 Transactions can be nested.
532 """
533 raise NotImplementedError()
535 @abstractmethod
536 def put(
537 self,
538 obj: Any,
539 datasetRefOrType: DatasetRef | DatasetType | str,
540 /,
541 dataId: DataId | None = None,
542 *,
543 run: str | None = None,
544 **kwargs: Any,
545 ) -> DatasetRef:
546 """Store and register a dataset.
548 Parameters
549 ----------
550 obj : `object`
551 The dataset.
552 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
553 When `DatasetRef` is provided, ``dataId`` should be `None`.
554 Otherwise the `DatasetType` or name thereof. If a fully resolved
555 `DatasetRef` is given the run and ID are used directly.
556 dataId : `dict` or `DataCoordinate`
557 A `dict` of `Dimension` link name, value pairs that label the
558 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
559 should be provided as the second argument.
560 run : `str`, optional
561 The name of the run the dataset should be added to, overriding
562 ``self.run``. Not used if a resolved `DatasetRef` is provided.
563 **kwargs
564 Additional keyword arguments used to augment or construct a
565 `DataCoordinate`. See `DataCoordinate.standardize`
566 parameters. Not used if a resolve `DatasetRef` is provided.
568 Returns
569 -------
570 ref : `DatasetRef`
571 A reference to the stored dataset, updated with the correct id if
572 given.
574 Raises
575 ------
576 TypeError
577 Raised if the butler is read-only or if no run has been provided.
578 """
579 raise NotImplementedError()
581 @abstractmethod
582 def getDeferred(
583 self,
584 datasetRefOrType: DatasetRef | DatasetType | str,
585 /,
586 dataId: DataId | None = None,
587 *,
588 parameters: dict | None = None,
589 collections: Any = None,
590 storageClass: str | StorageClass | None = None,
591 timespan: Timespan | None = None,
592 **kwargs: Any,
593 ) -> DeferredDatasetHandle:
594 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
595 after an immediate registry lookup.
597 Parameters
598 ----------
599 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
600 When `DatasetRef` the `dataId` should be `None`.
601 Otherwise the `DatasetType` or name thereof.
602 dataId : `dict` or `DataCoordinate`, optional
603 A `dict` of `Dimension` link name, value pairs that label the
604 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
605 should be provided as the first argument.
606 parameters : `dict`
607 Additional StorageClass-defined options to control reading,
608 typically used to efficiently read only a subset of the dataset.
609 collections : Any, optional
610 Collections to be searched, overriding ``self.collections``.
611 Can be any of the types supported by the ``collections`` argument
612 to butler construction.
613 storageClass : `StorageClass` or `str`, optional
614 The storage class to be used to override the Python type
615 returned by this method. By default the returned type matches
616 the dataset type definition for this dataset. Specifying a
617 read `StorageClass` can force a different type to be returned.
618 This type must be compatible with the original type.
619 timespan : `Timespan` or `None`, optional
620 A timespan that the validity range of the dataset must overlap.
621 If not provided and this is a calibration dataset type, an attempt
622 will be made to find the timespan from any temporal coordinate
623 in the data ID.
624 **kwargs
625 Additional keyword arguments used to augment or construct a
626 `DataId`. See `DataId` parameters.
628 Returns
629 -------
630 obj : `DeferredDatasetHandle`
631 A handle which can be used to retrieve a dataset at a later time.
633 Raises
634 ------
635 LookupError
636 Raised if no matching dataset exists in the `Registry` or
637 datastore.
638 ValueError
639 Raised if a resolved `DatasetRef` was passed as an input, but it
640 differs from the one found in the registry.
641 TypeError
642 Raised if no collections were provided.
643 """
644 raise NotImplementedError()
646 @abstractmethod
647 def get(
648 self,
649 datasetRefOrType: DatasetRef | DatasetType | str,
650 /,
651 dataId: DataId | None = None,
652 *,
653 parameters: dict[str, Any] | None = None,
654 collections: Any = None,
655 storageClass: StorageClass | str | None = None,
656 timespan: Timespan | None = None,
657 **kwargs: Any,
658 ) -> Any:
659 """Retrieve a stored dataset.
661 Parameters
662 ----------
663 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
664 When `DatasetRef` the `dataId` should be `None`.
665 Otherwise the `DatasetType` or name thereof.
666 If a resolved `DatasetRef`, the associated dataset
667 is returned directly without additional querying.
668 dataId : `dict` or `DataCoordinate`
669 A `dict` of `Dimension` link name, value pairs that label the
670 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
671 should be provided as the first argument.
672 parameters : `dict`
673 Additional StorageClass-defined options to control reading,
674 typically used to efficiently read only a subset of the dataset.
675 collections : Any, optional
676 Collections to be searched, overriding ``self.collections``.
677 Can be any of the types supported by the ``collections`` argument
678 to butler construction.
679 storageClass : `StorageClass` or `str`, optional
680 The storage class to be used to override the Python type
681 returned by this method. By default the returned type matches
682 the dataset type definition for this dataset. Specifying a
683 read `StorageClass` can force a different type to be returned.
684 This type must be compatible with the original type.
685 timespan : `Timespan` or `None`, optional
686 A timespan that the validity range of the dataset must overlap.
687 If not provided and this is a calibration dataset type, an attempt
688 will be made to find the timespan from any temporal coordinate
689 in the data ID.
690 **kwargs
691 Additional keyword arguments used to augment or construct a
692 `DataCoordinate`. See `DataCoordinate.standardize`
693 parameters.
695 Returns
696 -------
697 obj : `object`
698 The dataset.
700 Raises
701 ------
702 LookupError
703 Raised if no matching dataset exists in the `Registry`.
704 TypeError
705 Raised if no collections were provided.
707 Notes
708 -----
709 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
710 this method requires that the given data ID include temporal dimensions
711 beyond the dimensions of the dataset type itself, in order to find the
712 dataset with the appropriate validity range. For example, a "bias"
713 dataset with native dimensions ``{instrument, detector}`` could be
714 fetched with a ``{instrument, detector, exposure}`` data ID, because
715 ``exposure`` is a temporal dimension.
716 """
717 raise NotImplementedError()
719 @abstractmethod
720 def getURIs(
721 self,
722 datasetRefOrType: DatasetRef | DatasetType | str,
723 /,
724 dataId: DataId | None = None,
725 *,
726 predict: bool = False,
727 collections: Any = None,
728 run: str | None = None,
729 **kwargs: Any,
730 ) -> DatasetRefURIs:
731 """Return the URIs associated with the dataset.
733 Parameters
734 ----------
735 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
736 When `DatasetRef` the `dataId` should be `None`.
737 Otherwise the `DatasetType` or name thereof.
738 dataId : `dict` or `DataCoordinate`
739 A `dict` of `Dimension` link name, value pairs that label the
740 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
741 should be provided as the first argument.
742 predict : `bool`
743 If `True`, allow URIs to be returned of datasets that have not
744 been written.
745 collections : Any, optional
746 Collections to be searched, overriding ``self.collections``.
747 Can be any of the types supported by the ``collections`` argument
748 to butler construction.
749 run : `str`, optional
750 Run to use for predictions, overriding ``self.run``.
751 **kwargs
752 Additional keyword arguments used to augment or construct a
753 `DataCoordinate`. See `DataCoordinate.standardize`
754 parameters.
756 Returns
757 -------
758 uris : `DatasetRefURIs`
759 The URI to the primary artifact associated with this dataset (if
760 the dataset was disassembled within the datastore this may be
761 `None`), and the URIs to any components associated with the dataset
762 artifact. (can be empty if there are no components).
763 """
764 raise NotImplementedError()
766 def getURI(
767 self,
768 datasetRefOrType: DatasetRef | DatasetType | str,
769 /,
770 dataId: DataId | None = None,
771 *,
772 predict: bool = False,
773 collections: Any = None,
774 run: str | None = None,
775 **kwargs: Any,
776 ) -> ResourcePath:
777 """Return the URI to the Dataset.
779 Parameters
780 ----------
781 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
782 When `DatasetRef` the `dataId` should be `None`.
783 Otherwise the `DatasetType` or name thereof.
784 dataId : `dict` or `DataCoordinate`
785 A `dict` of `Dimension` link name, value pairs that label the
786 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
787 should be provided as the first argument.
788 predict : `bool`
789 If `True`, allow URIs to be returned of datasets that have not
790 been written.
791 collections : Any, optional
792 Collections to be searched, overriding ``self.collections``.
793 Can be any of the types supported by the ``collections`` argument
794 to butler construction.
795 run : `str`, optional
796 Run to use for predictions, overriding ``self.run``.
797 **kwargs
798 Additional keyword arguments used to augment or construct a
799 `DataCoordinate`. See `DataCoordinate.standardize`
800 parameters.
802 Returns
803 -------
804 uri : `lsst.resources.ResourcePath`
805 URI pointing to the Dataset within the datastore. If the
806 Dataset does not exist in the datastore, and if ``predict`` is
807 `True`, the URI will be a prediction and will include a URI
808 fragment "#predicted".
809 If the datastore does not have entities that relate well
810 to the concept of a URI the returned URI string will be
811 descriptive. The returned URI is not guaranteed to be obtainable.
813 Raises
814 ------
815 LookupError
816 A URI has been requested for a dataset that does not exist and
817 guessing is not allowed.
818 ValueError
819 Raised if a resolved `DatasetRef` was passed as an input, but it
820 differs from the one found in the registry.
821 TypeError
822 Raised if no collections were provided.
823 RuntimeError
824 Raised if a URI is requested for a dataset that consists of
825 multiple artifacts.
826 """
827 primary, components = self.getURIs(
828 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
829 )
831 if primary is None or components:
832 raise RuntimeError(
833 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
834 "Use Butler.getURIs() instead."
835 )
836 return primary
838 @abstractmethod
839 def get_dataset_type(self, name: str) -> DatasetType:
840 """Get the `DatasetType`.
842 Parameters
843 ----------
844 name : `str`
845 Name of the type.
847 Returns
848 -------
849 type : `DatasetType`
850 The `DatasetType` associated with the given name.
852 Raises
853 ------
854 lsst.daf.butler.MissingDatasetTypeError
855 Raised if the requested dataset type has not been registered.
857 Notes
858 -----
859 This method handles component dataset types automatically, though most
860 other operations do not.
861 """
862 raise NotImplementedError()
864 @abstractmethod
865 def get_dataset(
866 self,
867 id: DatasetId,
868 *,
869 storage_class: str | StorageClass | None = None,
870 dimension_records: bool = False,
871 datastore_records: bool = False,
872 ) -> DatasetRef | None:
873 """Retrieve a Dataset entry.
875 Parameters
876 ----------
877 id : `DatasetId`
878 The unique identifier for the dataset.
879 storage_class : `str` or `StorageClass` or `None`
880 A storage class to use when creating the returned entry. If given
881 it must be compatible with the default storage class.
882 dimension_records : `bool`, optional
883 If `True` the ref will be expanded and contain dimension records.
884 datastore_records : `bool`, optional
885 If `True` the ref will contain associated datastore records.
887 Returns
888 -------
889 ref : `DatasetRef` or `None`
890 A ref to the Dataset, or `None` if no matching Dataset
891 was found.
892 """
893 raise NotImplementedError()
895 @abstractmethod
896 def find_dataset(
897 self,
898 dataset_type: DatasetType | str,
899 data_id: DataId | None = None,
900 *,
901 collections: str | Sequence[str] | None = None,
902 timespan: Timespan | None = None,
903 storage_class: str | StorageClass | None = None,
904 dimension_records: bool = False,
905 datastore_records: bool = False,
906 **kwargs: Any,
907 ) -> DatasetRef | None:
908 """Find a dataset given its `DatasetType` and data ID.
910 This can be used to obtain a `DatasetRef` that permits the dataset to
911 be read from a `Datastore`. If the dataset is a component and can not
912 be found using the provided dataset type, a dataset ref for the parent
913 will be returned instead but with the correct dataset type.
915 Parameters
916 ----------
917 dataset_type : `DatasetType` or `str`
918 A `DatasetType` or the name of one. If this is a `DatasetType`
919 instance, its storage class will be respected and propagated to
920 the output, even if it differs from the dataset type definition
921 in the registry, as long as the storage classes are convertible.
922 data_id : `dict` or `DataCoordinate`, optional
923 A `dict`-like object containing the `Dimension` links that identify
924 the dataset within a collection. If it is a `dict` the dataId
925 can include dimension record values such as ``day_obs`` and
926 ``seq_num`` or ``full_name`` that can be used to derive the
927 primary dimension.
928 collections : `str` or `list` [`str`], optional
929 A an ordered list of collections to search for the dataset.
930 Defaults to ``self.defaults.collections``.
931 timespan : `Timespan`, optional
932 A timespan that the validity range of the dataset must overlap.
933 If not provided, any `~CollectionType.CALIBRATION` collections
934 matched by the ``collections`` argument will not be searched.
935 storage_class : `str` or `StorageClass` or `None`
936 A storage class to use when creating the returned entry. If given
937 it must be compatible with the default storage class.
938 dimension_records : `bool`, optional
939 If `True` the ref will be expanded and contain dimension records.
940 datastore_records : `bool`, optional
941 If `True` the ref will contain associated datastore records.
942 **kwargs
943 Additional keyword arguments passed to
944 `DataCoordinate.standardize` to convert ``dataId`` to a true
945 `DataCoordinate` or augment an existing one. This can also include
946 dimension record metadata that can be used to derive a primary
947 dimension value.
949 Returns
950 -------
951 ref : `DatasetRef`
952 A reference to the dataset, or `None` if no matching Dataset
953 was found.
955 Raises
956 ------
957 lsst.daf.butler.NoDefaultCollectionError
958 Raised if ``collections`` is `None` and
959 ``self.collections`` is `None`.
960 LookupError
961 Raised if one or more data ID keys are missing.
962 lsst.daf.butler.MissingDatasetTypeError
963 Raised if the dataset type does not exist.
964 lsst.daf.butler.MissingCollectionError
965 Raised if any of ``collections`` does not exist in the registry.
967 Notes
968 -----
969 This method simply returns `None` and does not raise an exception even
970 when the set of collections searched is intrinsically incompatible with
971 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
972 only `~CollectionType.CALIBRATION` collections are being searched.
973 This may make it harder to debug some lookup failures, but the behavior
974 is intentional; we consider it more important that failed searches are
975 reported consistently, regardless of the reason, and that adding
976 additional collections that do not contain a match to the search path
977 never changes the behavior.
979 This method handles component dataset types automatically, though most
980 other query operations do not.
981 """
982 raise NotImplementedError()
984 @abstractmethod
985 def retrieveArtifacts(
986 self,
987 refs: Iterable[DatasetRef],
988 destination: ResourcePathExpression,
989 transfer: str = "auto",
990 preserve_path: bool = True,
991 overwrite: bool = False,
992 ) -> list[ResourcePath]:
993 """Retrieve the artifacts associated with the supplied refs.
995 Parameters
996 ----------
997 refs : iterable of `DatasetRef`
998 The datasets for which artifacts are to be retrieved.
999 A single ref can result in multiple artifacts. The refs must
1000 be resolved.
1001 destination : `lsst.resources.ResourcePath` or `str`
1002 Location to write the artifacts.
1003 transfer : `str`, optional
1004 Method to use to transfer the artifacts. Must be one of the options
1005 supported by `~lsst.resources.ResourcePath.transfer_from()`.
1006 "move" is not allowed.
1007 preserve_path : `bool`, optional
1008 If `True` the full path of the artifact within the datastore
1009 is preserved. If `False` the final file component of the path
1010 is used.
1011 overwrite : `bool`, optional
1012 If `True` allow transfers to overwrite existing files at the
1013 destination.
1015 Returns
1016 -------
1017 targets : `list` of `lsst.resources.ResourcePath`
1018 URIs of file artifacts in destination location. Order is not
1019 preserved.
1021 Notes
1022 -----
1023 For non-file datastores the artifacts written to the destination
1024 may not match the representation inside the datastore. For example
1025 a hierarchical data structure in a NoSQL database may well be stored
1026 as a JSON file.
1027 """
1028 raise NotImplementedError()
1030 @abstractmethod
1031 def exists(
1032 self,
1033 dataset_ref_or_type: DatasetRef | DatasetType | str,
1034 /,
1035 data_id: DataId | None = None,
1036 *,
1037 full_check: bool = True,
1038 collections: Any = None,
1039 **kwargs: Any,
1040 ) -> DatasetExistence:
1041 """Indicate whether a dataset is known to Butler registry and
1042 datastore.
1044 Parameters
1045 ----------
1046 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
1047 When `DatasetRef` the `dataId` should be `None`.
1048 Otherwise the `DatasetType` or name thereof.
1049 data_id : `dict` or `DataCoordinate`
1050 A `dict` of `Dimension` link name, value pairs that label the
1051 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1052 should be provided as the first argument.
1053 full_check : `bool`, optional
1054 If `True`, a check will be made for the actual existence of a
1055 dataset artifact. This will involve additional overhead due to
1056 the need to query an external system. If `False`, this check will
1057 be omitted, and the registry and datastore will solely be asked
1058 if they know about the dataset but no direct check for the
1059 artifact will be performed.
1060 collections : Any, optional
1061 Collections to be searched, overriding ``self.collections``.
1062 Can be any of the types supported by the ``collections`` argument
1063 to butler construction.
1064 **kwargs
1065 Additional keyword arguments used to augment or construct a
1066 `DataCoordinate`. See `DataCoordinate.standardize`
1067 parameters.
1069 Returns
1070 -------
1071 existence : `DatasetExistence`
1072 Object indicating whether the dataset is known to registry and
1073 datastore. Evaluates to `True` if the dataset is present and known
1074 to both.
1075 """
1076 raise NotImplementedError()
1078 @abstractmethod
1079 def _exists_many(
1080 self,
1081 refs: Iterable[DatasetRef],
1082 /,
1083 *,
1084 full_check: bool = True,
1085 ) -> dict[DatasetRef, DatasetExistence]:
1086 """Indicate whether multiple datasets are known to Butler registry and
1087 datastore.
1089 This is an experimental API that may change at any moment.
1091 Parameters
1092 ----------
1093 refs : iterable of `DatasetRef`
1094 The datasets to be checked.
1095 full_check : `bool`, optional
1096 If `True`, a check will be made for the actual existence of each
1097 dataset artifact. This will involve additional overhead due to
1098 the need to query an external system. If `False`, this check will
1099 be omitted, and the registry and datastore will solely be asked
1100 if they know about the dataset(s) but no direct check for the
1101 artifact(s) will be performed.
1103 Returns
1104 -------
1105 existence : dict of [`DatasetRef`, `DatasetExistence`]
1106 Mapping from the given dataset refs to an enum indicating the
1107 status of the dataset in registry and datastore.
1108 Each value evaluates to `True` if the dataset is present and known
1109 to both.
1110 """
1111 raise NotImplementedError()
1113 @abstractmethod
1114 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1115 """Remove one or more `~CollectionType.RUN` collections and the
1116 datasets within them.
1118 Parameters
1119 ----------
1120 names : `~collections.abc.Iterable` [ `str` ]
1121 The names of the collections to remove.
1122 unstore : `bool`, optional
1123 If `True` (default), delete datasets from all datastores in which
1124 they are present, and attempt to rollback the registry deletions if
1125 datastore deletions fail (which may not always be possible). If
1126 `False`, datastore records for these datasets are still removed,
1127 but any artifacts (e.g. files) will not be.
1129 Raises
1130 ------
1131 TypeError
1132 Raised if one or more collections are not of type
1133 `~CollectionType.RUN`.
1134 """
1135 raise NotImplementedError()
1137 @abstractmethod
1138 def ingest(
1139 self,
1140 *datasets: FileDataset,
1141 transfer: str | None = "auto",
1142 record_validation_info: bool = True,
1143 ) -> None:
1144 """Store and register one or more datasets that already exist on disk.
1146 Parameters
1147 ----------
1148 *datasets : `FileDataset`
1149 Each positional argument is a struct containing information about
1150 a file to be ingested, including its URI (either absolute or
1151 relative to the datastore root, if applicable), a resolved
1152 `DatasetRef`, and optionally a formatter class or its
1153 fully-qualified string name. If a formatter is not provided, the
1154 formatter that would be used for `put` is assumed. On successful
1155 ingest all `FileDataset.formatter` attributes will be set to the
1156 formatter class used. `FileDataset.path` attributes may be modified
1157 to put paths in whatever the datastore considers a standardized
1158 form.
1159 transfer : `str`, optional
1160 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1161 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1162 transfer the file.
1163 record_validation_info : `bool`, optional
1164 If `True`, the default, the datastore can record validation
1165 information associated with the file. If `False` the datastore
1166 will not attempt to track any information such as checksums
1167 or file sizes. This can be useful if such information is tracked
1168 in an external system or if the file is to be compressed in place.
1169 It is up to the datastore whether this parameter is relevant.
1171 Raises
1172 ------
1173 TypeError
1174 Raised if the butler is read-only or if no run was provided.
1175 NotImplementedError
1176 Raised if the `Datastore` does not support the given transfer mode.
1177 DatasetTypeNotSupportedError
1178 Raised if one or more files to be ingested have a dataset type that
1179 is not supported by the `Datastore`..
1180 FileNotFoundError
1181 Raised if one of the given files does not exist.
1182 FileExistsError
1183 Raised if transfer is not `None` but the (internal) location the
1184 file would be moved to is already occupied.
1186 Notes
1187 -----
1188 This operation is not fully exception safe: if a database operation
1189 fails, the given `FileDataset` instances may be only partially updated.
1191 It is atomic in terms of database operations (they will either all
1192 succeed or all fail) providing the database engine implements
1193 transactions correctly. It will attempt to be atomic in terms of
1194 filesystem operations as well, but this cannot be implemented
1195 rigorously for most datastores.
1196 """
1197 raise NotImplementedError()
1199 @abstractmethod
1200 def export(
1201 self,
1202 *,
1203 directory: str | None = None,
1204 filename: str | None = None,
1205 format: str | None = None,
1206 transfer: str | None = None,
1207 ) -> AbstractContextManager[RepoExportContext]:
1208 """Export datasets from the repository represented by this `Butler`.
1210 This method is a context manager that returns a helper object
1211 (`RepoExportContext`) that is used to indicate what information from
1212 the repository should be exported.
1214 Parameters
1215 ----------
1216 directory : `str`, optional
1217 Directory dataset files should be written to if ``transfer`` is not
1218 `None`.
1219 filename : `str`, optional
1220 Name for the file that will include database information associated
1221 with the exported datasets. If this is not an absolute path and
1222 ``directory`` is not `None`, it will be written to ``directory``
1223 instead of the current working directory. Defaults to
1224 "export.{format}".
1225 format : `str`, optional
1226 File format for the database information file. If `None`, the
1227 extension of ``filename`` will be used.
1228 transfer : `str`, optional
1229 Transfer mode passed to `Datastore.export`.
1231 Raises
1232 ------
1233 TypeError
1234 Raised if the set of arguments passed is inconsistent.
1236 Examples
1237 --------
1238 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1239 methods are used to provide the iterables over data IDs and/or datasets
1240 to be exported::
1242 with butler.export("exports.yaml") as export:
1243 # Export all flats, but none of the dimension element rows
1244 # (i.e. data ID information) associated with them.
1245 export.saveDatasets(butler.registry.queryDatasets("flat"),
1246 elements=())
1247 # Export all datasets that start with "deepCoadd_" and all of
1248 # their associated data ID information.
1249 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1250 """
1251 raise NotImplementedError()
1253 @abstractmethod
1254 def import_(
1255 self,
1256 *,
1257 directory: ResourcePathExpression | None = None,
1258 filename: ResourcePathExpression | TextIO | None = None,
1259 format: str | None = None,
1260 transfer: str | None = None,
1261 skip_dimensions: set | None = None,
1262 ) -> None:
1263 """Import datasets into this repository that were exported from a
1264 different butler repository via `~lsst.daf.butler.Butler.export`.
1266 Parameters
1267 ----------
1268 directory : `~lsst.resources.ResourcePathExpression`, optional
1269 Directory containing dataset files to import from. If `None`,
1270 ``filename`` and all dataset file paths specified therein must
1271 be absolute.
1272 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1273 A stream or name of file that contains database information
1274 associated with the exported datasets, typically generated by
1275 `~lsst.daf.butler.Butler.export`. If this a string (name) or
1276 `~lsst.resources.ResourcePath` and is not an absolute path,
1277 it will first be looked for relative to ``directory`` and if not
1278 found there it will be looked for in the current working
1279 directory. Defaults to "export.{format}".
1280 format : `str`, optional
1281 File format for ``filename``. If `None`, the extension of
1282 ``filename`` will be used.
1283 transfer : `str`, optional
1284 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1285 skip_dimensions : `set`, optional
1286 Names of dimensions that should be skipped and not imported.
1288 Raises
1289 ------
1290 TypeError
1291 Raised if the set of arguments passed is inconsistent, or if the
1292 butler is read-only.
1293 """
1294 raise NotImplementedError()
1296 @abstractmethod
1297 def transfer_dimension_records_from(
1298 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1299 ) -> None:
1300 """Transfer dimension records to this Butler from another Butler.
1302 Parameters
1303 ----------
1304 source_butler : `LimitedButler` or `Butler`
1305 Butler from which the records are to be transferred. If data IDs
1306 in ``source_refs`` are not expanded then this has to be a full
1307 `Butler` whose registry will be used to expand data IDs. If the
1308 source refs contain coordinates that are used to populate other
1309 records then this will also need to be a full `Butler`.
1310 source_refs : iterable of `DatasetRef`
1311 Datasets defined in the source butler whose dimension records
1312 should be transferred to this butler. In most circumstances.
1313 transfer is faster if the dataset refs are expanded.
1314 """
1315 raise NotImplementedError()
1317 @abstractmethod
1318 def transfer_from(
1319 self,
1320 source_butler: LimitedButler,
1321 source_refs: Iterable[DatasetRef],
1322 transfer: str = "auto",
1323 skip_missing: bool = True,
1324 register_dataset_types: bool = False,
1325 transfer_dimensions: bool = False,
1326 dry_run: bool = False,
1327 ) -> Collection[DatasetRef]:
1328 """Transfer datasets to this Butler from a run in another Butler.
1330 Parameters
1331 ----------
1332 source_butler : `LimitedButler`
1333 Butler from which the datasets are to be transferred. If data IDs
1334 in ``source_refs`` are not expanded then this has to be a full
1335 `Butler` whose registry will be used to expand data IDs.
1336 source_refs : iterable of `DatasetRef`
1337 Datasets defined in the source butler that should be transferred to
1338 this butler. In most circumstances, ``transfer_from`` is faster if
1339 the dataset refs are expanded.
1340 transfer : `str`, optional
1341 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1342 skip_missing : `bool`
1343 If `True`, datasets with no datastore artifact associated with
1344 them are not transferred. If `False` a registry entry will be
1345 created even if no datastore record is created (and so will
1346 look equivalent to the dataset being unstored).
1347 register_dataset_types : `bool`
1348 If `True` any missing dataset types are registered. Otherwise
1349 an exception is raised.
1350 transfer_dimensions : `bool`, optional
1351 If `True`, dimension record data associated with the new datasets
1352 will be transferred.
1353 dry_run : `bool`, optional
1354 If `True` the transfer will be processed without any modifications
1355 made to the target butler and as if the target butler did not
1356 have any of the datasets.
1358 Returns
1359 -------
1360 refs : `list` of `DatasetRef`
1361 The refs added to this Butler.
1363 Notes
1364 -----
1365 The datastore artifact has to exist for a transfer
1366 to be made but non-existence is not an error.
1368 Datasets that already exist in this run will be skipped.
1370 The datasets are imported as part of a transaction, although
1371 dataset types are registered before the transaction is started.
1372 This means that it is possible for a dataset type to be registered
1373 even though transfer has failed.
1374 """
1375 raise NotImplementedError()
1377 @abstractmethod
1378 def validateConfiguration(
1379 self,
1380 logFailures: bool = False,
1381 datasetTypeNames: Iterable[str] | None = None,
1382 ignore: Iterable[str] | None = None,
1383 ) -> None:
1384 """Validate butler configuration.
1386 Checks that each `DatasetType` can be stored in the `Datastore`.
1388 Parameters
1389 ----------
1390 logFailures : `bool`, optional
1391 If `True`, output a log message for every validation error
1392 detected.
1393 datasetTypeNames : iterable of `str`, optional
1394 The `DatasetType` names that should be checked. This allows
1395 only a subset to be selected.
1396 ignore : iterable of `str`, optional
1397 Names of DatasetTypes to skip over. This can be used to skip
1398 known problems. If a named `DatasetType` corresponds to a
1399 composite, all components of that `DatasetType` will also be
1400 ignored.
1402 Raises
1403 ------
1404 ButlerValidationError
1405 Raised if there is some inconsistency with how this Butler
1406 is configured.
1407 """
1408 raise NotImplementedError()
1410 @property
1411 @abstractmethod
1412 def collections(self) -> Sequence[str]:
1413 """The collections to search by default, in order
1414 (`~collections.abc.Sequence` [ `str` ]).
1415 """
1416 raise NotImplementedError()
1418 @property
1419 @abstractmethod
1420 def run(self) -> str | None:
1421 """Name of the run this butler writes outputs to by default (`str` or
1422 `None`).
1423 """
1424 raise NotImplementedError()
1426 @property
1427 @abstractmethod
1428 def registry(self) -> Registry:
1429 """The object that manages dataset metadata and relationships
1430 (`Registry`).
1432 Many operations that don't involve reading or writing butler datasets
1433 are accessible only via `Registry` methods. Eventually these methods
1434 will be replaced by equivalent `Butler` methods.
1435 """
1436 raise NotImplementedError()
1438 @abstractmethod
1439 def _query(self) -> AbstractContextManager[Query]:
1440 """Context manager returning a `Query` object used for construction
1441 and execution of complex queries.
1442 """
1443 raise NotImplementedError()
1445 def _query_data_ids(
1446 self,
1447 dimensions: DimensionGroup | Iterable[str] | str,
1448 *,
1449 data_id: DataId | None = None,
1450 where: str = "",
1451 bind: Mapping[str, Any] | None = None,
1452 with_dimension_records: bool = False,
1453 order_by: Iterable[str] | str | None = None,
1454 limit: int | None = None,
1455 explain: bool = True,
1456 **kwargs: Any,
1457 ) -> list[DataCoordinate]:
1458 """Query for data IDs matching user-provided criteria.
1460 Parameters
1461 ----------
1462 dimensions : `DimensionGroup`, `str`, or \
1463 `~collections.abc.Iterable` [`str`]
1464 The dimensions of the data IDs to yield, as either `DimensionGroup`
1465 instances or `str`. Will be automatically expanded to a complete
1466 `DimensionGroup`.
1467 data_id : `dict` or `DataCoordinate`, optional
1468 A data ID whose key-value pairs are used as equality constraints
1469 in the query.
1470 where : `str`, optional
1471 A string expression similar to a SQL WHERE clause. May involve
1472 any column of a dimension table or (as a shortcut for the primary
1473 key column of a dimension table) dimension name. See
1474 :ref:`daf_butler_dimension_expressions` for more information.
1475 bind : `~collections.abc.Mapping`, optional
1476 Mapping containing literal values that should be injected into the
1477 ``where`` expression, keyed by the identifiers they replace.
1478 Values of collection type can be expanded in some cases; see
1479 :ref:`daf_butler_dimension_expressions_identifiers` for more
1480 information.
1481 with_dimension_records : `bool`, optional
1482 If `True` (default is `False`) then returned data IDs will have
1483 dimension records.
1484 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1485 Names of the columns/dimensions to use for ordering returned data
1486 IDs. Column name can be prefixed with minus (``-``) to use
1487 descending ordering.
1488 limit : `int`, optional
1489 Upper limit on the number of returned records.
1490 explain : `bool`, optional
1491 If `True` (default) then `EmptyQueryResultError` exception is
1492 raised when resulting list is empty. The exception contains
1493 non-empty list of strings explaining possible causes for empty
1494 result.
1495 **kwargs
1496 Additional keyword arguments are forwarded to
1497 `DataCoordinate.standardize` when processing the ``data_id``
1498 argument (and may be used to provide a constraining data ID even
1499 when the ``data_id`` argument is `None`).
1501 Returns
1502 -------
1503 dataIds : `list` [`DataCoordinate`]
1504 Data IDs matching the given query parameters. These are always
1505 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1506 returns `True`).
1508 Raises
1509 ------
1510 lsst.daf.butler.registry.DataIdError
1511 Raised when ``data_id`` or keyword arguments specify unknown
1512 dimensions or values, or when they contain inconsistent values.
1513 lsst.daf.butler.registry.UserExpressionError
1514 Raised when ``where`` expression is invalid.
1515 lsst.daf.butler.EmptyQueryResultError
1516 Raised when query generates empty result and ``explain`` is set to
1517 `True`.
1518 TypeError
1519 Raised when the arguments are incompatible.
1520 """
1521 if data_id is None:
1522 data_id = DataCoordinate.make_empty(self.dimensions)
1523 with self._query() as query:
1524 result = (
1525 query.where(data_id, where, bind=bind, **kwargs)
1526 .data_ids(dimensions)
1527 .order_by(*ensure_iterable(order_by))
1528 .limit(limit)
1529 )
1530 if with_dimension_records:
1531 result = result.with_dimension_records()
1532 data_ids = list(result)
1533 if explain and not data_ids:
1534 raise EmptyQueryResultError(list(result.explain_no_results()))
1535 return data_ids
1537 def _query_datasets(
1538 self,
1539 dataset_type: str | DatasetType,
1540 collections: str | Iterable[str] | None = None,
1541 *,
1542 find_first: bool = True,
1543 data_id: DataId | None = None,
1544 where: str = "",
1545 bind: Mapping[str, Any] | None = None,
1546 with_dimension_records: bool = False,
1547 explain: bool = True,
1548 **kwargs: Any,
1549 ) -> list[DatasetRef]:
1550 """Query for dataset references matching user-provided criteria.
1552 Parameters
1553 ----------
1554 dataset_type : `str` or `DatasetType`
1555 Dataset type object or name to search for.
1556 collections : collection expression, optional
1557 A collection name or iterable of collection names to search. If not
1558 provided, the default collections are used. See
1559 :ref:`daf_butler_collection_expressions` for more information.
1560 find_first : `bool`, optional
1561 If `True` (default), for each result data ID, only yield one
1562 `DatasetRef` of each `DatasetType`, from the first collection in
1563 which a dataset of that dataset type appears (according to the
1564 order of ``collections`` passed in). If `True`, ``collections``
1565 must not contain regular expressions and may not be ``...``.
1566 data_id : `dict` or `DataCoordinate`, optional
1567 A data ID whose key-value pairs are used as equality constraints in
1568 the query.
1569 where : `str`, optional
1570 A string expression similar to a SQL WHERE clause. May involve any
1571 column of a dimension table or (as a shortcut for the primary key
1572 column of a dimension table) dimension name. See
1573 :ref:`daf_butler_dimension_expressions` for more information.
1574 bind : `~collections.abc.Mapping`, optional
1575 Mapping containing literal values that should be injected into the
1576 ``where`` expression, keyed by the identifiers they replace. Values
1577 of collection type can be expanded in some cases; see
1578 :ref:`daf_butler_dimension_expressions_identifiers` for more
1579 information.
1580 with_dimension_records : `bool`, optional
1581 If `True` (default is `False`) then returned data IDs will have
1582 dimension records.
1583 explain : `bool`, optional
1584 If `True` (default) then `EmptyQueryResultError` exception is
1585 raised when resulting list is empty. The exception contains
1586 non-empty list of strings explaining possible causes for empty
1587 result.
1588 **kwargs
1589 Additional keyword arguments are forwarded to
1590 `DataCoordinate.standardize` when processing the ``data_id``
1591 argument (and may be used to provide a constraining data ID even
1592 when the ``data_id`` argument is `None`).
1594 Returns
1595 -------
1596 refs : `.queries.DatasetRefQueryResults`
1597 Dataset references matching the given query criteria. Nested data
1598 IDs are guaranteed to include values for all implied dimensions
1599 (i.e. `DataCoordinate.hasFull` will return `True`).
1601 Raises
1602 ------
1603 lsst.daf.butler.registry.DatasetTypeExpressionError
1604 Raised when ``dataset_type`` expression is invalid.
1605 lsst.daf.butler.registry.DataIdError
1606 Raised when ``data_id`` or keyword arguments specify unknown
1607 dimensions or values, or when they contain inconsistent values.
1608 lsst.daf.butler.registry.UserExpressionError
1609 Raised when ``where`` expression is invalid.
1610 lsst.daf.butler.EmptyQueryResultError
1611 Raised when query generates empty result and ``explain`` is set to
1612 `True`.
1613 TypeError
1614 Raised when the arguments are incompatible, such as when a
1615 collection wildcard is passed when ``find_first`` is `True`, or
1616 when ``collections`` is `None` and default butler collections are
1617 not defined.
1619 Notes
1620 -----
1621 When multiple dataset types are queried in a single call, the results
1622 of this operation are equivalent to querying for each dataset type
1623 separately in turn, and no information about the relationships between
1624 datasets of different types is included.
1625 """
1626 if data_id is None:
1627 data_id = DataCoordinate.make_empty(self.dimensions)
1628 with self._query() as query:
1629 result = query.where(data_id, where, bind=bind, **kwargs).datasets(
1630 dataset_type,
1631 collections=collections,
1632 find_first=find_first,
1633 )
1634 if with_dimension_records:
1635 result = result.with_dimension_records()
1636 refs = list(result)
1637 if explain and not refs:
1638 raise EmptyQueryResultError(list(result.explain_no_results()))
1639 return refs
1641 def _query_dimension_records(
1642 self,
1643 element: str,
1644 *,
1645 data_id: DataId | None = None,
1646 where: str = "",
1647 bind: Mapping[str, Any] | None = None,
1648 order_by: Iterable[str] | str | None = None,
1649 limit: int | None = None,
1650 explain: bool = True,
1651 **kwargs: Any,
1652 ) -> list[DimensionRecord]:
1653 """Query for dimension information matching user-provided criteria.
1655 Parameters
1656 ----------
1657 element : `str`
1658 The name of a dimension element to obtain records for.
1659 data_id : `dict` or `DataCoordinate`, optional
1660 A data ID whose key-value pairs are used as equality constraints
1661 in the query.
1662 where : `str`, optional
1663 A string expression similar to a SQL WHERE clause. See
1664 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1665 information.
1666 bind : `~collections.abc.Mapping`, optional
1667 Mapping containing literal values that should be injected into the
1668 ``where`` expression, keyed by the identifiers they replace.
1669 Values of collection type can be expanded in some cases; see
1670 :ref:`daf_butler_dimension_expressions_identifiers` for more
1671 information.
1672 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1673 Names of the columns/dimensions to use for ordering returned data
1674 IDs. Column name can be prefixed with minus (``-``) to use
1675 descending ordering.
1676 limit : `int`, optional
1677 Upper limit on the number of returned records.
1678 explain : `bool`, optional
1679 If `True` (default) then `EmptyQueryResultError` exception is
1680 raised when resulting list is empty. The exception contains
1681 non-empty list of strings explaining possible causes for empty
1682 result.
1683 **kwargs
1684 Additional keyword arguments are forwarded to
1685 `DataCoordinate.standardize` when processing the ``data_id``
1686 argument (and may be used to provide a constraining data ID even
1687 when the ``data_id`` argument is `None`).
1689 Returns
1690 -------
1691 records : `list`[`DimensionRecord`]
1692 Dimension records matching the given query parameters.
1694 Raises
1695 ------
1696 lsst.daf.butler.registry.DataIdError
1697 Raised when ``data_id`` or keyword arguments specify unknown
1698 dimensions or values, or when they contain inconsistent values.
1699 lsst.daf.butler.registry.UserExpressionError
1700 Raised when ``where`` expression is invalid.
1701 lsst.daf.butler.EmptyQueryResultError
1702 Raised when query generates empty result and ``explain`` is set to
1703 `True`.
1704 TypeError
1705 Raised when the arguments are incompatible, such as when a
1706 collection wildcard is passed when ``find_first`` is `True`, or
1707 when ``collections`` is `None` and default butler collections are
1708 not defined.
1709 """
1710 if data_id is None:
1711 data_id = DataCoordinate.make_empty(self.dimensions)
1712 with self._query() as query:
1713 result = (
1714 query.where(data_id, where, bind=bind, **kwargs)
1715 .dimension_records(element)
1716 .order_by(*ensure_iterable(order_by))
1717 .limit(limit)
1718 )
1719 dimension_records = list(result)
1720 if explain and not dimension_records:
1721 raise EmptyQueryResultError(list(result.explain_no_results()))
1722 return dimension_records
1724 @abstractmethod
1725 def _clone(
1726 self,
1727 *,
1728 collections: Any = None,
1729 run: str | None = None,
1730 inferDefaults: bool = True,
1731 **kwargs: Any,
1732 ) -> Butler:
1733 """Return a new Butler instance connected to the same repository
1734 as this one, but overriding ``collections``, ``run``,
1735 ``inferDefaults``, and default data ID.
1736 """
1737 raise NotImplementedError()