Coverage for python/lsst/daf/butler/_butler.py: 66%
144 statements
coverage.py v7.3.2, created at 2023-12-08 10:56 +0000
1 # This file is part of daf_butler.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (http://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This software is dual licensed under the GNU General Public License and also
10 # under a 3-clause BSD license. Recipients may choose which of these licenses
11 # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12 # respectively. If you choose the GPL option then the following text applies
13 # (but note that there is still no warranty even if you opt for BSD instead):
14 #
15 # This program is free software: you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation, either version 3 of the License, or
18 # (at your option) any later version.
19 #
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
24 #
25 # You should have received a copy of the GNU General Public License
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 from __future__ import annotations
30 __all__ = ["Butler"]
32 from abc import abstractmethod
33 from collections.abc import Collection, Iterable, Mapping, Sequence
34 from contextlib import AbstractContextManager
35 from typing import TYPE_CHECKING, Any, TextIO
37 from lsst.resources import ResourcePath, ResourcePathExpression
38 from lsst.utils import doImportType
39 from lsst.utils.logging import getLogger
41 from ._butler_config import ButlerConfig
42 from ._butler_repo_index import ButlerRepoIndex
43 from ._config import Config, ConfigSubset
44 from ._limited_butler import LimitedButler
45 from .datastore import Datastore
46 from .dimensions import DimensionConfig
47 from .registry import RegistryConfig, _RegistryFactory
48 from .repo_relocation import BUTLER_ROOT_TAG
50 if TYPE_CHECKING:
51 from ._dataset_existence import DatasetExistence
52 from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
53 from ._dataset_type import DatasetType
54 from ._deferredDatasetHandle import DeferredDatasetHandle
55 from ._file_dataset import FileDataset
56 from ._query import Query
57 from ._storage_class import StorageClass
58 from ._timespan import Timespan
59 from .datastore import DatasetRefURIs
60 from .dimensions import DataCoordinate, DataId, DimensionGroup, DimensionRecord
61 from .registry import CollectionArgType, Registry
62 from .transfers import RepoExportContext
64 _LOG = getLogger(__name__)
67 class Butler(LimitedButler):
68 """Interface for data butler and factory for Butler instances.
70 Parameters
71 ----------
72 config : `ButlerConfig`, `Config` or `str`, optional
73 Configuration. Anything acceptable to the `ButlerConfig` constructor.
74 If a directory path is given the configuration will be read from a
75 ``butler.yaml`` file in that location. If `None` is given default
76 values will be used. If ``config`` contains a "cls" key then its value is
77 used as the name of the butler class, which must be a subclass of this
78 class; otherwise `DirectButler` is instantiated.
79 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
80 An expression specifying the collections to be searched (in order) when
81 reading datasets.
82 This may be a `str` collection name or an iterable thereof.
83 See :ref:`daf_butler_collection_expressions` for more information.
84 These collections are not registered automatically and must be
85 manually registered before they are used by any method, but they may be
86 manually registered after the `Butler` is initialized.
87 run : `str`, optional
88 Name of the `~CollectionType.RUN` collection new datasets should be
89 inserted into. If ``collections`` is `None` and ``run`` is not `None`,
90 ``collections`` will be set to ``[run]``. If not `None`, this
91 collection will automatically be registered. If this is not set (and
92 ``writeable`` is not set either), a read-only butler will be created.
93 searchPaths : `list` of `str`, optional
94 Directory paths to search when calculating the full Butler
95 configuration. Not used if the supplied config is already a
96 `ButlerConfig`.
97 writeable : `bool`, optional
98 Explicitly sets whether the butler supports write operations. If not
99 provided, a read-write butler is created if any of ``run``, ``tags``,
100 or ``chains`` is non-empty.
101 inferDefaults : `bool`, optional
102 If `True` (default) infer default data ID values from the values
103 present in the datasets in ``collections``: if all collections have the
104 same value (or no value) for a governor dimension, that value will be
105 the default for that dimension. Nonexistent collections are ignored.
106 If a default value is provided explicitly for a governor dimension via
107 ``**kwargs``, no default will be inferred for that dimension.
108 **kwargs : `Any`
109 Additional keyword arguments passed to the constructor of the actual
110 butler class.
112 Notes
113 -----
114 The preferred way to instantiate Butler is via the `from_config` method.
115 The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``,
116 but ``mypy`` will complain about the former.
117 """
119 def __new__(
120 cls,
121 config: Config | ResourcePathExpression | None = None,
122 *,
123 collections: Any = None,
124 run: str | None = None,
125 searchPaths: Sequence[ResourcePathExpression] | None = None,
126 writeable: bool | None = None,
127 inferDefaults: bool = True,
128 **kwargs: Any,
129 ) -> Butler:
130 if cls is Butler:
131 cls = cls._find_butler_class(config, searchPaths)
132 # Note: we do not pass any parameters to __new__, Python will pass them
133 # to __init__ after __new__ returns sub-class instance.
134 return super().__new__(cls)
136 @staticmethod
137 def _find_butler_class(
138 config: Config | ResourcePathExpression | None = None,
139 searchPaths: Sequence[ResourcePathExpression] | None = None,
140 ) -> type[Butler]:
141 """Find actual class to instantiate."""
142 butler_class_name: str | None = None
143 if config is not None:
144 # Check for optional "cls" key in config.
145 if not isinstance(config, Config):
146 config = ButlerConfig(config, searchPaths=searchPaths)
147 butler_class_name = config.get("cls")
149 # Make DirectButler if class is not specified.
150 butler_class: type[Butler]
151 if butler_class_name is None:
152 from .direct_butler import DirectButler
154 butler_class = DirectButler
155 else:
156 butler_class = doImportType(butler_class_name)
157 if not issubclass(butler_class, Butler):
158 raise TypeError(f"{butler_class_name} is not a subclass of Butler")
159 return butler_class
161 @classmethod
162 def from_config(
163 cls,
164 config: Config | ResourcePathExpression | None = None,
165 *,
166 collections: Any = None,
167 run: str | None = None,
168 searchPaths: Sequence[ResourcePathExpression] | None = None,
169 writeable: bool | None = None,
170 inferDefaults: bool = True,
171 **kwargs: Any,
172 ) -> Butler:
173 """Create butler instance from configuration.
175 Parameters
176 ----------
177 config : `ButlerConfig`, `Config` or `str`, optional
178 Configuration. Anything acceptable to the `ButlerConfig`
179 constructor. If a directory path is given the configuration will be
180 read from a ``butler.yaml`` file in that location. If `None` is
181 given default values will be used. If ``config`` contains a "cls" key
182 then its value is used as the name of the butler class, which must be a
183 subclass of this class; otherwise `DirectButler` is instantiated.
184 collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
185 An expression specifying the collections to be searched (in order)
186 when reading datasets.
187 This may be a `str` collection name or an iterable thereof.
188 See :ref:`daf_butler_collection_expressions` for more information.
189 These collections are not registered automatically and must be
190 manually registered before they are used by any method, but they
191 may be manually registered after the `Butler` is initialized.
192 run : `str`, optional
193 Name of the `~CollectionType.RUN` collection new datasets should be
194 inserted into. If ``collections`` is `None` and ``run`` is not
195 `None`, ``collections`` will be set to ``[run]``. If not `None`,
196 this collection will automatically be registered. If this is not
197 set (and ``writeable`` is not set either), a read-only butler will
198 be created.
199 searchPaths : `list` of `str`, optional
200 Directory paths to search when calculating the full Butler
201 configuration. Not used if the supplied config is already a
202 `ButlerConfig`.
203 writeable : `bool`, optional
204 Explicitly sets whether the butler supports write operations. If
205 not provided, a read-write butler is created if any of ``run``,
206 ``tags``, or ``chains`` is non-empty.
207 inferDefaults : `bool`, optional
208 If `True` (default) infer default data ID values from the values
209 present in the datasets in ``collections``: if all collections have
210 the same value (or no value) for a governor dimension, that value
211 will be the default for that dimension. Nonexistent collections
212 are ignored. If a default value is provided explicitly for a
213 governor dimension via ``**kwargs``, no default will be inferred
214 for that dimension.
215 **kwargs : `Any`
216 Additional keyword arguments passed to the constructor of the
217 actual butler class.
219 Notes
220 -----
221 Calling this factory method is identical to calling
222 ``Butler(config, ...)``. Its only raison d'être is that ``mypy``
223 complains about a direct ``Butler()`` call.
225 Examples
226 --------
227 While there are many ways to control exactly how a `Butler` interacts
228 with the collections in its `Registry`, the most common cases are still
229 simple.
231 For a read-only `Butler` that searches one collection, do::
233 butler = Butler.from_config(
234 "/path/to/repo", collections=["u/alice/DM-50000"]
235 )
237 For a read-write `Butler` that writes to and reads from a
238 `~CollectionType.RUN` collection::
240 butler = Butler.from_config(
241 "/path/to/repo", run="u/alice/DM-50000/a"
242 )
244 The `Butler` passed to a ``PipelineTask`` is often much more complex,
245 because we want to write to one `~CollectionType.RUN` collection but
246 read from several others (as well)::
248 butler = Butler.from_config(
249 "/path/to/repo",
250 run="u/alice/DM-50000/a",
251 collections=[
252 "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults"
253 ]
254 )
256 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
257 Datasets will be read first from that run (since it appears first in
258 the chain), and then from ``u/bob/DM-49998`` and finally
259 ``HSC/defaults``.
261 Finally, one can always create a `Butler` with no collections::
263 butler = Butler.from_config("/path/to/repo", writeable=True)
265 This can be extremely useful when you just want to use
266 ``butler.registry``, e.g. for inserting dimension data or managing
267 collections, or when the collections you want to use with the butler
268 are not consistent. Passing ``writeable`` explicitly here is only
269 necessary if you want to be able to make changes to the repo - usually
270 the value for ``writeable`` can be guessed from the collection
271 arguments provided, but it defaults to `False` when there are no
272 collection arguments.
273 """
274 cls = cls._find_butler_class(config, searchPaths)
275 return cls(
276 config,
277 collections=collections,
278 run=run,
279 searchPaths=searchPaths,
280 writeable=writeable,
281 inferDefaults=inferDefaults,
282 **kwargs,
283 )
285 @staticmethod
286 def makeRepo(
287 root: ResourcePathExpression,
288 config: Config | str | None = None,
289 dimensionConfig: Config | str | None = None,
290 standalone: bool = False,
291 searchPaths: list[str] | None = None,
292 forceConfigRoot: bool = True,
293 outfile: ResourcePathExpression | None = None,
294 overwrite: bool = False,
295 ) -> Config:
296 """Create an empty data repository by adding a butler.yaml config
297 to a repository root directory.
299 Parameters
300 ----------
301 root : `lsst.resources.ResourcePathExpression`
302 Path or URI to the root location of the new repository. Will be
303 created if it does not exist.
304 config : `Config` or `str`, optional
305 Configuration to write to the repository, after setting any
306 root-dependent Registry or Datastore config options. Can not
307 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
308 configuration will be used. Root-dependent config options
309 specified in this config are overwritten if ``forceConfigRoot``
310 is `True`.
311 dimensionConfig : `Config` or `str`, optional
312 Configuration for dimensions, will be used to initialize registry
313 database.
314 standalone : `bool`
315 If `True`, write all expanded defaults, not just customized or
316 repository-specific settings.
317 This (mostly) decouples the repository from the default
318 configuration, insulating it from changes to the defaults (which
319 may be good or bad, depending on the nature of the changes).
320 Future *additions* to the defaults will still be picked up when
321 initializing `Butlers` to repos created with ``standalone=True``.
322 searchPaths : `list` of `str`, optional
323 Directory paths to search when calculating the full butler
324 configuration.
325 forceConfigRoot : `bool`, optional
326 If `False`, any values present in the supplied ``config`` that
327 would normally be reset are not overridden and will appear
328 directly in the output config. This allows non-standard overrides
329 of the root directory for a datastore or registry to be given.
330 If this parameter is `True` the values for ``root`` will be
331 forced into the resulting config if appropriate.
332 outfile : `lsst.resources.ResourcePathExpression`, optional
333 If not `None`, the output configuration will be written to this
334 location rather than into the repository itself. Can be a URI
335 string. Can refer to a directory that will be used to write
336 ``butler.yaml``.
337 overwrite : `bool`, optional
338 Create a new configuration file even if one already exists
339 in the specified output location. Default is to raise
340 an exception.
342 Returns
343 -------
344 config : `Config`
345 The updated `Config` instance written to the repo.
347 Raises
348 ------
349 ValueError
350 Raised if a ButlerConfig or ConfigSubset is passed instead of a
351 regular Config (as these subclasses would make it impossible to
352 support ``standalone=False``).
353 FileExistsError
354 Raised if the output config file already exists.
355 os.error
356 Raised if the directory does not exist, exists but is not a
357 directory, or cannot be created.
359 Notes
360 -----
361 Note that when ``standalone=False`` (the default), the configuration
362 search path (see `ConfigSubset.defaultSearchPaths`) that was used to
363 construct the repository should also be used to construct any Butlers
364 to avoid configuration inconsistencies.
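Examples
--------
A minimal sketch of creating and then opening a new file-based
repository (the path is only a placeholder)::

    config = Butler.makeRepo("/path/to/new/repo")
    butler = Butler.from_config("/path/to/new/repo", writeable=True)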
365 """
366 if isinstance(config, ButlerConfig | ConfigSubset):
367 raise ValueError("makeRepo must be passed a regular Config without defaults applied.")
369 # Ensure that the root of the repository exists or can be made
370 root_uri = ResourcePath(root, forceDirectory=True)
371 root_uri.mkdir()
373 config = Config(config)
375 # If we are creating a new repo from scratch with relative roots,
376 # do not propagate an explicit root from the config file
377 if "root" in config:
378 del config["root"]
380 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults
381 imported_class = doImportType(full["datastore", "cls"])
382 if not issubclass(imported_class, Datastore):
383 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore")
384 datastoreClass: type[Datastore] = imported_class
385 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)
387 # if key exists in given config, parse it, otherwise parse the defaults
388 # in the expanded config
389 if config.get(("registry", "db")):
390 registryConfig = RegistryConfig(config)
391 else:
392 registryConfig = RegistryConfig(full)
393 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
394 if defaultDatabaseUri is not None:
395 Config.updateParameters(
396 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot
397 )
398 else:
399 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot)
401 if standalone:
402 config.merge(full)
403 else:
404 # Always expand the registry.managers section into the per-repo
405 # config, because after the database schema is created, it's not
406 # allowed to change anymore. Note that in the standalone=True
407 # branch, _everything_ in the config is expanded, so there's no
408 # need to special case this.
409 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False)
410 configURI: ResourcePathExpression
411 if outfile is not None:
412 # When writing to a separate location we must include
413 # the root of the butler repo in the config else it won't know
414 # where to look.
415 config["root"] = root_uri.geturl()
416 configURI = outfile
417 else:
418 configURI = root_uri
419 # Strip obscore configuration, if it is present, before writing config
420 # to a file, obscore config will be stored in registry.
421 if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config:
422 config_to_write = config.copy()
423 del config_to_write[obscore_config_key]
424 config_to_write.dumpToUri(configURI, overwrite=overwrite)
425 # configFile attribute is updated, need to copy it to original.
426 config.configFile = config_to_write.configFile
427 else:
428 config.dumpToUri(configURI, overwrite=overwrite)
430 # Create Registry and populate tables
431 registryConfig = RegistryConfig(config.get("registry"))
432 dimensionConfig = DimensionConfig(dimensionConfig)
433 _RegistryFactory(registryConfig).create_from_config(
434 dimensionConfig=dimensionConfig, butlerRoot=root_uri
435 )
437 _LOG.verbose("Wrote new Butler configuration file to %s", configURI)
439 return config
441 @classmethod
442 def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
443 """Look up the label in a butler repository index.
445 Parameters
446 ----------
447 label : `str`
448 Label of the Butler repository to look up.
449 return_label : `bool`, optional
450 If ``label`` cannot be found in the repository index (either
451 because the index is not defined or ``label`` is not in the index) and
452 ``return_label`` is `True` then return ``ResourcePath(label)``.
453 If ``return_label`` is `False` (default) then an exception will be
454 raised instead.
456 Returns
457 -------
458 uri : `lsst.resources.ResourcePath`
459 URI to the Butler repository associated with the given label, or
460 ``ResourcePath(label)`` if ``return_label`` is `True` and no match
461 is found.
462 Raises
463 ------
464 KeyError
465 Raised if the label is not found in the index, or if an index
466 is not defined, and ``return_label`` is `False`.
468 Notes
469 -----
470 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
471 information is discovered.
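Examples
--------
A sketch assuming an index that defines a hypothetical ``"main"``
label::

    uri = Butler.get_repo_uri("main")
    # Fall back to interpreting the argument as a path/URI when it is
    # not present in the index.
    uri = Butler.get_repo_uri("/path/to/repo", return_label=True)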
472 """
473 return ButlerRepoIndex.get_repo_uri(label, return_label)
475 @classmethod
476 def get_known_repos(cls) -> set[str]:
477 """Retrieve the list of known repository labels.
479 Returns
480 -------
481 repos : `set` of `str`
482 All the known labels. Can be empty if no index can be found.
484 Notes
485 -----
486 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the
487 information is discovered.
488 """
489 return ButlerRepoIndex.get_known_repos()
491 @abstractmethod
492 def _caching_context(self) -> AbstractContextManager[None]:
493 """Context manager that enables caching."""
494 raise NotImplementedError()
496 @abstractmethod
497 def transaction(self) -> AbstractContextManager[None]:
498 """Context manager supporting `Butler` transactions.
500 Transactions can be nested.
501 """
502 raise NotImplementedError()
504 @abstractmethod
505 def put(
506 self,
507 obj: Any,
508 datasetRefOrType: DatasetRef | DatasetType | str,
509 /,
510 dataId: DataId | None = None,
511 *,
512 run: str | None = None,
513 **kwargs: Any,
514 ) -> DatasetRef:
515 """Store and register a dataset.
517 Parameters
518 ----------
519 obj : `object`
520 The dataset.
521 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
522 When `DatasetRef` is provided, ``dataId`` should be `None`.
523 Otherwise the `DatasetType` or name thereof. If a fully resolved
524 `DatasetRef` is given the run and ID are used directly.
525 dataId : `dict` or `DataCoordinate`
526 A `dict` of `Dimension` link name, value pairs that label the
527 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
528 should be provided as the second argument.
529 run : `str`, optional
530 The name of the run the dataset should be added to, overriding
531 ``self.run``. Not used if a resolved `DatasetRef` is provided.
532 **kwargs
533 Additional keyword arguments used to augment or construct a
534 `DataCoordinate`. See `DataCoordinate.standardize`
535 parameters. Not used if a resolved `DatasetRef` is provided.
537 Returns
538 -------
539 ref : `DatasetRef`
540 A reference to the stored dataset, updated with the correct id if
541 given.
543 Raises
544 ------
545 TypeError
546 Raised if the butler is read-only or if no run has been provided.
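Examples
--------
A minimal sketch, assuming a writeable butler and an illustrative
``"calexp"`` dataset type with ``{instrument, visit, detector}``
dimensions; ``exposure`` is the in-memory object to store::

    butler = Butler.from_config("/path/to/repo", run="u/alice/DM-50000/a")
    ref = butler.put(exposure, "calexp", instrument="HSC",
                     visit=903334, detector=42)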
547 """
548 raise NotImplementedError()
550 @abstractmethod
551 def getDeferred(
552 self,
553 datasetRefOrType: DatasetRef | DatasetType | str,
554 /,
555 dataId: DataId | None = None,
556 *,
557 parameters: dict | None = None,
558 collections: Any = None,
559 storageClass: str | StorageClass | None = None,
560 **kwargs: Any,
561 ) -> DeferredDatasetHandle:
562 """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
563 after an immediate registry lookup.
565 Parameters
566 ----------
567 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
568 When a `DatasetRef` is provided, ``dataId`` should be `None`.
569 Otherwise the `DatasetType` or name thereof.
570 dataId : `dict` or `DataCoordinate`, optional
571 A `dict` of `Dimension` link name, value pairs that label the
572 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
573 should be provided as the first argument.
574 parameters : `dict`
575 Additional StorageClass-defined options to control reading,
576 typically used to efficiently read only a subset of the dataset.
577 collections : Any, optional
578 Collections to be searched, overriding ``self.collections``.
579 Can be any of the types supported by the ``collections`` argument
580 to butler construction.
581 storageClass : `StorageClass` or `str`, optional
582 The storage class to be used to override the Python type
583 returned by this method. By default the returned type matches
584 the dataset type definition for this dataset. Specifying a
585 read `StorageClass` can force a different type to be returned.
586 This type must be compatible with the original type.
587 **kwargs
588 Additional keyword arguments used to augment or construct a
589 `DataId`. See `DataId` parameters.
591 Returns
592 -------
593 obj : `DeferredDatasetHandle`
594 A handle which can be used to retrieve a dataset at a later time.
596 Raises
597 ------
598 LookupError
599 Raised if no matching dataset exists in the `Registry` or
600 datastore.
601 ValueError
602 Raised if a resolved `DatasetRef` was passed as an input, but it
603 differs from the one found in the registry.
604 TypeError
605 Raised if no collections were provided.
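Examples
--------
A sketch of deferring the read while performing the registry lookup
immediately (dataset type and data ID values are illustrative)::

    handle = butler.getDeferred("calexp", instrument="HSC",
                                visit=903334, detector=42)
    # ... later, when the dataset is actually needed:
    exposure = handle.get()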
606 """
607 raise NotImplementedError()
609 @abstractmethod
610 def get(
611 self,
612 datasetRefOrType: DatasetRef | DatasetType | str,
613 /,
614 dataId: DataId | None = None,
615 *,
616 parameters: dict[str, Any] | None = None,
617 collections: Any = None,
618 storageClass: StorageClass | str | None = None,
619 **kwargs: Any,
620 ) -> Any:
621 """Retrieve a stored dataset.
623 Parameters
624 ----------
625 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
626 When a `DatasetRef` is provided, ``dataId`` should be `None`.
627 Otherwise the `DatasetType` or name thereof.
628 If a resolved `DatasetRef`, the associated dataset
629 is returned directly without additional querying.
630 dataId : `dict` or `DataCoordinate`
631 A `dict` of `Dimension` link name, value pairs that label the
632 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
633 should be provided as the first argument.
634 parameters : `dict`
635 Additional StorageClass-defined options to control reading,
636 typically used to efficiently read only a subset of the dataset.
637 collections : Any, optional
638 Collections to be searched, overriding ``self.collections``.
639 Can be any of the types supported by the ``collections`` argument
640 to butler construction.
641 storageClass : `StorageClass` or `str`, optional
642 The storage class to be used to override the Python type
643 returned by this method. By default the returned type matches
644 the dataset type definition for this dataset. Specifying a
645 read `StorageClass` can force a different type to be returned.
646 This type must be compatible with the original type.
647 **kwargs
648 Additional keyword arguments used to augment or construct a
649 `DataCoordinate`. See `DataCoordinate.standardize`
650 parameters.
652 Returns
653 -------
654 obj : `object`
655 The dataset.
657 Raises
658 ------
659 LookupError
660 Raised if no matching dataset exists in the `Registry`.
661 TypeError
662 Raised if no collections were provided.
664 Notes
665 -----
666 When looking up datasets in a `~CollectionType.CALIBRATION` collection,
667 this method requires that the given data ID include temporal dimensions
668 beyond the dimensions of the dataset type itself, in order to find the
669 dataset with the appropriate validity range. For example, a "bias"
670 dataset with native dimensions ``{instrument, detector}`` could be
671 fetched with a ``{instrument, detector, exposure}`` data ID, because
672 ``exposure`` is a temporal dimension.
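Examples
--------
A sketch using illustrative dataset type names and data ID values::

    butler = Butler.from_config(
        "/path/to/repo", collections=["u/alice/DM-50000"]
    )
    exposure = butler.get("calexp", instrument="HSC", visit=903334,
                          detector=42)

A calibration lookup supplies the extra temporal dimension described
above::

    bias = butler.get("bias", instrument="HSC", detector=42,
                      exposure=903334)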
673 """
674 raise NotImplementedError()
676 @abstractmethod
677 def getURIs(
678 self,
679 datasetRefOrType: DatasetRef | DatasetType | str,
680 /,
681 dataId: DataId | None = None,
682 *,
683 predict: bool = False,
684 collections: Any = None,
685 run: str | None = None,
686 **kwargs: Any,
687 ) -> DatasetRefURIs:
688 """Return the URIs associated with the dataset.
690 Parameters
691 ----------
692 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
693 When a `DatasetRef` is provided, ``dataId`` should be `None`.
694 Otherwise the `DatasetType` or name thereof.
695 dataId : `dict` or `DataCoordinate`
696 A `dict` of `Dimension` link name, value pairs that label the
697 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
698 should be provided as the first argument.
699 predict : `bool`
700 If `True`, allow URIs to be returned of datasets that have not
701 been written.
702 collections : Any, optional
703 Collections to be searched, overriding ``self.collections``.
704 Can be any of the types supported by the ``collections`` argument
705 to butler construction.
706 run : `str`, optional
707 Run to use for predictions, overriding ``self.run``.
708 **kwargs
709 Additional keyword arguments used to augment or construct a
710 `DataCoordinate`. See `DataCoordinate.standardize`
711 parameters.
713 Returns
714 -------
715 uris : `DatasetRefURIs`
716 The URI to the primary artifact associated with this dataset (if
717 the dataset was disassembled within the datastore this may be
718 `None`), and the URIs to any components associated with the dataset
719 artifact (can be empty if there are no components).
720 """
721 raise NotImplementedError()
723 @abstractmethod
724 def getURI(
725 self,
726 datasetRefOrType: DatasetRef | DatasetType | str,
727 /,
728 dataId: DataId | None = None,
729 *,
730 predict: bool = False,
731 collections: Any = None,
732 run: str | None = None,
733 **kwargs: Any,
734 ) -> ResourcePath:
735 """Return the URI to the Dataset.
737 Parameters
738 ----------
739 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
740 When a `DatasetRef` is provided, ``dataId`` should be `None`.
741 Otherwise the `DatasetType` or name thereof.
742 dataId : `dict` or `DataCoordinate`
743 A `dict` of `Dimension` link name, value pairs that label the
744 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
745 should be provided as the first argument.
746 predict : `bool`
747 If `True`, allow URIs to be returned of datasets that have not
748 been written.
749 collections : Any, optional
750 Collections to be searched, overriding ``self.collections``.
751 Can be any of the types supported by the ``collections`` argument
752 to butler construction.
753 run : `str`, optional
754 Run to use for predictions, overriding ``self.run``.
755 **kwargs
756 Additional keyword arguments used to augment or construct a
757 `DataCoordinate`. See `DataCoordinate.standardize`
758 parameters.
760 Returns
761 -------
762 uri : `lsst.resources.ResourcePath`
763 URI pointing to the Dataset within the datastore. If the
764 Dataset does not exist in the datastore, and if ``predict`` is
765 `True`, the URI will be a prediction and will include a URI
766 fragment "#predicted".
767 If the datastore does not have entities that relate well
768 to the concept of a URI the returned URI string will be
769 descriptive. The returned URI is not guaranteed to be obtainable.
771 Raises
772 ------
773 LookupError
774 A URI has been requested for a dataset that does not exist and
775 guessing is not allowed.
776 ValueError
777 Raised if a resolved `DatasetRef` was passed as an input, but it
778 differs from the one found in the registry.
779 TypeError
780 Raised if no collections were provided.
781 RuntimeError
782 Raised if a URI is requested for a dataset that consists of
783 multiple artifacts.
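Examples
--------
A sketch retrieving the URI of an existing dataset and predicting the
URI of one not yet written (names and values are illustrative)::

    uri = butler.getURI("calexp", instrument="HSC", visit=903334,
                        detector=42)
    predicted = butler.getURI("calexp", instrument="HSC", visit=903334,
                              detector=42, predict=True,
                              run="u/alice/DM-50000/a")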
784 """
785 raise NotImplementedError()
787 @abstractmethod
788 def get_dataset_type(self, name: str) -> DatasetType:
789 """Get the `DatasetType`.
791 Parameters
792 ----------
793 name : `str`
794 Name of the type.
796 Returns
797 -------
798 type : `DatasetType`
799 The `DatasetType` associated with the given name.
801 Raises
802 ------
803 lsst.daf.butler.MissingDatasetTypeError
804 Raised if the requested dataset type has not been registered.
806 Notes
807 -----
808 This method handles component dataset types automatically, though most
809 other operations do not.
810 """
811 raise NotImplementedError()
813 @abstractmethod
814 def get_dataset(
815 self,
816 id: DatasetId,
817 storage_class: str | StorageClass | None,
818 dimension_records: bool = False,
819 datastore_records: bool = False,
820 ) -> DatasetRef | None:
821 """Retrieve a Dataset entry.
823 Parameters
824 ----------
825 id : `DatasetId`
826 The unique identifier for the dataset.
827 storage_class : `str` or `StorageClass` or `None`
828 A storage class to use when creating the returned entry. If given
829 it must be compatible with the default storage class.
830 dimension_records : `bool`, optional
831 If `True` the ref will be expanded and contain dimension records.
832 datastore_records : `bool`, optional
833 If `True` the ref will contain associated datastore records.
835 Returns
836 -------
837 ref : `DatasetRef` or `None`
838 A ref to the Dataset, or `None` if no matching Dataset
839 was found.
840 """
841 raise NotImplementedError()
843 @abstractmethod
844 def find_dataset(
845 self,
846 dataset_type: DatasetType | str,
847 data_id: DataId | None = None,
848 *,
849 collections: str | Sequence[str] | None = None,
850 timespan: Timespan | None = None,
851 storage_class: str | StorageClass | None = None,
852 dimension_records: bool = False,
853 datastore_records: bool = False,
854 **kwargs: Any,
855 ) -> DatasetRef | None:
856 """Find a dataset given its `DatasetType` and data ID.
858 This can be used to obtain a `DatasetRef` that permits the dataset to
859 be read from a `Datastore`. If the dataset is a component and can not
860 be found using the provided dataset type, a dataset ref for the parent
861 will be returned instead but with the correct dataset type.
863 Parameters
864 ----------
865 dataset_type : `DatasetType` or `str`
866 A `DatasetType` or the name of one. If this is a `DatasetType`
867 instance, its storage class will be respected and propagated to
868 the output, even if it differs from the dataset type definition
869 in the registry, as long as the storage classes are convertible.
870 data_id : `dict` or `DataCoordinate`, optional
871 A `dict`-like object containing the `Dimension` links that identify
872 the dataset within a collection. If it is a `dict` the dataId
873 can include dimension record values such as ``day_obs`` and
874 ``seq_num`` or ``full_name`` that can be used to derive the
875 primary dimension.
876 collections : `str` or `list` [`str`], optional
877 An ordered list of collections to search for the dataset.
878 Defaults to ``self.defaults.collections``.
879 timespan : `Timespan`, optional
880 A timespan that the validity range of the dataset must overlap.
881 If not provided, any `~CollectionType.CALIBRATION` collections
882 matched by the ``collections`` argument will not be searched.
883 storage_class : `str` or `StorageClass` or `None`
884 A storage class to use when creating the returned entry. If given
885 it must be compatible with the default storage class.
886 dimension_records : `bool`, optional
887 If `True` the ref will be expanded and contain dimension records.
888 datastore_records : `bool`, optional
889 If `True` the ref will contain associated datastore records.
890 **kwargs
891 Additional keyword arguments passed to
892 `DataCoordinate.standardize` to convert ``dataId`` to a true
893 `DataCoordinate` or augment an existing one. This can also include
894 dimension record metadata that can be used to derive a primary
895 dimension value.
897 Returns
898 -------
899 ref : `DatasetRef` or `None`
900 A reference to the dataset, or `None` if no matching Dataset
901 was found.
903 Raises
904 ------
905 lsst.daf.butler.NoDefaultCollectionError
906 Raised if ``collections`` is `None` and
907 ``self.collections`` is `None`.
908 LookupError
909 Raised if one or more data ID keys are missing.
910 lsst.daf.butler.MissingDatasetTypeError
911 Raised if the dataset type does not exist.
912 lsst.daf.butler.MissingCollectionError
913 Raised if any of ``collections`` does not exist in the registry.
915 Notes
916 -----
917 This method simply returns `None` and does not raise an exception even
918 when the set of collections searched is intrinsically incompatible with
919 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
920 only `~CollectionType.CALIBRATION` collections are being searched.
921 This may make it harder to debug some lookup failures, but the behavior
922 is intentional; we consider it more important that failed searches are
923 reported consistently, regardless of the reason, and that adding
924 additional collections that do not contain a match to the search path
925 never changes the behavior.
927 This method handles component dataset types automatically, though most
928 other query operations do not.
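Examples
--------
A sketch with illustrative dataset type, data ID, and collection
names::

    ref = butler.find_dataset("calexp", instrument="HSC", visit=903334,
                              detector=42,
                              collections=["u/alice/DM-50000"])
    if ref is not None:
        exposure = butler.get(ref)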
929 """
930 raise NotImplementedError()
932 @abstractmethod
933 def retrieveArtifacts(
934 self,
935 refs: Iterable[DatasetRef],
936 destination: ResourcePathExpression,
937 transfer: str = "auto",
938 preserve_path: bool = True,
939 overwrite: bool = False,
940 ) -> list[ResourcePath]:
941 """Retrieve the artifacts associated with the supplied refs.
943 Parameters
944 ----------
945 refs : iterable of `DatasetRef`
946 The datasets for which artifacts are to be retrieved.
947 A single ref can result in multiple artifacts. The refs must
948 be resolved.
949 destination : `lsst.resources.ResourcePath` or `str`
950 Location to write the artifacts.
951 transfer : `str`, optional
952 Method to use to transfer the artifacts. Must be one of the options
953 supported by `~lsst.resources.ResourcePath.transfer_from()`.
954 "move" is not allowed.
955 preserve_path : `bool`, optional
956 If `True` the full path of the artifact within the datastore
957 is preserved. If `False` the final file component of the path
958 is used.
959 overwrite : `bool`, optional
960 If `True` allow transfers to overwrite existing files at the
961 destination.
963 Returns
964 -------
965 targets : `list` of `lsst.resources.ResourcePath`
966 URIs of file artifacts in destination location. Order is not
967 preserved.
969 Notes
970 -----
971 For non-file datastores the artifacts written to the destination
972 may not match the representation inside the datastore. For example
973 a hierarchical data structure in a NoSQL database may well be stored
974 as a JSON file.
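Examples
--------
A sketch copying the file artifacts of a query result to a local
directory (query and destination are illustrative)::

    refs = butler.registry.queryDatasets(
        "calexp", collections="u/alice/DM-50000"
    )
    paths = butler.retrieveArtifacts(refs, "/tmp/calexp-export",
                                     transfer="copy")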
975 """
976 raise NotImplementedError()
978 @abstractmethod
979 def exists(
980 self,
981 dataset_ref_or_type: DatasetRef | DatasetType | str,
982 /,
983 data_id: DataId | None = None,
984 *,
985 full_check: bool = True,
986 collections: Any = None,
987 **kwargs: Any,
988 ) -> DatasetExistence:
989 """Indicate whether a dataset is known to Butler registry and
990 datastore.
992 Parameters
993 ----------
994 dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str`
995 When a `DatasetRef` is provided, ``data_id`` should be `None`.
996 Otherwise the `DatasetType` or name thereof.
997 data_id : `dict` or `DataCoordinate`
998 A `dict` of `Dimension` link name, value pairs that label the
999 `DatasetRef` within a Collection. When `None`, a `DatasetRef`
1000 should be provided as the first argument.
1001 full_check : `bool`, optional
1002 If `True`, an additional check will be made for dataset artifact
1003 existence. This will involve additional overhead due to the need
1004 to query an external system. If `False` registry and datastore
1005 will solely be asked if they know about the dataset but no
1006 check for the artifact will be performed.
1007 collections : Any, optional
1008 Collections to be searched, overriding ``self.collections``.
1009 Can be any of the types supported by the ``collections`` argument
1010 to butler construction.
1011 **kwargs
1012 Additional keyword arguments used to augment or construct a
1013 `DataCoordinate`. See `DataCoordinate.standardize`
1014 parameters.
1016 Returns
1017 -------
1018 existence : `DatasetExistence`
1019 Object indicating whether the dataset is known to registry and
1020 datastore. Evaluates to `True` if the dataset is present and known
1021 to both.
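Examples
--------
A sketch using an illustrative dataset type and data ID; the returned
`DatasetExistence` is truthy only when registry and datastore both
know the dataset::

    existence = butler.exists("calexp", instrument="HSC", visit=903334,
                              detector=42)
    if existence:
        exposure = butler.get("calexp", instrument="HSC", visit=903334,
                              detector=42)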
1022 """
1023 raise NotImplementedError()
1025 @abstractmethod
1026 def _exists_many(
1027 self,
1028 refs: Iterable[DatasetRef],
1029 /,
1030 *,
1031 full_check: bool = True,
1032 ) -> dict[DatasetRef, DatasetExistence]:
1033 """Indicate whether multiple datasets are known to Butler registry and
1034 datastore.
1036 This is an experimental API that may change at any moment.
1038 Parameters
1039 ----------
1040 refs : iterable of `DatasetRef`
1041 The datasets to be checked.
1042 full_check : `bool`, optional
1043 If `True`, an additional check will be made for dataset artifact
1044 existence. This will involve additional overhead due to the need
1045 to query an external system. If `False` registry and datastore
1046 will solely be asked if they know about the dataset but no
1047 check for the artifact will be performed.
1049 Returns
1050 -------
1051 existence : dict of [`DatasetRef`, `DatasetExistence`]
1052 Mapping from the given dataset refs to an enum indicating the
1053 status of the dataset in registry and datastore.
1054 Each value evaluates to `True` if the dataset is present and known
1055 to both.
1056 """
1057 raise NotImplementedError()
1059 @abstractmethod
1060 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
1061 """Remove one or more `~CollectionType.RUN` collections and the
1062 datasets within them.
1064 Parameters
1065 ----------
1066 names : `~collections.abc.Iterable` [ `str` ]
1067 The names of the collections to remove.
1068 unstore : `bool`, optional
1069 If `True` (default), delete datasets from all datastores in which
1070 they are present, and attempt to rollback the registry deletions if
1071 datastore deletions fail (which may not always be possible). If
1072 `False`, datastore records for these datasets are still removed,
1073 but any artifacts (e.g. files) will not be.
1075 Raises
1076 ------
1077 TypeError
1078 Raised if one or more collections are not of type
1079 `~CollectionType.RUN`.
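Examples
--------
A sketch removing a single run and the file artifacts of its datasets
(the run name is illustrative)::

    butler.removeRuns(["u/alice/DM-50000/a"], unstore=True)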
1080 """
1081 raise NotImplementedError()
1083 @abstractmethod
1084 def ingest(
1085 self,
1086 *datasets: FileDataset,
1087 transfer: str | None = "auto",
1088 run: str | None = None,
1089 idGenerationMode: DatasetIdGenEnum | None = None,
1090 record_validation_info: bool = True,
1091 ) -> None:
1092 """Store and register one or more datasets that already exist on disk.
1094 Parameters
1095 ----------
1096 datasets : `FileDataset`
1097 Each positional argument is a struct containing information about
1098 a file to be ingested, including its URI (either absolute or
1099 relative to the datastore root, if applicable), a resolved
1100 `DatasetRef`, and optionally a formatter class or its
1101 fully-qualified string name. If a formatter is not provided, the
1102 formatter that would be used for `put` is assumed. On successful
1103 ingest all `FileDataset.formatter` attributes will be set to the
1104 formatter class used. `FileDataset.path` attributes may be modified
1105 to put paths in whatever the datastore considers a standardized
1106 form.
1107 transfer : `str`, optional
1108 If not `None`, must be one of 'auto', 'move', 'copy', 'direct',
1109 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to
1110 transfer the file.
1111 run : `str`, optional
1112 The name of the run ingested datasets should be added to,
1113 overriding ``self.run``. This parameter is now deprecated since
1114 the run is encoded in the ``FileDataset``.
1115 idGenerationMode : `DatasetIdGenEnum`, optional
1116 Specifies option for generating dataset IDs. Parameter is
1117 deprecated.
1118 record_validation_info : `bool`, optional
1119 If `True`, the default, the datastore can record validation
1120 information associated with the file. If `False` the datastore
1121 will not attempt to track any information such as checksums
1122 or file sizes. This can be useful if such information is tracked
1123 in an external system or if the file is to be compressed in place.
1124 It is up to the datastore whether this parameter is relevant.
1126 Raises
1127 ------
1128 TypeError
1129 Raised if the butler is read-only or if no run was provided.
1130 NotImplementedError
1131 Raised if the `Datastore` does not support the given transfer mode.
1132 DatasetTypeNotSupportedError
1133 Raised if one or more files to be ingested have a dataset type that
1134 is not supported by the `Datastore`.
1135 FileNotFoundError
1136 Raised if one of the given files does not exist.
1137 FileExistsError
1138 Raised if transfer is not `None` but the (internal) location the
1139 file would be moved to is already occupied.
1141 Notes
1142 -----
1143 This operation is not fully exception safe: if a database operation
1144 fails, the given `FileDataset` instances may be only partially updated.
1146 It is atomic in terms of database operations (they will either all
1147 succeed or all fail) providing the database engine implements
1148 transactions correctly. It will attempt to be atomic in terms of
1149 filesystem operations as well, but this cannot be implemented
1150 rigorously for most datastores.
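Examples
--------
A sketch ingesting a file that already exists on disk, given a resolved
`DatasetRef` called ``ref`` (the path and transfer mode are
illustrative)::

    dataset = FileDataset(path="/data/raw/exposure-903334.fits",
                          refs=[ref])
    butler.ingest(dataset, transfer="symlink")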
1151 """
1152 raise NotImplementedError()
1154 @abstractmethod
1155 def export(
1156 self,
1157 *,
1158 directory: str | None = None,
1159 filename: str | None = None,
1160 format: str | None = None,
1161 transfer: str | None = None,
1162 ) -> AbstractContextManager[RepoExportContext]:
1163 """Export datasets from the repository represented by this `Butler`.
1165 This method is a context manager that returns a helper object
1166 (`RepoExportContext`) that is used to indicate what information from
1167 the repository should be exported.
1169 Parameters
1170 ----------
1171 directory : `str`, optional
1172 Directory dataset files should be written to if ``transfer`` is not
1173 `None`.
1174 filename : `str`, optional
1175 Name for the file that will include database information associated
1176 with the exported datasets. If this is not an absolute path and
1177 ``directory`` is not `None`, it will be written to ``directory``
1178 instead of the current working directory. Defaults to
1179 "export.{format}".
1180 format : `str`, optional
1181 File format for the database information file. If `None`, the
1182 extension of ``filename`` will be used.
1183 transfer : `str`, optional
1184 Transfer mode passed to `Datastore.export`.
1186 Raises
1187 ------
1188 TypeError
1189 Raised if the set of arguments passed is inconsistent.
1191 Examples
1192 --------
1193 Typically the `Registry.queryDataIds` and `Registry.queryDatasets`
1194 methods are used to provide the iterables over data IDs and/or datasets
1195 to be exported::
1197 with butler.export(filename="exports.yaml") as export:
1198 # Export all flats, but none of the dimension element rows
1199 # (i.e. data ID information) associated with them.
1200 export.saveDatasets(butler.registry.queryDatasets("flat"),
1201 elements=())
1202 # Export all datasets that start with "deepCoadd_" and all of
1203 # their associated data ID information.
1204 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
1205 """
1206 raise NotImplementedError()
1208 @abstractmethod
1209 def import_(
1210 self,
1211 *,
1212 directory: ResourcePathExpression | None = None,
1213 filename: ResourcePathExpression | TextIO | None = None,
1214 format: str | None = None,
1215 transfer: str | None = None,
1216 skip_dimensions: set | None = None,
1217 ) -> None:
1218 """Import datasets into this repository that were exported from a
1219 different butler repository via `~lsst.daf.butler.Butler.export`.
1221 Parameters
1222 ----------
1223 directory : `~lsst.resources.ResourcePathExpression`, optional
1224 Directory containing dataset files to import from. If `None`,
1225 ``filename`` and all dataset file paths specified therein must
1226 be absolute.
1227 filename : `~lsst.resources.ResourcePathExpression` or `TextIO`
1228 A stream or name of file that contains database information
1229 associated with the exported datasets, typically generated by
1230 `~lsst.daf.butler.Butler.export`. If this is a string (name) or
1231 `~lsst.resources.ResourcePath` and is not an absolute path,
1232 it will first be looked for relative to ``directory`` and if not
1233 found there it will be looked for in the current working
1234 directory. Defaults to "export.{format}".
1235 format : `str`, optional
1236 File format for ``filename``. If `None`, the extension of
1237 ``filename`` will be used.
1238 transfer : `str`, optional
1239 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
1240 skip_dimensions : `set`, optional
1241 Names of dimensions that should be skipped and not imported.
1243 Raises
1244 ------
1245 TypeError
1246 Raised if the set of arguments passed is inconsistent, or if the
1247 butler is read-only.
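Examples
--------
A sketch importing an export file produced by `export`, copying dataset
files from the export directory (paths are illustrative)::

    butler.import_(directory="/path/to/exports",
                   filename="export.yaml", transfer="copy")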
1248 """
1249 raise NotImplementedError()
1251 @abstractmethod
1252 def transfer_dimension_records_from(
1253 self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
1254 ) -> None:
1255 """Transfer dimension records to this Butler from another Butler.
1257 Parameters
1258 ----------
1259 source_butler : `LimitedButler` or `Butler`
1260 Butler from which the records are to be transferred. If data IDs
1261 in ``source_refs`` are not expanded then this has to be a full
1262 `Butler` whose registry will be used to expand data IDs. If the
1263 source refs contain coordinates that are used to populate other
1264 records then this will also need to be a full `Butler`.
1265 source_refs : iterable of `DatasetRef`
1266 Datasets defined in the source butler whose dimension records
1267 should be transferred to this butler. In most circumstances,
1268 transfer is faster if the dataset refs are expanded.
1269 """
1270 raise NotImplementedError()
1272 @abstractmethod
1273 def transfer_from(
1274 self,
1275 source_butler: LimitedButler,
1276 source_refs: Iterable[DatasetRef],
1277 transfer: str = "auto",
1278 skip_missing: bool = True,
1279 register_dataset_types: bool = False,
1280 transfer_dimensions: bool = False,
1281 ) -> Collection[DatasetRef]:
1282 """Transfer datasets to this Butler from a run in another Butler.
1284 Parameters
1285 ----------
1286 source_butler : `LimitedButler`
1287 Butler from which the datasets are to be transferred. If data IDs
1288 in ``source_refs`` are not expanded then this has to be a full
1289 `Butler` whose registry will be used to expand data IDs.
1290 source_refs : iterable of `DatasetRef`
1291 Datasets defined in the source butler that should be transferred to
1292 this butler. In most circumstances, ``transfer_from`` is faster if
1293 the dataset refs are expanded.
1294 transfer : `str`, optional
1295 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`.
1296 skip_missing : `bool`
1297 If `True`, datasets with no datastore artifact associated with
1298 them are not transferred. If `False` a registry entry will be
1299 created even if no datastore record is created (and so will
1300 look equivalent to the dataset being unstored).
1301 register_dataset_types : `bool`
1302 If `True` any missing dataset types are registered. Otherwise
1303 an exception is raised.
1304 transfer_dimensions : `bool`, optional
1305 If `True`, dimension record data associated with the new datasets
1306 will be transferred.
1308 Returns
1309 -------
1310 refs : `list` of `DatasetRef`
1311 The refs added to this Butler.
1313 Notes
1314 -----
1315 The datastore artifact has to exist for a transfer
1316 to be made but non-existence is not an error.
1318 Datasets that already exist in this run will be skipped.
1320 The datasets are imported as part of a transaction, although
1321 dataset types are registered before the transaction is started.
1322 This means that it is possible for a dataset type to be registered
1323 even though transfer has failed.
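Examples
--------
A sketch transferring a query result from another repository
(collection and dataset type names are illustrative)::

    source = Butler.from_config("/path/to/source-repo")
    refs = source.registry.queryDatasets(
        "calexp", collections="u/alice/DM-50000"
    )
    transferred = butler.transfer_from(source, refs, transfer="copy",
                                       register_dataset_types=True)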
1324 """
1325 raise NotImplementedError()
1327 @abstractmethod
1328 def validateConfiguration(
1329 self,
1330 logFailures: bool = False,
1331 datasetTypeNames: Iterable[str] | None = None,
1332 ignore: Iterable[str] | None = None,
1333 ) -> None:
1334 """Validate butler configuration.
1336 Checks that each `DatasetType` can be stored in the `Datastore`.
1338 Parameters
1339 ----------
1340 logFailures : `bool`, optional
1341 If `True`, output a log message for every validation error
1342 detected.
1343 datasetTypeNames : iterable of `str`, optional
1344 The `DatasetType` names that should be checked. This allows
1345 only a subset to be selected.
1346 ignore : iterable of `str`, optional
1347 Names of DatasetTypes to skip over. This can be used to skip
1348 known problems. If a named `DatasetType` corresponds to a
1349 composite, all components of that `DatasetType` will also be
1350 ignored.
1352 Raises
1353 ------
1354 ButlerValidationError
1355 Raised if there is some inconsistency with how this Butler
1356 is configured.
1357 """
1358 raise NotImplementedError()
1360 @property
1361 @abstractmethod
1362 def collections(self) -> Sequence[str]:
1363 """The collections to search by default, in order
1364 (`~collections.abc.Sequence` [ `str` ]).
1365 """
1366 raise NotImplementedError()
1368 @property
1369 @abstractmethod
1370 def run(self) -> str | None:
1371 """Name of the run this butler writes outputs to by default (`str` or
1372 `None`).
1373 """
1374 raise NotImplementedError()
1376 @property
1377 @abstractmethod
1378 def registry(self) -> Registry:
1379 """The object that manages dataset metadata and relationships
1380 (`Registry`).
1382 Many operations that don't involve reading or writing butler datasets
1383 are accessible only via `Registry` methods. Eventually these methods
1384 will be replaced by equivalent `Butler` methods.
1385 """
1386 raise NotImplementedError()
1388 @abstractmethod
1389 def _query(self) -> AbstractContextManager[Query]:
1390 """Context manager returning a `Query` object used for construction
1391 and execution of complex queries.
1392 """
1393 raise NotImplementedError()
1395 @abstractmethod
1396 def _query_data_ids(
1397 self,
1398 dimensions: DimensionGroup | Iterable[str] | str,
1399 *,
1400 data_id: DataId | None = None,
1401 where: str = "",
1402 bind: Mapping[str, Any] | None = None,
1403 expanded: bool = False,
1404 order_by: Iterable[str] | str | None = None,
1405 limit: int | None = None,
1406 offset: int | None = None,
1407 explain: bool = True,
1408 **kwargs: Any,
1409 ) -> list[DataCoordinate]:
1410 """Query for data IDs matching user-provided criteria.
1412 Parameters
1413 ----------
1414 dimensions : `DimensionGroup`, `str`, or \
1415 `~collections.abc.Iterable` [`str`]
1416 The dimensions of the data IDs to yield, as either `DimensionGroup`
1417 instances or `str`. Will be automatically expanded to a complete
1418 `DimensionGroup`.
1419 data_id : `dict` or `DataCoordinate`, optional
1420 A data ID whose key-value pairs are used as equality constraints
1421 in the query.
1422 where : `str`, optional
1423 A string expression similar to a SQL WHERE clause. May involve
1424 any column of a dimension table or (as a shortcut for the primary
1425 key column of a dimension table) dimension name. See
1426 :ref:`daf_butler_dimension_expressions` for more information.
1427 bind : `~collections.abc.Mapping`, optional
1428 Mapping containing literal values that should be injected into the
1429 ``where`` expression, keyed by the identifiers they replace.
1430 Values of collection type can be expanded in some cases; see
1431 :ref:`daf_butler_dimension_expressions_identifiers` for more
1432 information.
1433 expanded : `bool`, optional
1434 If `True` (default is `False`) then returned data IDs will have
1435 dimension records.
1436 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1437 Names of the columns/dimensions to use for ordering returned data
1438 IDs. Column name can be prefixed with minus (``-``) to use
1439 descending ordering.
1440 limit : `int`, optional
1441 Upper limit on the number of returned records.
1442 offset : `int`, optional
1443 The number of records to skip before returning at most ``limit``
1444 records. If ``offset`` is specified then ``limit`` must be
1445 specified as well.
1446 explain : `bool`, optional
1447 If `True` (default) then `EmptyQueryResultError` exception is
1448 raised when resulting list is empty. The exception contains
1449 non-empty list of strings explaining possible causes for empty
1450 result.
1451 **kwargs
1452 Additional keyword arguments are forwarded to
1453 `DataCoordinate.standardize` when processing the ``data_id``
1454 argument (and may be used to provide a constraining data ID even
1455 when the ``data_id`` argument is `None`).
1457 Returns
1458 -------
1459 dataIds : `list` [`DataCoordinate`]
1460 Data IDs matching the given query parameters. These are always
1461 guaranteed to identify all dimensions (`DataCoordinate.hasFull`
1462 returns `True`).
1464 Raises
1465 ------
1466 lsst.daf.butler.registry.DataIdError
1467 Raised when ``data_id`` or keyword arguments specify unknown
1468 dimensions or values, or when they contain inconsistent values.
1469 lsst.daf.butler.registry.UserExpressionError
1470 Raised when ``where`` expression is invalid.
1471 lsst.daf.butler.EmptyQueryResultError
1472 Raised when query generates empty result and ``explain`` is set to
1473 `True`.
1474 TypeError
1475 Raised when the arguments are incompatible, e.g. ``offset`` is
1476 specified, but ``limit`` is not.
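Examples
--------
A sketch querying visit/detector data IDs with a ``where`` expression
and a bound literal (the instrument name and visit cutoff are
illustrative)::

    data_ids = butler._query_data_ids(
        ["visit", "detector"],
        where="instrument = 'HSC' AND visit > min_visit",
        bind={"min_visit": 900000},
    )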
1477 """
1478 raise NotImplementedError()
1480 @abstractmethod
1481 def _query_datasets(
1482 self,
1483 dataset_type: Any,
1484 collections: CollectionArgType | None = None,
1485 *,
1486 find_first: bool = True,
1487 data_id: DataId | None = None,
1488 where: str = "",
1489 bind: Mapping[str, Any] | None = None,
1490 expanded: bool = False,
1491 explain: bool = True,
1492 **kwargs: Any,
1493 ) -> list[DatasetRef]:
1494 """Query for dataset references matching user-provided criteria.
1496 Parameters
1497 ----------
1498 dataset_type : dataset type expression
1499 An expression that fully or partially identifies the dataset types
1500 to be queried. Allowed types include `DatasetType`, `str`,
1501 `re.Pattern`, and iterables thereof. The special value ``...`` can
1502 be used to query all dataset types. See
1503 :ref:`daf_butler_dataset_type_expressions` for more information.
1504 collections : collection expression, optional
1505 An expression that identifies the collections to search, such as a
1506 `str` (for full matches or partial matches via globs), `re.Pattern`
1507 (for partial matches), or iterable thereof. ``...`` can be used to
1508 search all collections (actually just all `~CollectionType.RUN`
1509 collections, because this will still find all datasets).
1510 If not provided, the default collections are used. See
1511 :ref:`daf_butler_collection_expressions` for more information.
1512 find_first : `bool`, optional
1513 If `True` (default), for each result data ID, only yield one
1514 `DatasetRef` of each `DatasetType`, from the first collection in
1515 which a dataset of that dataset type appears (according to the
1516 order of ``collections`` passed in). If `True`, ``collections``
1517 must not contain regular expressions and may not be ``...``.
1518 data_id : `dict` or `DataCoordinate`, optional
1519 A data ID whose key-value pairs are used as equality constraints
1520 in the query.
1521 where : `str`, optional
1522 A string expression similar to a SQL WHERE clause. May involve
1523 any column of a dimension table or (as a shortcut for the primary
1524 key column of a dimension table) a dimension name. See
1525 :ref:`daf_butler_dimension_expressions` for more information.
1526 bind : `~collections.abc.Mapping`, optional
1527 Mapping containing literal values that should be injected into the
1528 ``where`` expression, keyed by the identifiers they replace.
1529 Values of collection type can be expanded in some cases; see
1530 :ref:`daf_butler_dimension_expressions_identifiers` for more
1531 information.
1532 expanded : `bool`, optional
1533 If `True` (default is `False`) then returned data IDs will have
1534 dimension records.
1535 explain : `bool`, optional
1536 If `True` (default), an `EmptyQueryResultError` exception is
1537 raised when the resulting list is empty. The exception contains
1538 a non-empty list of strings explaining possible causes for the
1539 empty result.
1540 **kwargs
1541 Additional keyword arguments are forwarded to
1542 `DataCoordinate.standardize` when processing the ``data_id``
1543 argument (and may be used to provide a constraining data ID even
1544 when the ``data_id`` argument is `None`).
1546 Returns
1547 -------
1548 refs : `list` [`DatasetRef`]
1549 Dataset references matching the given query criteria. Nested data
1550 IDs are guaranteed to include values for all implied dimensions
1551 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
1552 include dimension records (`DataCoordinate.hasRecords` will be
1553 `False`) unless ``expanded=True`` was passed.
1556 Raises
1557 ------
1558 lsst.daf.butler.registry.DatasetTypeExpressionError
1559 Raised when ``dataset_type`` expression is invalid.
1560 lsst.daf.butler.registry.DataIdError
1561 Raised when ``data_id`` or keyword arguments specify unknown
1562 dimensions or values, or when they contain inconsistent values.
1563 lsst.daf.butler.registry.UserExpressionError
1564 Raised when ``where`` expression is invalid.
1565 lsst.daf.butler.EmptyQueryResultError
1566 Raised when query generates empty result and ``explain`` is set to
1567 `True`.
1568 TypeError
1569 Raised when the arguments are incompatible, such as when a
1570 collection wildcard is passed when ``find_first`` is `True`, or
1571 when ``collections`` is `None` and default butler collections are
1572 not defined.
1574 Notes
1575 -----
1576 When multiple dataset types are queried in a single call, the
1577 results of this operation are equivalent to querying for each dataset
1578 type separately in turn, and no information about the relationships
1579 between datasets of different types is included.
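Examples
--------
A minimal sketch of the intended call pattern; the dataset type name,
collection name, and data ID values below are illustrative placeholders
rather than guaranteed contents of any particular repository::

    # Placeholder dataset type, collection, and data ID values.
    butler = Butler("/path/to/repo")
    refs = butler._query_datasets(
        "calexp",
        collections="HSC/runs/example",
        find_first=True,
        instrument="HSC",
        visit=12345,
    )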
1580 """
1581 raise NotImplementedError()
1583 @abstractmethod
1584 def _query_dimension_records(
1585 self,
1586 element: str,
1587 *,
1588 data_id: DataId | None = None,
1589 where: str = "",
1590 bind: Mapping[str, Any] | None = None,
1591 order_by: Iterable[str] | str | None = None,
1592 limit: int | None = None,
1593 offset: int | None = None,
1594 explain: bool = True,
1595 **kwargs: Any,
1596 ) -> list[DimensionRecord]:
1597 """Query for dimension information matching user-provided criteria.
1599 Parameters
1600 ----------
1601 element : `str`
1602 The name of a dimension element to obtain records for.
1603 data_id : `dict` or `DataCoordinate`, optional
1604 A data ID whose key-value pairs are used as equality constraints
1605 in the query.
1606 where : `str`, optional
1607 A string expression similar to a SQL WHERE clause. See
1608 `_query_data_ids` and :ref:`daf_butler_dimension_expressions` for more
1609 information.
1610 bind : `~collections.abc.Mapping`, optional
1611 Mapping containing literal values that should be injected into the
1612 ``where`` expression, keyed by the identifiers they replace.
1613 Values of collection type can be expanded in some cases; see
1614 :ref:`daf_butler_dimension_expressions_identifiers` for more
1615 information.
1616 order_by : `~collections.abc.Iterable` [`str`] or `str`, optional
1617 Names of the columns/dimensions to use for ordering the returned
1618 records. A column name can be prefixed with a minus sign (``-``)
1619 to request descending ordering.
1620 limit : `int`, optional
1621 Upper limit on the number of returned records.
1622 offset : `int`, optional
1623 The number of records to skip before returning at most ``limit``
1624 records. If ``offset`` is specified then ``limit`` must be
1625 specified as well.
1626 explain : `bool`, optional
1627 If `True` (default), an `EmptyQueryResultError` exception is
1628 raised when the resulting list is empty. The exception contains
1629 a non-empty list of strings explaining possible causes for the
1630 empty result.
1631 **kwargs
1632 Additional keyword arguments are forwarded to
1633 `DataCoordinate.standardize` when processing the ``data_id``
1634 argument (and may be used to provide a constraining data ID even
1635 when the ``data_id`` argument is `None`).
1637 Returns
1638 -------
1639 records : `list` [`DimensionRecord`]
1640 Dimension records matching the given query parameters.
1642 Raises
1643 ------
1644 lsst.daf.butler.registry.DataIdError
1645 Raised when ``data_id`` or keyword arguments specify unknown
1646 dimensions or values, or when they contain inconsistent values.
1647 lsst.daf.butler.registry.UserExpressionError
1648 Raised when ``where`` expression is invalid.
1649 lsst.daf.butler.EmptyQueryResultError
1650 Raised when query generates empty result and ``explain`` is set to
1651 `True`.
1652 TypeError
1653 Raised when the arguments are incompatible, e.g. ``offset`` is
1654 specified, but ``limit`` is not.
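Examples
--------
A minimal sketch of the intended call pattern; the instrument value
below is an illustrative placeholder rather than guaranteed content of
any particular repository::

    # Placeholder repository path and instrument value.
    butler = Butler("/path/to/repo")
    records = butler._query_dimension_records(
        "detector",
        where="instrument = 'HSC'",
        order_by="detector",
        limit=5,
    )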
1657 """
1658 raise NotImplementedError()