Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 90%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Chained datastore."""

__all__ = ("ChainedDatastore",)

import itertools
import logging
import time
import warnings
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union

from lsst.daf.butler import (
    Constraints,
    DatasetRef,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreConfig,
    DatastoreRecordData,
    DatastoreValidationError,
    FileDataset,
)
from lsst.resources import ResourcePath
from lsst.utils import doImportType

if TYPE_CHECKING:
    from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
    from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class _IngestPrepData(Datastore.IngestPrepData):
    """Helper class for ChainedDatastore ingest implementation.

    Parameters
    ----------
    children : `list` of `tuple`
        Pairs of `Datastore`, `IngestPrepData` for all child datastores.
    """

    def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
        super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
        self.children = children


class ChainedDatastore(Datastore):
    """Chained Datastores to allow reads and writes from multiple datastores.

    A ChainedDatastore is configured with multiple datastore configurations.
    A ``put()`` is always sent to each datastore. A ``get()``
    operation is sent to each datastore in turn and the first datastore
    to return a valid dataset is used.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. This configuration must include a ``datastores`` field
        as a sequence of datastore configurations. The order in this sequence
        indicates the order to use for read operations.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value. This
        root is sent to each child datastore.

    Notes
    -----
    ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
    mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
    and `"hardlink"` if and only if all its child datastores do.
    """

    defaultConfigFile = "datastores/chainedDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey = "datastores"
    """Key to specify where child datastores are configured."""

    datastores: List[Datastore]
    """All the child datastores known to this datastore."""

    datastoreConstraints: Sequence[Optional[Constraints]]
    """Constraints to be applied to each of the child datastores."""

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set any filesystem-dependent config options for child Datastores to
        be appropriate for a new empty repository with the given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """

        # Extract the part of the config we care about updating
        datastoreConfig = DatastoreConfig(config, mergeDefaults=False)

        # And the subset of the full config that we can use for reference.
        # Do not bother with defaults because we are told this already has
        # them.
        fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)

        # Loop over each datastore config and pass the subsets to the
        # child datastores to process.

        containerKey = cls.containerKey
        for idx, (child, fullChild) in enumerate(
            zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey])
        ):
            childConfig = DatastoreConfig(child, mergeDefaults=False)
            fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
            datastoreClass = doImportType(fullChildConfig["cls"])
            if not issubclass(datastoreClass, Datastore):
                raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore")
            newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
            datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)

            # Reattach to parent
            datastoreConfig[containerKey, idx] = childConfig

        # Reattach modified datastore config to parent
        # If this has a datastore key we attach there, otherwise we assume
        # this information goes at the top of the config hierarchy.
        if DatastoreConfig.component in config:
            config[DatastoreConfig.component] = datastoreConfig
        else:
            config.update(datastoreConfig)

        return
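
    # Worked example of the per-child root computed above (a note for readers,
    # not code from this module): for ``root="/repo"`` with children
    # ``FileDatastore`` and ``InMemoryDatastore``, each child is asked to
    # configure itself under
    #   /repo/FileDatastore_0
    #   /repo/InMemoryDatastore_1
    # i.e. "{root}/{ClassName}_{index}", before the updated child configs are
    # reattached to the parent config.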

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[str] = None,
    ):
        super().__init__(config, bridgeManager)

        # Scan for child datastores and instantiate them with the same registry
        self.datastores = []
        for c in self.config["datastores"]:
            c = DatastoreConfig(c)
            datastoreType = doImportType(c["cls"])
            if not issubclass(datastoreType, Datastore):
                raise TypeError(f"Imported child class {c['cls']} is not a Datastore")
            datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
            log.debug("Creating child datastore %s", datastore.name)
            self.datastores.append(datastore)

        # Name ourselves based on our children
        if self.datastores:
            # We must set the names explicitly
            self._names = [d.name for d in self.datastores]
            childNames = ",".join(self.names)
        else:
            childNames = "(empty@{})".format(time.time())
            self._names = [childNames]
        self.name = "{}[{}]".format(type(self).__qualname__, childNames)

        # We declare we are ephemeral if all our child datastores declare
        # they are ephemeral
        isEphemeral = True
        for d in self.datastores:
            if not d.isEphemeral:
                isEphemeral = False
                break
        self.isEphemeral = isEphemeral

        # per-datastore override constraints
        if "datastore_constraints" in self.config:
            overrides = self.config["datastore_constraints"]

            if len(overrides) != len(self.datastores):
                raise DatastoreValidationError(
                    f"Number of registered datastores ({len(self.datastores)})"
                    " differs from number of constraints overrides"
                    f" {len(overrides)}"
                )

            self.datastoreConstraints = [
                Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides
            ]

        else:
            self.datastoreConstraints = (None,) * len(self.datastores)

        log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))

    @property
    def names(self) -> Tuple[str, ...]:
        return tuple(self._names)

    def __str__(self) -> str:
        chainName = ", ".join(str(ds) for ds in self.datastores)
        return chainName

    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to any of the datastores.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        for datastore in self.datastores:
            if datastore.knows(ref):
                log.debug("%s known to datastore %s", ref, datastore.name)
                return True
        return False

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence in any
            of the child datastores.
        """
        dataset_existence: Dict[DatasetRef, bool] = {}
        for datastore in self.datastores:
            dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence))

            # For the next datastore there is no point asking about datasets
            # we already know exist. No special exemption for ephemeral
            # datastores.
            refs = [ref for ref, exists in dataset_existence.items() if not exists]

        return dataset_existence
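
    # Brief usage sketch (hypothetical refs, not code from this module):
    #
    #   existence = datastore.mexists([ref1, ref2, ref3])
    #   missing = [ref for ref, found in existence.items() if not found]
    #
    # As implemented above, each child datastore is only asked about refs that
    # earlier children in the chain did not already report as existing.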

    def exists(self, ref: DatasetRef) -> bool:
        """Check if the dataset exists in one of the datastores.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in one of the child datastores.
        """
        for datastore in self.datastores:
            if datastore.exists(ref):
                log.debug("Found %s in datastore %s", ref, datastore.name)
                return True
        return False

    def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an InMemoryDataset from the store.

        The dataset is returned from the first datastore that has
        the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        for datastore in self.datastores:
            try:
                inMemoryObject = datastore.get(ref, parameters)
                log.debug("Found dataset %s in datastore %s", ref, datastore.name)
                return inMemoryObject
            except FileNotFoundError:
                pass

        raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))

    def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Write an InMemoryDataset with a given `DatasetRef` to each
        datastore.

        The put() to child datastores can fail with
        `DatasetTypeNotSupportedError`. The put() for this datastore will be
        deemed to have succeeded so long as at least one child datastore
        accepted the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            All datastores reported `DatasetTypeNotSupportedError`.
        """
        log.debug("Put %s", ref)

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        isPermanent = False
        nsuccess = 0
        npermanent = 0
        nephemeral = 0
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            if constraints is not None and not constraints.isAcceptable(ref):
                log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref)
                continue

            if datastore.isEphemeral:
                nephemeral += 1
            else:
                npermanent += 1
            try:
                datastore.put(inMemoryDataset, ref)
                nsuccess += 1
                if not datastore.isEphemeral:
                    isPermanent = True
            except DatasetTypeNotSupportedError:
                pass

        if nsuccess == 0:
            raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")

        if not isPermanent and npermanent > 0:
            warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)
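
    # Brief usage sketch (hypothetical names, not code from this module):
    #
    #   try:
    #       datastore.put(exposure, ref)
    #   except DatasetTypeNotSupportedError:
    #       # Every child either skipped the ref via constraints or rejected it.
    #       ...
    #
    # As implemented above, a put() that only lands in ephemeral children while
    # permanent children were eligible still succeeds but emits a warning.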

    def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class.
        if transfer != "auto":
            return transfer
        # Ask each datastore what they think auto means
        transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}

        # Remove any untranslated "auto" values
        transfers.discard(transfer)

        if len(transfers) == 1:
            return transfers.pop()
        if not transfers:
            # Everything reported "auto"
            return transfer

        raise RuntimeError(
            "Chained datastore does not yet support different transfer modes"
            f" from 'auto' in each child datastore (wanted {transfers})"
        )

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
        # Docstring inherited from Datastore._prepIngest.
        if transfer is None or transfer == "move":
            raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")

        def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
            acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
            if not acceptable:
                log.debug(
                    "Datastore %s skipping ingest via configuration for refs %s",
                    name,
                    ", ".join(str(ref) for ref in dataset.refs),
                )
                return False
            else:
                return True

        # Filter down to just datasets the chained datastore's own
        # configuration accepts.
        okForParent: List[FileDataset] = [
            dataset
            for dataset in datasets
            if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints)
        ]

        # Iterate over nested datastores and call _prepIngest on each.
        # Save the results to a list:
        children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
        # ...and remember whether all of the failures are due to
        # NotImplementedError being raised.
        allFailuresAreNotImplementedError = True
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            okForChild: List[FileDataset]
            if constraints is not None:
                okForChild = [
                    dataset
                    for dataset in okForParent
                    if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints)
                ]
            else:
                okForChild = okForParent
            try:
                prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
            except NotImplementedError:
                log.debug(
                    "Skipping ingest for datastore %s because transfer mode %s is not supported.",
                    datastore.name,
                    transfer,
                )
                continue
            allFailuresAreNotImplementedError = False
            children.append((datastore, prepDataForChild))
        if allFailuresAreNotImplementedError:
            raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
        return _IngestPrepData(children=children)

    def _finishIngest(
        self,
        prepData: _IngestPrepData,
        *,
        transfer: Optional[str] = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited from Datastore._finishIngest.
        for datastore, prepDataForChild in prepData.children:
            datastore._finishIngest(
                prepDataForChild, transfer=transfer, record_validation_info=record_validation_info
            )

    def getURIs(
        self, ref: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.

        Notes
        -----
        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.
        """
        DatastoreURIs = Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]
        log.debug("Requesting URIs for %s", ref)
        predictedUri: Optional[DatastoreURIs] = None
        predictedEphemeralUri: Optional[DatastoreURIs] = None
        firstEphemeralUri: Optional[DatastoreURIs] = None
        for datastore in self.datastores:
            if datastore.exists(ref):
                if not datastore.isEphemeral:
                    uri = datastore.getURIs(ref)
                    log.debug("Retrieved non-ephemeral URI: %s", uri)
                    return uri
                elif not firstEphemeralUri:
                    firstEphemeralUri = datastore.getURIs(ref)
            elif predict:
                if not predictedUri and not datastore.isEphemeral:
                    predictedUri = datastore.getURIs(ref, predict)
                elif not predictedEphemeralUri and datastore.isEphemeral:
                    predictedEphemeralUri = datastore.getURIs(ref, predict)

        if firstEphemeralUri:
            log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
            return firstEphemeralUri

        if predictedUri:
            log.debug("Retrieved predicted URI: %s", predictedUri)
            return predictedUri

        if predictedEphemeralUri:
            log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
            return predictedEphemeralUri

        raise FileNotFoundError("Dataset {} not in any datastore".format(ref))
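
    # Selection order implemented above, summarised for reference:
    #   1. first permanent child that has the dataset
    #   2. first ephemeral child that has the dataset
    #   3. predicted URI from the first permanent child (if ``predict``)
    #   4. predicted URI from the first ephemeral child (if ``predict``)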

    def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".

        Notes
        -----
        If the datastore does not have entities that relate well
        to the concept of a URI the returned URI string will be
        descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        RuntimeError
            Raised if a request is made for a single URI but multiple URIs
            are associated with this dataset.
        """
        log.debug("Requesting URI for %s", ref)
        primary, components = self.getURIs(ref, predict)
        if primary is None or components:
            raise RuntimeError(
                f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead."
            )
        return primary

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the file artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which file artifacts are to be retrieved.
            A single ref can result in multiple files. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the file artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the file artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.
        """
        if not destination.isdir():
            raise ValueError(f"Destination location must refer to a directory. Given {destination}")

        # Using getURIs is not feasible since it becomes difficult to
        # determine the path within the datastore later on. For now
        # follow getURIs implementation approach.

        pending = set(refs)

        # There is a question as to whether an exception should be raised
        # early if some of the refs are missing, or whether files should be
        # transferred until a problem is hit. Prefer to complain up front.
        # Use the datastore integer as primary key.
        grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}

        for number, datastore in enumerate(self.datastores):
            if datastore.isEphemeral:
                # In the future we will want to distinguish in-memory from
                # caching datastore since using an on-disk local
                # cache is exactly what we should be doing.
                continue
            datastore_refs = {ref for ref in pending if datastore.exists(ref)}

            if datastore_refs:
                grouped_by_datastore[number] = datastore_refs

                # Remove these from the pending list so that we do not bother
                # looking for them any more.
                pending = pending - datastore_refs

        if pending:
            raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")

        # Now do the transfer.
        targets: List[ResourcePath] = []
        for number, datastore_refs in grouped_by_datastore.items():
            targets.extend(
                self.datastores[number].retrieveArtifacts(
                    datastore_refs,
                    destination,
                    transfer=transfer,
                    preserve_path=preserve_path,
                    overwrite=overwrite,
                )
            )

        return targets
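
    # Brief usage sketch (hypothetical destination path, not code from this
    # module):
    #
    #   from lsst.resources import ResourcePath
    #
    #   destination = ResourcePath("/tmp/export/", forceDirectory=True)
    #   copied = datastore.retrieveArtifacts(refs, destination, transfer="copy")
    #
    # As implemented above, ephemeral children are skipped and every ref must be
    # found in at least one permanent child or a RuntimeError is raised.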

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the datastore that a dataset can be removed.

        The dataset will be removed from each datastore. The dataset is
        not required to exist in every child datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist. Raised if none
            of the child datastores removed the dataset.
        """
        log.debug("Removing %s", ref)
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        for datastore in tuple(self.datastores):
            datastore.forget(refs)

    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        if isinstance(ref, DatasetRef):
            ref_label = str(ref)
        else:
            ref_label = "bulk datasets"

        log.debug("Trashing %s", ref_label)

        counter = 0
        for datastore in self.datastores:
            try:
                datastore.trash(ref, ignore_errors=ignore_errors)
                counter += 1
            except FileNotFoundError:
                pass

        if counter == 0:
            err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
            if ignore_errors:
                log.warning(err_msg)
            else:
                raise FileNotFoundError(err_msg)

    def emptyTrash(self, ignore_errors: bool = True) -> None:
        for datastore in self.datastores:
            datastore.emptyTrash(ignore_errors=ignore_errors)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        self.put(inMemoryDataset, ref)

    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks each datastore in turn.
        """

        # Need to catch each of the datastore outputs and ensure that
        # all are tested.
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateConfiguration(entities, logFailures=logFailures)
            except DatastoreValidationError as e:
                if logFailures:
                    log.critical("Datastore %s failed validation", datastore.name)
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        # Docstring is inherited from base class
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateKey(lookupKey, entity)
            except DatastoreValidationError as e:
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        # Docstring is inherited from base class
        keys = set()
        for datastore in self.datastores:
            keys.update(datastore.getLookupKeys())

        keys.update(self.constraints.getLookupKeys())
        for p in self.datastoreConstraints:
            if p is not None:
                keys.update(p.getLookupKeys())

        return keys

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        # Docstring inherited.
        # We can't safely use `self.datastoreConstraints` with `entity` to
        # check whether a child datastore would even want to ingest this
        # dataset, because we don't want to filter out datastores that might
        # need an expanded data ID based on incomplete information (e.g. we
        # pass a StorageClass, but the constraint dispatches on DatasetType).
        # So we pessimistically check if any datastore would need an expanded
        # data ID for this transfer mode.
        return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)

    def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None:
        # Docstring inherited from the base class.

        for datastore in self.datastores:
            datastore.import_records(data)

    def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
        # Docstring inherited from the base class.

        all_records: Dict[str, DatastoreRecordData] = {}

        # Merge all sub-datastore records into one structure
        for datastore in self.datastores:
            sub_records = datastore.export_records(refs)
            for name, record_data in sub_records.items():
                # All datastore names must be unique in a chain.
                if name in all_records:
                    raise ValueError(f"Non-unique datastore name found in datastore {datastore}")
                all_records[name] = record_data

        return all_records