Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py : 91%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Chained datastore."""
26__all__ = ("ChainedDatastore",)
28import time
29import logging
30import warnings
31import itertools
32from typing import (
33 TYPE_CHECKING,
34 Any,
35 Dict,
36 List,
37 Iterable,
38 Mapping,
39 Optional,
40 Sequence,
41 Set,
42 Tuple,
43 Union,
44)
46from lsst.utils import doImport
47from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
48 DatastoreValidationError, Constraints, FileDataset, DatasetRef
50if TYPE_CHECKING:  # coverage: 50 ↛ 51, the condition on line 50 was never true
51 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
52 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager
54log = logging.getLogger(__name__)
57class _IngestPrepData(Datastore.IngestPrepData):
58 """Helper class for ChainedDatastore ingest implementation.
60 Parameters
61 ----------
62 children : `list` of `tuple`
63 Pairs of `Datastore`, `IngestPrepData` for all child datastores.
64 """
65 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
66 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
67 self.children = children
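        # For example (hypothetical names), ``children`` might look like
        # [(fileDatastore, filePrepData), (inMemoryDatastore, memoryPrepData)],
        # one pair per child datastore that produced prep data.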
70class ChainedDatastore(Datastore):
71 """Chained Datastores to allow read and writes from multiple datastores.
73 A ChainedDatastore is configured with multiple datastore configurations.
74 A ``put()`` is sent to each child datastore that accepts the dataset. A ``get()``
75 operation is sent to each datastore in turn and the first datastore
76 to return a valid dataset is used.
78 Parameters
79 ----------
80 config : `DatastoreConfig` or `str`
81 Configuration. This configuration must include a ``datastores`` field
82 as a sequence of datastore configurations. The order in this sequence
83 indicates the order to use for read operations.
84 bridgeManager : `DatastoreRegistryBridgeManager`
85 Object that manages the interface between `Registry` and datastores.
86 butlerRoot : `str`, optional
87 New datastore root to use to override the configuration value. This
88 root is sent to each child datastore.
90 Notes
91 -----
92 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
93 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
94 and `"hardlink"` if and only if all its child datastores do.
95 """
97 defaultConfigFile = "datastores/chainedDatastore.yaml"
98 """Path to configuration defaults. Accessed within the ``configs`` resource
99 or relative to a search path. Can be None if no defaults specified.
100 """
102 containerKey = "datastores"
103 """Key to specify where child datastores are configured."""
105 datastores: List[Datastore]
106 """All the child datastores known to this datastore."""
108 datastoreConstraints: Sequence[Optional[Constraints]]
109 """Constraints to be applied to each of the child datastores."""
111 @classmethod
112 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
113 """Set any filesystem-dependent config options for child Datastores to
114 be appropriate for a new empty repository with the given root.
116 Parameters
117 ----------
118 root : `str`
119 Filesystem path to the root of the data repository.
120 config : `Config`
121 A `Config` to update. Only the subset understood by
122 this component will be updated. Will not expand
123 defaults.
124 full : `Config`
125 A complete config with all defaults expanded that can be
126 converted to a `DatastoreConfig`. Read-only and will not be
127 modified by this method.
128 Repository-specific options that should not be obtained
129 from defaults when Butler instances are constructed
130 should be copied from ``full`` to ``config``.
131 overwrite : `bool`, optional
132 If `False`, do not modify a value in ``config`` if the value
133 already exists. Default is always to overwrite with the provided
134 ``root``.
136 Notes
137 -----
138 If a keyword is explicitly defined in the supplied ``config`` it
139 will not be overridden by this method if ``overwrite`` is `False`.
140 This allows explicit values set in external configs to be retained.
141 """
143 # Extract the part of the config we care about updating
144 datastoreConfig = DatastoreConfig(config, mergeDefaults=False)
146 # And the subset of the full config that we can use for reference.
147 # Do not bother with defaults because we are told this already has
148 # them.
149 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)
151 # Loop over each datastore config and pass the subsets to the
152 # child datastores to process.
154 containerKey = cls.containerKey
155 for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey],
156 fullDatastoreConfig[containerKey])):
157 childConfig = DatastoreConfig(child, mergeDefaults=False)
158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
159 datastoreClass = doImport(fullChildConfig["cls"])
160 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
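            # For example, root="/repo" with a FileDatastore child at index 1
            # would give a hypothetical newroot of "/repo/FileDatastore_1".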
161 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)
163 # Reattach to parent
164 datastoreConfig[containerKey, idx] = childConfig
166 # Reattach modified datastore config to parent
167 # If this has a datastore key we attach there, otherwise we assume
168 # this information goes at the top of the config hierarchy.
169 if DatastoreConfig.component in config:
170 config[DatastoreConfig.component] = datastoreConfig
171 else:
172 config.update(datastoreConfig)
174 return
176 def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
177 butlerRoot: Optional[str] = None):
178 super().__init__(config, bridgeManager)
180 # Scan for child datastores and instantiate them with the same registry
181 self.datastores = []
182 for c in self.config["datastores"]:
183 c = DatastoreConfig(c)
184 datastoreType = doImport(c["cls"])
185 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
186 log.debug("Creating child datastore %s", datastore.name)
187 self.datastores.append(datastore)
189 # Name ourself based on our children
190 if self.datastores:  # coverage: 190 ↛ 195, the condition on line 190 was never false
191 # We must set the names explicitly
192 self._names = [d.name for d in self.datastores]
193 childNames = ",".join(self.names)
194 else:
195 childNames = "(empty@{})".format(time.time())
196 self._names = [childNames]
197 self.name = "{}[{}]".format(type(self).__qualname__, childNames)
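        # For example, children named "datastore1" and "datastore2" would give
        # the hypothetical name "ChainedDatastore[datastore1,datastore2]".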
199 # We declare we are ephemeral if all our child datastores declare
200 # they are ephemeral
201 isEphemeral = True
202 for d in self.datastores:
203 if not d.isEphemeral:
204 isEphemeral = False
205 break
206 self.isEphemeral = isEphemeral
208 # per-datastore override constraints
209 if "datastore_constraints" in self.config:
210 overrides = self.config["datastore_constraints"]
212 if len(overrides) != len(self.datastores):  # coverage: 212 ↛ 213, the condition on line 212 was never true
213 raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
214 " differs from number of constraints overrides"
215 f" {len(overrides)}")
217 self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
218 for c in overrides]
220 else:
221 self.datastoreConstraints = (None,) * len(self.datastores)
223 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))
225 @property
226 def names(self) -> Tuple[str, ...]:
227 return tuple(self._names)
229 def __str__(self) -> str:
230 chainName = ", ".join(str(ds) for ds in self.datastores)
231 return chainName
233 def knows(self, ref: DatasetRef) -> bool:
234 """Check if the dataset is known to any of the datastores.
236 Does not check for existence of any artifact.
238 Parameters
239 ----------
240 ref : `DatasetRef`
241 Reference to the required dataset.
243 Returns
244 -------
245 exists : `bool`
246 `True` if the dataset is known to the datastore.
247 """
248 for datastore in self.datastores:
249 if datastore.knows(ref):
250 log.debug("%s known to datastore %s", ref, datastore.name)
251 return True
252 return False
254 def exists(self, ref: DatasetRef) -> bool:
255 """Check if the dataset exists in one of the datastores.
257 Parameters
258 ----------
259 ref : `DatasetRef`
260 Reference to the required dataset.
262 Returns
263 -------
264 exists : `bool`
265 `True` if the entity exists in one of the child datastores.
266 """
267 for datastore in self.datastores:
268 if datastore.exists(ref):
269 log.debug("Found %s in datastore %s", ref, datastore.name)
270 return True
271 return False
273 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
274 """Load an InMemoryDataset from the store.
276 The dataset is returned from the first datastore that has
277 the dataset.
279 Parameters
280 ----------
281 ref : `DatasetRef`
282 Reference to the required Dataset.
283 parameters : `dict`
284 `StorageClass`-specific parameters that specify, for example,
285 a slice of the dataset to be loaded.
287 Returns
288 -------
289 inMemoryDataset : `object`
290 Requested dataset or slice thereof as an InMemoryDataset.
292 Raises
293 ------
294 FileNotFoundError
295 Requested dataset can not be retrieved.
296 TypeError
297 Return value from formatter has unexpected type.
298 ValueError
299 Formatter failed to process the dataset.
300 """
302 for datastore in self.datastores:
303 try:
304 inMemoryObject = datastore.get(ref, parameters)
305 log.debug("Found dataset %s in datastore %s", ref, datastore.name)
306 return inMemoryObject
307 except FileNotFoundError:
308 pass
310 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))
312 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
313 """Write a InMemoryDataset with a given `DatasetRef` to each
314 datastore.
316 The put() to child datastores can fail with
317 `DatasetTypeNotSupportedError`. The put() for this datastore will be
318 deemed to have succeeded so long as at least one child datastore
319 accepted the inMemoryDataset.
321 Parameters
322 ----------
323 inMemoryDataset : `object`
324 The dataset to store.
325 ref : `DatasetRef`
326 Reference to the associated Dataset.
328 Raises
329 ------
330 TypeError
331 Supplied object and storage class are inconsistent.
332 DatasetTypeNotSupportedError
333 All datastores reported `DatasetTypeNotSupportedError`.
334 """
335 log.debug("Put %s", ref)
337 # Confirm that we can accept this dataset
338 if not self.constraints.isAcceptable(ref):
339 # Raise rather than use boolean return value.
340 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
341 " configuration.")
343 isPermanent = False
344 nsuccess = 0
345 npermanent = 0
346 nephemeral = 0
347 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
348 if constraints is not None and not constraints.isAcceptable(ref):
349 log.debug("Datastore %s skipping put via configuration for ref %s",
350 datastore.name, ref)
351 continue
353 if datastore.isEphemeral:
354 nephemeral += 1
355 else:
356 npermanent += 1
357 try:
358 datastore.put(inMemoryDataset, ref)
359 nsuccess += 1
360 if not datastore.isEphemeral:
361 isPermanent = True
362 except DatasetTypeNotSupportedError:
363 pass
365 if nsuccess == 0:
366 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")
368 if not isPermanent and npermanent > 0:  # coverage: 368 ↛ 369, the condition on line 368 was never true
369 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)
371 if self._transaction is not None:
372 self._transaction.registerUndo('put', self.remove, ref)
374 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
375 # Docstring inherited from base class.
376 if transfer != "auto":
377 return transfer
378 # Ask each datastore what they think auto means
379 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}
381 # Remove any untranslated "auto" values
382 transfers.discard(transfer)
384 if len(transfers) == 1:  # coverage: 384 ↛ 385, the condition on line 384 was never true
385 return transfers.pop()
386 if not transfers:  # coverage: 386 ↛ 390, the condition on line 386 was never false
387 # Everything reported "auto"
388 return transfer
390 raise RuntimeError("Chained datastore does not yet support different transfer modes"
391 f" from 'auto' in each child datastore (wanted {transfers})")
393 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
394 # Docstring inherited from Datastore._prepIngest.
395 if transfer is None or transfer == "move":
396 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")
398 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
399 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
400 if not acceptable:
401 log.debug("Datastore %s skipping ingest via configuration for refs %s",
402 name, ", ".join(str(ref) for ref in dataset.refs))
403 return False
404 else:
405 return True
407 # Filter down to just datasets the chained datastore's own
408 # configuration accepts.
409 okForParent: List[FileDataset] = [dataset for dataset in datasets
410 if isDatasetAcceptable(dataset, name=self.name,
411 constraints=self.constraints)]
413 # Iterate over nested datastores and call _prepIngest on each.
414 # Save the results to a list:
415 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
416 # ...and remember whether all of the failures are due to
417 # NotImplementedError being raised.
418 allFailuresAreNotImplementedError = True
419 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
420 okForChild: List[FileDataset]
421 if constraints is not None:
422 okForChild = [dataset for dataset in okForParent
423 if isDatasetAcceptable(dataset, name=datastore.name,
424 constraints=constraints)]
425 else:
426 okForChild = okForParent
427 try:
428 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
429 except NotImplementedError:
430 log.debug("Skipping ingest for datastore %s because transfer "
431 "mode %s is not supported.", datastore.name, transfer)
432 continue
433 allFailuresAreNotImplementedError = False
434 children.append((datastore, prepDataForChild))
435 if allFailuresAreNotImplementedError:
436 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
437 return _IngestPrepData(children=children)
439 def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
440 # Docstring inherited from Datastore._finishIngest.
441 for datastore, prepDataForChild in prepData.children:
442 datastore._finishIngest(prepDataForChild, transfer=transfer)
444 def getURIs(self, ref: DatasetRef,
445 predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
446 """Return URIs associated with dataset.
448 Parameters
449 ----------
450 ref : `DatasetRef`
451 Reference to the required dataset.
452 predict : `bool`, optional
453 If the datastore does not know about the dataset, should it
454 return a predicted URI or not?
456 Returns
457 -------
458 primary : `ButlerURI`
459 The URI to the primary artifact associated with this dataset.
460 If the dataset was disassembled within the datastore this
461 may be `None`.
462 components : `dict`
463 URIs to any components associated with the dataset artifact.
464 Can be empty if there are no components.
466 Notes
467 -----
468 The returned URI is from the first datastore in the list that has
469 the dataset with preference given to the first dataset coming from
470 a permanent datastore. If no datastores have the dataset and prediction
471 is allowed, the predicted URI for the first datastore in the list will
472 be returned.
473 """
474 DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
475 log.debug("Requesting URIs for %s", ref)
476 predictedUri: Optional[DatastoreURIs] = None
477 predictedEphemeralUri: Optional[DatastoreURIs] = None
478 firstEphemeralUri: Optional[DatastoreURIs] = None
479 for datastore in self.datastores:
480 if datastore.exists(ref):
481 if not datastore.isEphemeral:
482 uri = datastore.getURIs(ref)
483 log.debug("Retrieved non-ephemeral URI: %s", uri)
484 return uri
485 elif not firstEphemeralUri:
486 firstEphemeralUri = datastore.getURIs(ref)
487 elif predict:
488 if not predictedUri and not datastore.isEphemeral:
489 predictedUri = datastore.getURIs(ref, predict)
490 elif not predictedEphemeralUri and datastore.isEphemeral:
491 predictedEphemeralUri = datastore.getURIs(ref, predict)
493 if firstEphemeralUri:
494 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
495 return firstEphemeralUri
497 if predictedUri:
498 log.debug("Retrieved predicted URI: %s", predictedUri)
499 return predictedUri
501 if predictedEphemeralUri:
502 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
503 return predictedEphemeralUri
505 raise FileNotFoundError("Dataset {} not in any datastore".format(ref))
507 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
508 """URI to the Dataset.
510 The returned URI is from the first datastore in the list that has
511 the dataset with preference given to the first dataset coming from
512 a permanent datastore. If no datastores have the dataset and prediction
513 is allowed, the predicted URI for the first datastore in the list will
514 be returned.
516 Parameters
517 ----------
518 ref : `DatasetRef`
519 Reference to the required Dataset.
520 predict : `bool`
521 If `True`, allow URIs to be returned of datasets that have not
522 been written.
524 Returns
525 -------
526 uri : `ButlerURI`
527 URI pointing to the dataset within the datastore. If the
528 dataset does not exist in the datastore, and if ``predict`` is
529 `True`, the URI will be a prediction and will include a URI
530 fragment "#predicted".
532 Notes
533 -----
534 If the datastore does not have entities that relate well
535 to the concept of a URI the returned URI string will be
536 descriptive. The returned URI is not guaranteed to be obtainable.
538 Raises
539 ------
540 FileNotFoundError
541 A URI has been requested for a dataset that does not exist and
542 guessing is not allowed.
543 RuntimeError
544 Raised if a request is made for a single URI but multiple URIs
545 are associated with this dataset.
546 """
547 log.debug("Requesting URI for %s", ref)
548 primary, components = self.getURIs(ref, predict)
549 if primary is None or components:  # coverage: 549 ↛ 550, the condition on line 549 was never true
550 raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
551 "Use Dataastore.getURIs() instead.")
552 return primary
554 def retrieveArtifacts(self, refs: Iterable[DatasetRef],
555 destination: ButlerURI, transfer: str = "auto",
556 preserve_path: bool = True,
557 overwrite: bool = False) -> List[ButlerURI]:
558 """Retrieve the file artifacts associated with the supplied refs.
560 Parameters
561 ----------
562 refs : iterable of `DatasetRef`
563 The datasets for which file artifacts are to be retrieved.
564 A single ref can result in multiple files. The refs must
565 be resolved.
566 destination : `ButlerURI`
567 Location to write the file artifacts.
568 transfer : `str`, optional
569 Method to use to transfer the artifacts. Must be one of the options
570 supported by `ButlerURI.transfer_from()`. "move" is not allowed.
571 preserve_path : `bool`, optional
572 If `True` the full path of the file artifact within the datastore
573 is preserved. If `False` the final file component of the path
574 is used.
575 overwrite : `bool`, optional
576 If `True` allow transfers to overwrite existing files at the
577 destination.
579 Returns
580 -------
581 targets : `list` of `ButlerURI`
582 URIs of file artifacts in destination location. Order is not
583 preserved.
584 """
585 if not destination.isdir():  # coverage: 585 ↛ 586, the condition on line 585 was never true
586 raise ValueError(f"Destination location must refer to a directory. Given {destination}")
588 # Using getURIs is not feasible since it becomes difficult to
589 # determine the path within the datastore later on. For now
590 # follow getURIs implementation approach.
592 pending = set(refs)
594 # There is a question as to whether an exception should be raised
595 # early if some of the refs are missing, or whether files should be
596 # transferred until a problem is hit. Prefer to complain up front.
597 # Use the datastore integer as primary key.
598 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}
600 for number, datastore in enumerate(self.datastores):
601 if datastore.isEphemeral:
602 # In the future we will want to distinguish in-memory from
603 # caching datastore since using an on-disk local
604 # cache is exactly what we should be doing.
605 continue
606 datastore_refs = {ref for ref in pending if datastore.exists(ref)}
608 if datastore_refs:
609 grouped_by_datastore[number] = datastore_refs
611 # Remove these from the pending list so that we do not bother
612 # looking for them any more.
613 pending = pending - datastore_refs
615 if pending:  # coverage: 615 ↛ 616, the condition on line 615 was never true
616 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")
618 # Now do the transfer.
619 targets: List[ButlerURI] = []
620 for number, datastore_refs in grouped_by_datastore.items():
621 targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
622 transfer=transfer,
623 preserve_path=preserve_path,
624 overwrite=overwrite))
626 return targets
628 def remove(self, ref: DatasetRef) -> None:
629 """Indicate to the datastore that a dataset can be removed.
631 The dataset will be removed from each datastore. The dataset is
632 not required to exist in every child datastore.
634 Parameters
635 ----------
636 ref : `DatasetRef`
637 Reference to the required dataset.
639 Raises
640 ------
641 FileNotFoundError
642 Attempt to remove a dataset that does not exist. Raised if none
643 of the child datastores removed the dataset.
644 """
645 log.debug("Removing %s", ref)
646 self.trash(ref, ignore_errors=False)
647 self.emptyTrash(ignore_errors=False)
649 def forget(self, refs: Iterable[DatasetRef]) -> None:
650 for datastore in tuple(self.datastores):
651 datastore.forget(refs)
653 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
654 if isinstance(ref, DatasetRef):
655 ref_label = str(ref)
656 else:
657 ref_label = "bulk datasets"
659 log.debug("Trashing %s", ref_label)
661 counter = 0
662 for datastore in self.datastores:
663 try:
664 datastore.trash(ref, ignore_errors=ignore_errors)
665 counter += 1
666 except FileNotFoundError:
667 pass
669 if counter == 0:
670 err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
671 if ignore_errors:  # coverage: 671 ↛ 672, the condition on line 671 was never true
672 log.warning(err_msg)
673 else:
674 raise FileNotFoundError(err_msg)
676 def emptyTrash(self, ignore_errors: bool = True) -> None:
677 for datastore in self.datastores:
678 datastore.emptyTrash(ignore_errors=ignore_errors)
680 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
681 """Retrieve a dataset from an input `Datastore`,
682 and store the result in this `Datastore`.
684 Parameters
685 ----------
686 inputDatastore : `Datastore`
687 The external `Datastore` from which to retrieve the Dataset.
688 ref : `DatasetRef`
689 Reference to the required dataset in the input data store.
691 Notes
692 -----
693 The dataset is read from ``inputDatastore`` with ``get()`` and
694 written to each child datastore via ``put()``. This method does
695 not return a value.
696 """
697 assert inputDatastore is not self # unless we want it for renames?
698 inMemoryDataset = inputDatastore.get(ref)
699 self.put(inMemoryDataset, ref)
701 def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
702 logFailures: bool = False) -> None:
703 """Validate some of the configuration for this datastore.
705 Parameters
706 ----------
707 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
708 Entities to test against this configuration. Can be differing
709 types.
710 logFailures : `bool`, optional
711 If `True`, output a log message for every validation error
712 detected.
714 Raises
715 ------
716 DatastoreValidationError
717 Raised if there is a validation problem with a configuration.
718 All the problems are reported in a single exception.
720 Notes
721 -----
722 This method checks each datastore in turn.
723 """
725 # Need to catch each of the datastore outputs and ensure that
726 # all are tested.
727 failures = []
728 for datastore in self.datastores:
729 try:
730 datastore.validateConfiguration(entities, logFailures=logFailures)
731 except DatastoreValidationError as e:
732 if logFailures:  # coverage: 732 ↛ 734, the condition on line 732 was never false
733 log.critical("Datastore %s failed validation", datastore.name)
734 failures.append(f"Datastore {self.name}: {e}")
736 if failures:
737 msg = ";\n".join(failures)
738 raise DatastoreValidationError(msg)
740 def validateKey(self, lookupKey: LookupKey,
741 entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
742 # Docstring is inherited from base class
743 failures = []
744 for datastore in self.datastores:
745 try:
746 datastore.validateKey(lookupKey, entity)
747 except DatastoreValidationError as e:
748 failures.append(f"Datastore {self.name}: {e}")
750 if failures:
751 msg = ";\n".join(failures)
752 raise DatastoreValidationError(msg)
754 def getLookupKeys(self) -> Set[LookupKey]:
755 # Docstring is inherited from base class
756 keys = set()
757 for datastore in self.datastores:
758 keys.update(datastore.getLookupKeys())
760 keys.update(self.constraints.getLookupKeys())
761 for p in self.datastoreConstraints:
762 if p is not None:  # coverage: 762 ↛ 763, the condition on line 762 was never true
763 keys.update(p.getLookupKeys())
765 return keys
767 def needs_expanded_data_ids(
768 self,
769 transfer: Optional[str],
770 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
771 ) -> bool:
772 # Docstring inherited.
773 # We can't safely use `self.datastoreConstraints` with `entity` to
774 # check whether a child datastore would even want to ingest this
775 # dataset, because we don't want to filter out datastores that might
776 # need an expanded data ID based on incomplete information (e.g. we
777 # pass a StorageClass, but the constraint dispatches on DatasetType).
778 # So we pessimistically check if any datastore would need an expanded
779 # data ID for this transfer mode.
780 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  # coverage: 780 ↛ exit, the generator expression on line 780 never finished