Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 91%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Chained datastore."""
26__all__ = ("ChainedDatastore",)
28import time
29import logging
30import warnings
31import itertools
32from typing import (
33 TYPE_CHECKING,
34 Any,
35 Dict,
36 List,
37 Iterable,
38 Mapping,
39 Optional,
40 Sequence,
41 Set,
42 Tuple,
43 Union,
44)
46from lsst.utils import doImport
47from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
48 DatastoreValidationError, Constraints, FileDataset, DatasetRef
50if TYPE_CHECKING:  # coverage: 50 ↛ 51 (the condition on line 50 was never true)
51 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
52 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager
54log = logging.getLogger(__name__)
57class _IngestPrepData(Datastore.IngestPrepData):
58 """Helper class for ChainedDatastore ingest implementation.
60 Parameters
61 ----------
62 children : `list` of `tuple`
63 Pairs of `Datastore`, `IngestPrepData` for all child datastores.
64 """
65 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
66 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
67 self.children = children
70class ChainedDatastore(Datastore):
71 """Chained Datastores to allow read and writes from multiple datastores.
73 A ChainedDatastore is configured with multiple datastore configurations.
74 A ``put()`` is always sent to each datastore. A ``get()``
75 operation is sent to each datastore in turn and the first datastore
76 to return a valid dataset is used.
78 Parameters
79 ----------
80 config : `DatastoreConfig` or `str`
81 Configuration. This configuration must include a ``datastores`` field
82 as a sequence of datastore configurations. The order in this sequence
83 indicates the order to use for read operations.
84 bridgeManager : `DatastoreRegistryBridgeManager`
85 Object that manages the interface between `Registry` and datastores.
86 butlerRoot : `str`, optional
87 New datastore root to use to override the configuration value. This
88 root is sent to each child datastore.
90 Notes
91 -----
92 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
93 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
94 and `"hardlink"` if and only if all its child datastores do.
95 """
97 defaultConfigFile = "datastores/chainedDatastore.yaml"
98 """Path to configuration defaults. Accessed within the ``configs`` resource
99 or relative to a search path. Can be None if no defaults specified.
100 """
102 containerKey = "datastores"
103 """Key to specify where child datastores are configured."""
105 datastores: List[Datastore]
106 """All the child datastores known to this datastore."""
108 datastoreConstraints: Sequence[Optional[Constraints]]
109 """Constraints to be applied to each of the child datastores."""
111 @classmethod
112 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
113 """Set any filesystem-dependent config options for child Datastores to
114 be appropriate for a new empty repository with the given root.
116 Parameters
117 ----------
118 root : `str`
119 Filesystem path to the root of the data repository.
120 config : `Config`
121 A `Config` to update. Only the subset understood by
122 this component will be updated. Will not expand
123 defaults.
124 full : `Config`
125 A complete config with all defaults expanded that can be
126 converted to a `DatastoreConfig`. Read-only and will not be
127 modified by this method.
128 Repository-specific options that should not be obtained
129 from defaults when Butler instances are constructed
130 should be copied from ``full`` to ``config``.
131 overwrite : `bool`, optional
132 If `False`, do not modify a value in ``config`` if the value
133 already exists. Default is always to overwrite with the provided
134 ``root``.
136 Notes
137 -----
138 If a keyword is explicitly defined in the supplied ``config`` it
139 will not be overridden by this method if ``overwrite`` is `False`.
140 This allows explicit values set in external configs to be retained.
141 """
143 # Extract the part of the config we care about updating
144 datastoreConfig = DatastoreConfig(config, mergeDefaults=False)
146 # And the subset of the full config that we can use for reference.
147 # Do not bother with defaults because we are told this already has
148 # them.
149 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)
151 # Loop over each datastore config and pass the subsets to the
152 # child datastores to process.
154 containerKey = cls.containerKey
155 for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey],
156 fullDatastoreConfig[containerKey])):
157 childConfig = DatastoreConfig(child, mergeDefaults=False)
158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
159 datastoreClass = doImport(fullChildConfig["cls"])
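            # Each child datastore is rooted in its own subdirectory named
            # after the child's class and its position in the chain, e.g. a
            # second child of class FileDatastore would be given
            # "<root>/FileDatastore_1" (illustrative).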
160 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
161 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)
163 # Reattach to parent
164 datastoreConfig[containerKey, idx] = childConfig
166 # Reattach modified datastore config to parent
167 # If this has a datastore key we attach there, otherwise we assume
168 # this information goes at the top of the config hierarchy.
169 if DatastoreConfig.component in config:
170 config[DatastoreConfig.component] = datastoreConfig
171 else:
172 config.update(datastoreConfig)
174 return
176 def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
177 butlerRoot: Optional[str] = None):
178 super().__init__(config, bridgeManager)
180 # Scan for child datastores and instantiate them with the same registry
181 self.datastores = []
182 for c in self.config["datastores"]:
183 c = DatastoreConfig(c)
184 datastoreType = doImport(c["cls"])
185 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
186 log.debug("Creating child datastore %s", datastore.name)
187 self.datastores.append(datastore)
189 # Name ourself based on our children
190 if self.datastores:  # coverage: 190 ↛ 195 (the condition on line 190 was never false)
191 # We must set the names explicitly
192 self._names = [d.name for d in self.datastores]
193 childNames = ",".join(self.names)
194 else:
195 childNames = "(empty@{})".format(time.time())
196 self._names = [childNames]
197 self.name = "{}[{}]".format(type(self).__qualname__, childNames)
199 # We declare we are ephemeral if all our child datastores declare
200 # they are ephemeral
201 isEphemeral = True
202 for d in self.datastores:
203 if not d.isEphemeral:
204 isEphemeral = False
205 break
206 self.isEphemeral = isEphemeral
208 # per-datastore override constraints
209 if "datastore_constraints" in self.config:
210 overrides = self.config["datastore_constraints"]
212 if len(overrides) != len(self.datastores):  # coverage: 212 ↛ 213 (the condition on line 212 was never true)
213 raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
214 " differs from number of constraints overrides"
215 f" {len(overrides)}")
217 self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
218 for c in overrides]
220 else:
221 self.datastoreConstraints = (None,) * len(self.datastores)
223 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))
225 @property
226 def names(self) -> Tuple[str, ...]:
227 return tuple(self._names)
229 def __str__(self) -> str:
230 chainName = ", ".join(str(ds) for ds in self.datastores)
231 return chainName
233 def knows(self, ref: DatasetRef) -> bool:
234 """Check if the dataset is known to any of the datastores.
236 Does not check for existence of any artifact.
238 Parameters
239 ----------
240 ref : `DatasetRef`
241 Reference to the required dataset.
243 Returns
244 -------
245 exists : `bool`
246 `True` if the dataset is known to the datastore.
247 """
248 for datastore in self.datastores:
249 if datastore.knows(ref):
250 log.debug("%s known to datastore %s", ref, datastore.name)
251 return True
252 return False
254 def mexists(self, refs: Iterable[DatasetRef],
255 artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
256 """Check the existence of multiple datasets at once.
258 Parameters
259 ----------
260 refs : iterable of `DatasetRef`
261 The datasets to be checked.
262 artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
263 Mapping of datastore artifact to existence. Updated by this
264 method with details of all artifacts tested. Can be `None`
265 if the caller is not interested.
267 Returns
268 -------
269 existence : `dict` of [`DatasetRef`, `bool`]
270 Mapping from dataset to boolean indicating existence in any
271 of the child datastores.
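        Examples
        --------
        Illustrative sketch (``refs`` is a hypothetical collection of
        resolved `DatasetRef` objects):

        >>> existence = datastore.mexists(refs)
        >>> missing = [ref for ref, exists in existence.items() if not exists]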
272 """
273 dataset_existence: Dict[DatasetRef, bool] = {}
274 for datastore in self.datastores:
275 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence))
277 # For next datastore no point asking about ones we know
278 # exist already. No special exemption for ephemeral datastores.
279 refs = [ref for ref, exists in dataset_existence.items() if not exists]
281 return dataset_existence
283 def exists(self, ref: DatasetRef) -> bool:
284 """Check if the dataset exists in one of the datastores.
286 Parameters
287 ----------
288 ref : `DatasetRef`
289 Reference to the required dataset.
291 Returns
292 -------
293 exists : `bool`
294 `True` if the entity exists in one of the child datastores.
295 """
296 for datastore in self.datastores:
297 if datastore.exists(ref):
298 log.debug("Found %s in datastore %s", ref, datastore.name)
299 return True
300 return False
302 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
303 """Load an InMemoryDataset from the store.
305 The dataset is returned from the first datastore that has
306 the dataset.
308 Parameters
309 ----------
310 ref : `DatasetRef`
311 Reference to the required Dataset.
312 parameters : `dict`
313 `StorageClass`-specific parameters that specify, for example,
314 a slice of the dataset to be loaded.
316 Returns
317 -------
318 inMemoryDataset : `object`
319 Requested dataset or slice thereof as an InMemoryDataset.
321 Raises
322 ------
323 FileNotFoundError
324 Requested dataset cannot be retrieved.
325 TypeError
326 Return value from formatter has unexpected type.
327 ValueError
328 Formatter failed to process the dataset.
329 """
331 for datastore in self.datastores:
332 try:
333 inMemoryObject = datastore.get(ref, parameters)
334 log.debug("Found dataset %s in datastore %s", ref, datastore.name)
335 return inMemoryObject
336 except FileNotFoundError:
337 pass
339 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))
341 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
342 """Write a InMemoryDataset with a given `DatasetRef` to each
343 datastore.
345 The put() to child datastores can fail with
346 `DatasetTypeNotSupportedError`. The put() for this datastore will be
347 deemed to have succeeded so long as at least one child datastore
348 accepted the inMemoryDataset.
350 Parameters
351 ----------
352 inMemoryDataset : `object`
353 The dataset to store.
354 ref : `DatasetRef`
355 Reference to the associated Dataset.
357 Raises
358 ------
359 TypeError
360 Supplied object and storage class are inconsistent.
361 DatasetTypeNotSupportedError
362 All datastores reported `DatasetTypeNotSupportedError`.
363 """
364 log.debug("Put %s", ref)
366 # Confirm that we can accept this dataset
367 if not self.constraints.isAcceptable(ref):
368 # Raise rather than use boolean return value.
369 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
370 " configuration.")
372 isPermanent = False
373 nsuccess = 0
374 npermanent = 0
375 nephemeral = 0
376 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
377 if constraints is not None and not constraints.isAcceptable(ref):
378 log.debug("Datastore %s skipping put via configuration for ref %s",
379 datastore.name, ref)
380 continue
382 if datastore.isEphemeral:
383 nephemeral += 1
384 else:
385 npermanent += 1
386 try:
387 datastore.put(inMemoryDataset, ref)
388 nsuccess += 1
389 if not datastore.isEphemeral:
390 isPermanent = True
391 except DatasetTypeNotSupportedError:
392 pass
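        # If every child rejected the dataset the put fails outright; if only
        # ephemeral children accepted it, warn that nothing reached permanent
        # storage.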
394 if nsuccess == 0:
395 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")
397 if not isPermanent and npermanent > 0:  # coverage: 397 ↛ 398 (the condition on line 397 was never true)
398 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)
400 if self._transaction is not None:
401 self._transaction.registerUndo('put', self.remove, ref)
403 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
404 # Docstring inherited from base class.
405 if transfer != "auto":
406 return transfer
407 # Ask each datastore what they think auto means
408 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}
410 # Remove any untranslated "auto" values
411 transfers.discard(transfer)
413 if len(transfers) == 1:  # coverage: 413 ↛ 414 (the condition on line 413 was never true)
414 return transfers.pop()
415 if not transfers:  # coverage: 415 ↛ 419 (the condition on line 415 was never false)
416 # Everything reported "auto"
417 return transfer
419 raise RuntimeError("Chained datastore does not yet support different transfer modes"
420 f" from 'auto' in each child datastore (wanted {transfers})")
422 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
423 # Docstring inherited from Datastore._prepIngest.
424 if transfer is None or transfer == "move":
425 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")
427 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
428 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
429 if not acceptable:
430 log.debug("Datastore %s skipping ingest via configuration for refs %s",
431 name, ", ".join(str(ref) for ref in dataset.refs))
432 return False
433 else:
434 return True
436 # Filter down to just datasets the chained datastore's own
437 # configuration accepts.
438 okForParent: List[FileDataset] = [dataset for dataset in datasets
439 if isDatasetAcceptable(dataset, name=self.name,
440 constraints=self.constraints)]
442 # Iterate over nested datastores and call _prepIngest on each.
443 # Save the results to a list:
444 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
445 # ...and remember whether all of the failures are due to
446 # NotImplementedError being raised.
447 allFailuresAreNotImplementedError = True
448 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
449 okForChild: List[FileDataset]
450 if constraints is not None:
451 okForChild = [dataset for dataset in okForParent
452 if isDatasetAcceptable(dataset, name=datastore.name,
453 constraints=constraints)]
454 else:
455 okForChild = okForParent
456 try:
457 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
458 except NotImplementedError:
459 log.debug("Skipping ingest for datastore %s because transfer "
460 "mode %s is not supported.", datastore.name, transfer)
461 continue
462 allFailuresAreNotImplementedError = False
463 children.append((datastore, prepDataForChild))
464 if allFailuresAreNotImplementedError:
465 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
466 return _IngestPrepData(children=children)
468 def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
469 # Docstring inherited from Datastore._finishIngest.
470 for datastore, prepDataForChild in prepData.children:
471 datastore._finishIngest(prepDataForChild, transfer=transfer)
473 def getURIs(self, ref: DatasetRef,
474 predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
475 """Return URIs associated with dataset.
477 Parameters
478 ----------
479 ref : `DatasetRef`
480 Reference to the required dataset.
481 predict : `bool`, optional
482 If the datastore does not know about the dataset, should it
483 return a predicted URI or not?
485 Returns
486 -------
487 primary : `ButlerURI`
488 The URI to the primary artifact associated with this dataset.
489 If the dataset was disassembled within the datastore this
490 may be `None`.
491 components : `dict`
492 URIs to any components associated with the dataset artifact.
493 Can be empty if there are no components.
495 Notes
496 -----
497 The returned URI is from the first datastore in the list that has
498 the dataset, with preference given to permanent (non-ephemeral)
499 datastores. If no datastore has the dataset and prediction is
500 allowed, the predicted URI for the first datastore in the list will
501 be returned.
502 """
503 DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
504 log.debug("Requesting URIs for %s", ref)
505 predictedUri: Optional[DatastoreURIs] = None
506 predictedEphemeralUri: Optional[DatastoreURIs] = None
507 firstEphemeralUri: Optional[DatastoreURIs] = None
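        # Preference order: an existing artifact in a permanent datastore wins,
        # then an existing artifact in an ephemeral datastore, then a predicted
        # URI from a permanent datastore, then a predicted URI from an
        # ephemeral datastore.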
508 for datastore in self.datastores:
509 if datastore.exists(ref):
510 if not datastore.isEphemeral:
511 uri = datastore.getURIs(ref)
512 log.debug("Retrieved non-ephemeral URI: %s", uri)
513 return uri
514 elif not firstEphemeralUri:
515 firstEphemeralUri = datastore.getURIs(ref)
516 elif predict:
517 if not predictedUri and not datastore.isEphemeral:
518 predictedUri = datastore.getURIs(ref, predict)
519 elif not predictedEphemeralUri and datastore.isEphemeral:
520 predictedEphemeralUri = datastore.getURIs(ref, predict)
522 if firstEphemeralUri:
523 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
524 return firstEphemeralUri
526 if predictedUri:
527 log.debug("Retrieved predicted URI: %s", predictedUri)
528 return predictedUri
530 if predictedEphemeralUri:
531 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
532 return predictedEphemeralUri
534 raise FileNotFoundError("Dataset {} not in any datastore".format(ref))
536 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
537 """URI to the Dataset.
539 The returned URI is from the first datastore in the list that has
540 the dataset, with preference given to permanent (non-ephemeral)
541 datastores. If no datastore has the dataset and prediction is
542 allowed, the predicted URI for the first datastore in the list will
543 be returned.
545 Parameters
546 ----------
547 ref : `DatasetRef`
548 Reference to the required Dataset.
549 predict : `bool`
550 If `True`, allow URIs to be returned of datasets that have not
551 been written.
553 Returns
554 -------
555 uri : `ButlerURI`
556 URI pointing to the dataset within the datastore. If the
557 dataset does not exist in the datastore, and if ``predict`` is
558 `True`, the URI will be a prediction and will include a URI
559 fragment "#predicted".
561 Notes
562 -----
563 If the datastore does not have entities that relate well
564 to the concept of a URI the returned URI string will be
565 descriptive. The returned URI is not guaranteed to be obtainable.
567 Raises
568 ------
569 FileNotFoundError
570 A URI has been requested for a dataset that does not exist and
571 guessing is not allowed.
572 RuntimeError
573 Raised if a request is made for a single URI but multiple URIs
574 are associated with this dataset.
575 """
576 log.debug("Requesting URI for %s", ref)
577 primary, components = self.getURIs(ref, predict)
578 if primary is None or components:  # coverage: 578 ↛ 579 (the condition on line 578 was never true)
579 raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
580 "Use Dataastore.getURIs() instead.")
581 return primary
583 def retrieveArtifacts(self, refs: Iterable[DatasetRef],
584 destination: ButlerURI, transfer: str = "auto",
585 preserve_path: bool = True,
586 overwrite: bool = False) -> List[ButlerURI]:
587 """Retrieve the file artifacts associated with the supplied refs.
589 Parameters
590 ----------
591 refs : iterable of `DatasetRef`
592 The datasets for which file artifacts are to be retrieved.
593 A single ref can result in multiple files. The refs must
594 be resolved.
595 destination : `ButlerURI`
596 Location to write the file artifacts.
597 transfer : `str`, optional
598 Method to use to transfer the artifacts. Must be one of the options
599 supported by `ButlerURI.transfer_from()`. "move" is not allowed.
600 preserve_path : `bool`, optional
601 If `True` the full path of the file artifact within the datastore
602 is preserved. If `False` the final file component of the path
603 is used.
604 overwrite : `bool`, optional
605 If `True` allow transfers to overwrite existing files at the
606 destination.
608 Returns
609 -------
610 targets : `list` of `ButlerURI`
611 URIs of file artifacts in destination location. Order is not
612 preserved.
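        Examples
        --------
        Minimal usage sketch (illustrative; ``refs`` must be resolved
        `DatasetRef` instances and the destination path here is hypothetical):

        >>> from lsst.daf.butler import ButlerURI
        >>> destination = ButlerURI("/tmp/artifact_export/", forceDirectory=True)
        >>> targets = datastore.retrieveArtifacts(refs, destination,
        ...                                       transfer="copy")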
613 """
614 if not destination.isdir():  # coverage: 614 ↛ 615 (the condition on line 614 was never true)
615 raise ValueError(f"Destination location must refer to a directory. Given {destination}")
617 # Using getURIs is not feasible since it becomes difficult to
618 # determine the path within the datastore later on. For now
619 # follow getURIs implementation approach.
621 pending = set(refs)
623 # There is a question as to whether an exception should be raised
624 # early if some of the refs are missing, or whether files should be
625 # transferred until a problem is hit. Prefer to complain up front.
626 # Use the datastore integer as primary key.
627 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}
629 for number, datastore in enumerate(self.datastores):
630 if datastore.isEphemeral:
631 # In the future we will want to distinguish in-memory from
632 # caching datastore since using an on-disk local
633 # cache is exactly what we should be doing.
634 continue
635 datastore_refs = {ref for ref in pending if datastore.exists(ref)}
637 if datastore_refs:
638 grouped_by_datastore[number] = datastore_refs
640 # Remove these from the pending list so that we do not bother
641 # looking for them any more.
642 pending = pending - datastore_refs
644 if pending:  # coverage: 644 ↛ 645 (the condition on line 644 was never true)
645 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")
647 # Now do the transfer.
648 targets: List[ButlerURI] = []
649 for number, datastore_refs in grouped_by_datastore.items():
650 targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
651 transfer=transfer,
652 preserve_path=preserve_path,
653 overwrite=overwrite))
655 return targets
657 def remove(self, ref: DatasetRef) -> None:
658 """Indicate to the datastore that a dataset can be removed.
660 The dataset will be removed from each datastore. The dataset is
661 not required to exist in every child datastore.
663 Parameters
664 ----------
665 ref : `DatasetRef`
666 Reference to the required dataset.
668 Raises
669 ------
670 FileNotFoundError
671 Attempt to remove a dataset that does not exist. Raised if none
672 of the child datastores removed the dataset.
673 """
674 log.debug("Removing %s", ref)
675 self.trash(ref, ignore_errors=False)
676 self.emptyTrash(ignore_errors=False)
678 def forget(self, refs: Iterable[DatasetRef]) -> None:
679 for datastore in tuple(self.datastores):
680 datastore.forget(refs)
682 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
683 if isinstance(ref, DatasetRef):
684 ref_label = str(ref)
685 else:
686 ref_label = "bulk datasets"
688 log.debug("Trashing %s", ref_label)
690 counter = 0
691 for datastore in self.datastores:
692 try:
693 datastore.trash(ref, ignore_errors=ignore_errors)
694 counter += 1
695 except FileNotFoundError:
696 pass
698 if counter == 0:
699 err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
700 if ignore_errors:  # coverage: 700 ↛ 701 (the condition on line 700 was never true)
701 log.warning(err_msg)
702 else:
703 raise FileNotFoundError(err_msg)
705 def emptyTrash(self, ignore_errors: bool = True) -> None:
706 for datastore in self.datastores:
707 datastore.emptyTrash(ignore_errors=ignore_errors)
709 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
710 """Retrieve a dataset from an input `Datastore`,
711 and store the result in this `Datastore`.
713 Parameters
714 ----------
715 inputDatastore : `Datastore`
716 The external `Datastore` from which to retrieve the Dataset.
717 ref : `DatasetRef`
718 Reference to the required dataset in the input data store.
720 Notes
721 -----
722 The dataset is read from ``inputDatastore`` and stored via ``put()``,
723 which writes it to every child datastore that accepts it. Nothing is
724 returned.
725 """
726 assert inputDatastore is not self # unless we want it for renames?
727 inMemoryDataset = inputDatastore.get(ref)
728 self.put(inMemoryDataset, ref)
730 def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
731 logFailures: bool = False) -> None:
732 """Validate some of the configuration for this datastore.
734 Parameters
735 ----------
736 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
737 Entities to test against this configuration. Can be differing
738 types.
739 logFailures : `bool`, optional
740 If `True`, output a log message for every validation error
741 detected.
743 Raises
744 ------
745 DatastoreValidationError
746 Raised if there is a validation problem with a configuration.
747 All the problems are reported in a single exception.
749 Notes
750 -----
751 This method checks each datastore in turn.
752 """
754 # Need to catch each of the datastore outputs and ensure that
755 # all are tested.
756 failures = []
757 for datastore in self.datastores:
758 try:
759 datastore.validateConfiguration(entities, logFailures=logFailures)
760 except DatastoreValidationError as e:
761 if logFailures:  # coverage: 761 ↛ 763 (the condition on line 761 was never false)
762 log.critical("Datastore %s failed validation", datastore.name)
763 failures.append(f"Datastore {self.name}: {e}")
765 if failures:
766 msg = ";\n".join(failures)
767 raise DatastoreValidationError(msg)
769 def validateKey(self, lookupKey: LookupKey,
770 entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
771 # Docstring is inherited from base class
772 failures = []
773 for datastore in self.datastores:
774 try:
775 datastore.validateKey(lookupKey, entity)
776 except DatastoreValidationError as e:
777 failures.append(f"Datastore {self.name}: {e}")
779 if failures:
780 msg = ";\n".join(failures)
781 raise DatastoreValidationError(msg)
783 def getLookupKeys(self) -> Set[LookupKey]:
784 # Docstring is inherited from base class
785 keys = set()
786 for datastore in self.datastores:
787 keys.update(datastore.getLookupKeys())
789 keys.update(self.constraints.getLookupKeys())
790 for p in self.datastoreConstraints:
791 if p is not None:  # coverage: 791 ↛ 792 (the condition on line 791 was never true)
792 keys.update(p.getLookupKeys())
794 return keys
796 def needs_expanded_data_ids(
797 self,
798 transfer: Optional[str],
799 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
800 ) -> bool:
801 # Docstring inherited.
802 # We can't safely use `self.datastoreConstraints` with `entity` to
803 # check whether a child datastore would even want to ingest this
804 # dataset, because we don't want to filter out datastores that might
805 need an expanded data ID based on incomplete information (e.g. we
806 # pass a StorageClass, but the constraint dispatches on DatasetType).
807 # So we pessimistically check if any datastore would need an expanded
808 # data ID for this transfer mode.
809 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  # coverage: 809 ↛ exit (the generator expression on line 809 never finished)