Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py : 90%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Chained datastore."""
26__all__ = ("ChainedDatastore",)
28import time
29import logging
30import warnings
31import itertools
32from typing import (
33 TYPE_CHECKING,
34 Any,
35 Dict,
36 List,
37 Iterable,
38 Mapping,
39 Optional,
40 Sequence,
41 Set,
42 Tuple,
43 Union,
44)
46from lsst.utils import doImportType
47from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
48 DatastoreValidationError, Constraints, FileDataset, DatasetRef
50if TYPE_CHECKING:  [branch 50 ↛ 51 never taken: the condition on line 50 was never true]
51 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
52 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager
54log = logging.getLogger(__name__)
57class _IngestPrepData(Datastore.IngestPrepData):
58 """Helper class for ChainedDatastore ingest implementation.
60 Parameters
61 ----------
62 children : `list` of `tuple`
63 Pairs of `Datastore`, `IngestPrepData` for all child datastores.
64 """
65 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
66 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
67 self.children = children
70class ChainedDatastore(Datastore):
71 """Chained Datastores to allow read and writes from multiple datastores.
73 A ChainedDatastore is configured with multiple datastore configurations.
74 A ``put()`` is always sent to each datastore. A ``get()``
75 operation is sent to each datastore in turn and the first datastore
76 to return a valid dataset is used.
78 Parameters
79 ----------
80 config : `DatastoreConfig` or `str`
81 Configuration. This configuration must include a ``datastores`` field
82 as a sequence of datastore configurations. The order in this sequence
83 indicates the order to use for read operations.
84 bridgeManager : `DatastoreRegistryBridgeManager`
85 Object that manages the interface between `Registry` and datastores.
86 butlerRoot : `str`, optional
87 New datastore root to use to override the configuration value. This
88 root is sent to each child datastore.
90 Notes
91 -----
92 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
93 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
94 and `"hardlink"` if and only if all its child datastores do.
95 """
97 defaultConfigFile = "datastores/chainedDatastore.yaml"
98 """Path to configuration defaults. Accessed within the ``configs`` resource
99 or relative to a search path. Can be None if no defaults specified.
100 """
102 containerKey = "datastores"
103 """Key to specify where child datastores are configured."""
105 datastores: List[Datastore]
106 """All the child datastores known to this datastore."""
108 datastoreConstraints: Sequence[Optional[Constraints]]
109 """Constraints to be applied to each of the child datastores."""
111 @classmethod
112 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
113 """Set any filesystem-dependent config options for child Datastores to
114 be appropriate for a new empty repository with the given root.
116 Parameters
117 ----------
118 root : `str`
119 Filesystem path to the root of the data repository.
120 config : `Config`
121 A `Config` to update. Only the subset understood by
122 this component will be updated. Will not expand
123 defaults.
124 full : `Config`
125 A complete config with all defaults expanded that can be
126 converted to a `DatastoreConfig`. Read-only and will not be
127 modified by this method.
128 Repository-specific options that should not be obtained
129 from defaults when Butler instances are constructed
130 should be copied from ``full`` to ``config``.
131 overwrite : `bool`, optional
132 If `False`, do not modify a value in ``config`` if the value
133 already exists. Default is always to overwrite with the provided
134 ``root``.
136 Notes
137 -----
138 If a keyword is explicitly defined in the supplied ``config`` it
139 will not be overridden by this method if ``overwrite`` is `False`.
140 This allows explicit values set in external configs to be retained.
141 """
143 # Extract the part of the config we care about updating
144 datastoreConfig = DatastoreConfig(config, mergeDefaults=False)
146 # And the subset of the full config that we can use for reference.
147 # Do not bother with defaults because we are told this already has
148 # them.
149 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)
151 # Loop over each datastore config and pass the subsets to the
152 # child datastores to process.
154 containerKey = cls.containerKey
155 for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey],
156 fullDatastoreConfig[containerKey])):
157 childConfig = DatastoreConfig(child, mergeDefaults=False)
158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
159 datastoreClass = doImportType(fullChildConfig["cls"])
160 if not issubclass(datastoreClass, Datastore):  [branch 160 ↛ 161 never taken: the condition on line 160 was never true]
161 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore")
162 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
163 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)
165 # Reattach to parent
166 datastoreConfig[containerKey, idx] = childConfig
168 # Reattach modified datastore config to parent
169 # If this has a datastore key we attach there, otherwise we assume
170 # this information goes at the top of the config hierarchy.
171 if DatastoreConfig.component in config:
172 config[DatastoreConfig.component] = datastoreConfig
173 else:
174 config.update(datastoreConfig)
176 return
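
# Example (hedged): for root="/repo" and two child datastores, the loop above
# derives per-child roots of the form "<root>/<ClassQualname>_<index>", e.g.
#   /repo/InMemoryDatastore_0
#   /repo/FileDatastore_1
# (the class names here are illustrative placeholders).
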
178 def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
179 butlerRoot: str = None):
180 super().__init__(config, bridgeManager)
182 # Scan for child datastores and instantiate them with the same registry
183 self.datastores = []
184 for c in self.config["datastores"]:
185 c = DatastoreConfig(c)
186 datastoreType = doImportType(c["cls"])
187 if not issubclass(datastoreType, Datastore):  [branch 187 ↛ 188 never taken: the condition on line 187 was never true]
188 raise TypeError(f"Imported child class {c['cls']} is not a Datastore")
189 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
190 log.debug("Creating child datastore %s", datastore.name)
191 self.datastores.append(datastore)
193 # Name ourself based on our children
194 if self.datastores:  [branch 194 ↛ 199 never taken: the condition on line 194 was never false]
195 # We must set the names explicitly
196 self._names = [d.name for d in self.datastores]
197 childNames = ",".join(self.names)
198 else:
199 childNames = "(empty@{})".format(time.time())
200 self._names = [childNames]
201 self.name = "{}[{}]".format(type(self).__qualname__, childNames)
203 # We declare we are ephemeral if all our child datastores declare
204 # they are ephemeral
205 isEphemeral = True
206 for d in self.datastores:
207 if not d.isEphemeral:
208 isEphemeral = False
209 break
210 self.isEphemeral = isEphemeral
212 # per-datastore override constraints
213 if "datastore_constraints" in self.config:
214 overrides = self.config["datastore_constraints"]
216 if len(overrides) != len(self.datastores):  [branch 216 ↛ 217 never taken: the condition on line 216 was never true]
217 raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
218 " differs from number of constraints overrides"
219 f" {len(overrides)}")
221 self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
222 for c in overrides]
224 else:
225 self.datastoreConstraints = (None,) * len(self.datastores)
227 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))
229 @property
230 def names(self) -> Tuple[str, ...]:
231 return tuple(self._names)
233 def __str__(self) -> str:
234 chainName = ", ".join(str(ds) for ds in self.datastores)
235 return chainName
237 def knows(self, ref: DatasetRef) -> bool:
238 """Check if the dataset is known to any of the datastores.
240 Does not check for existence of any artifact.
242 Parameters
243 ----------
244 ref : `DatasetRef`
245 Reference to the required dataset.
247 Returns
248 -------
249 exists : `bool`
250 `True` if the dataset is known to the datastore.
251 """
252 for datastore in self.datastores:
253 if datastore.knows(ref):
254 log.debug("%s known to datastore %s", ref, datastore.name)
255 return True
256 return False
258 def mexists(self, refs: Iterable[DatasetRef],
259 artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
260 """Check the existence of multiple datasets at once.
262 Parameters
263 ----------
264 refs : iterable of `DatasetRef`
265 The datasets to be checked.
266 artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
267 Mapping of datastore artifact to existence. Updated by this
268 method with details of all artifacts tested. Can be `None`
269 if the caller is not interested.
271 Returns
272 -------
273 existence : `dict` of [`DatasetRef`, `bool`]
274 Mapping from dataset to boolean indicating existence in any
275 of the child datastores.
276 """
277 dataset_existence: Dict[DatasetRef, bool] = {}
278 for datastore in self.datastores:
279 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence))
281 # For next datastore no point asking about ones we know
282 # exist already. No special exemption for ephemeral datastores.
283 refs = [ref for ref, exists in dataset_existence.items() if not exists]
285 return dataset_existence
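
# Example (hedged) usage of mexists(); "chained_datastore" and "some_refs" are
# placeholders. The artifact cache is optional and updated in place, and refs
# already found are not re-queried in later child datastores.
artifact_cache: Dict[ButlerURI, bool] = {}
existence = chained_datastore.mexists(some_refs, artifact_existence=artifact_cache)
missing = [ref for ref, found in existence.items() if not found]
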
287 def exists(self, ref: DatasetRef) -> bool:
288 """Check if the dataset exists in one of the datastores.
290 Parameters
291 ----------
292 ref : `DatasetRef`
293 Reference to the required dataset.
295 Returns
296 -------
297 exists : `bool`
298 `True` if the entity exists in one of the child datastores.
299 """
300 for datastore in self.datastores:
301 if datastore.exists(ref):
302 log.debug("Found %s in datastore %s", ref, datastore.name)
303 return True
304 return False
306 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
307 """Load an InMemoryDataset from the store.
309 The dataset is returned from the first datastore that has
310 the dataset.
312 Parameters
313 ----------
314 ref : `DatasetRef`
315 Reference to the required Dataset.
316 parameters : `dict`
317 `StorageClass`-specific parameters that specify, for example,
318 a slice of the dataset to be loaded.
320 Returns
321 -------
322 inMemoryDataset : `object`
323 Requested dataset or slice thereof as an InMemoryDataset.
325 Raises
326 ------
327 FileNotFoundError
328 Requested dataset can not be retrieved.
329 TypeError
330 Return value from formatter has unexpected type.
331 ValueError
332 Formatter failed to process the dataset.
333 """
335 for datastore in self.datastores:
336 try:
337 inMemoryObject = datastore.get(ref, parameters)
338 log.debug("Found dataset %s in datastore %s", ref, datastore.name)
339 return inMemoryObject
340 except FileNotFoundError:
341 pass
343 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))
345 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
346 """Write a InMemoryDataset with a given `DatasetRef` to each
347 datastore.
349 The put() to child datastores can fail with
350 `DatasetTypeNotSupportedError`. The put() for this datastore will be
351 deemed to have succeeded so long as at least one child datastore
352 accepted the inMemoryDataset.
354 Parameters
355 ----------
356 inMemoryDataset : `object`
357 The dataset to store.
358 ref : `DatasetRef`
359 Reference to the associated Dataset.
361 Raises
362 ------
363 TypeError
364 Supplied object and storage class are inconsistent.
365 DatasetTypeNotSupportedError
366 All datastores reported `DatasetTypeNotSupportedError`.
367 """
368 log.debug("Put %s", ref)
370 # Confirm that we can accept this dataset
371 if not self.constraints.isAcceptable(ref):
372 # Raise rather than use boolean return value.
373 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
374 " configuration.")
376 isPermanent = False
377 nsuccess = 0
378 npermanent = 0
379 nephemeral = 0
380 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
381 if constraints is not None and not constraints.isAcceptable(ref):
382 log.debug("Datastore %s skipping put via configuration for ref %s",
383 datastore.name, ref)
384 continue
386 if datastore.isEphemeral:
387 nephemeral += 1
388 else:
389 npermanent += 1
390 try:
391 datastore.put(inMemoryDataset, ref)
392 nsuccess += 1
393 if not datastore.isEphemeral:
394 isPermanent = True
395 except DatasetTypeNotSupportedError:
396 pass
398 if nsuccess == 0:
399 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")
401 if not isPermanent and npermanent > 0:  [branch 401 ↛ 402 never taken: the condition on line 401 was never true]
402 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)
404 if self._transaction is not None:
405 self._transaction.registerUndo('put', self.remove, ref)
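
# Example (hedged) usage of put(); "chained_datastore", "in_memory_dataset" and
# "ref" are placeholders. The put succeeds if at least one child datastore
# accepts the dataset; DatasetTypeNotSupportedError means every child (or this
# datastore's own constraints) rejected it.
try:
    chained_datastore.put(in_memory_dataset, ref)
except DatasetTypeNotSupportedError:
    log.warning("No child datastore accepted %s", ref)
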
407 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
408 # Docstring inherited from base class.
409 if transfer != "auto":
410 return transfer
411 # Ask each datastore what they think auto means
412 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}
414 # Remove any untranslated "auto" values
415 transfers.discard(transfer)
417 if len(transfers) == 1:  [branch 417 ↛ 418 never taken: the condition on line 417 was never true]
418 return transfers.pop()
419 if not transfers:  [branch 419 ↛ 423 never taken: the condition on line 419 was never false]
420 # Everything reported "auto"
421 return transfer
423 raise RuntimeError("Chained datastore does not yet support different transfer modes"
424 f" from 'auto' in each child datastore (wanted {transfers})")
426 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
427 # Docstring inherited from Datastore._prepIngest.
428 if transfer is None or transfer == "move":
429 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")
431 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
432 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
433 if not acceptable:
434 log.debug("Datastore %s skipping ingest via configuration for refs %s",
435 name, ", ".join(str(ref) for ref in dataset.refs))
436 return False
437 else:
438 return True
440 # Filter down to just datasets the chained datastore's own
441 # configuration accepts.
442 okForParent: List[FileDataset] = [dataset for dataset in datasets
443 if isDatasetAcceptable(dataset, name=self.name,
444 constraints=self.constraints)]
446 # Iterate over nested datastores and call _prepIngest on each.
447 # Save the results to a list:
448 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
449 # ...and remember whether all of the failures are due to
450 # NotImplementedError being raised.
451 allFailuresAreNotImplementedError = True
452 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
453 okForChild: List[FileDataset]
454 if constraints is not None:
455 okForChild = [dataset for dataset in okForParent
456 if isDatasetAcceptable(dataset, name=datastore.name,
457 constraints=constraints)]
458 else:
459 okForChild = okForParent
460 try:
461 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
462 except NotImplementedError:
463 log.debug("Skipping ingest for datastore %s because transfer "
464 "mode %s is not supported.", datastore.name, transfer)
465 continue
466 allFailuresAreNotImplementedError = False
467 children.append((datastore, prepDataForChild))
468 if allFailuresAreNotImplementedError:
469 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
470 return _IngestPrepData(children=children)
472 def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
473 # Docstring inherited from Datastore._finishIngest.
474 for datastore, prepDataForChild in prepData.children:
475 datastore._finishIngest(prepDataForChild, transfer=transfer)
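
# Example (hedged): ingest is driven by the base-class Datastore.ingest(), which
# calls the _prepIngest/_finishIngest hooks above. transfer=None and
# transfer="move" are rejected by this datastore; "file_dataset" is a placeholder
# FileDataset whose refs are already resolved.
chained_datastore.ingest(file_dataset, transfer="copy")
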
477 def getURIs(self, ref: DatasetRef,
478 predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
479 """Return URIs associated with dataset.
481 Parameters
482 ----------
483 ref : `DatasetRef`
484 Reference to the required dataset.
485 predict : `bool`, optional
486 If the datastore does not know about the dataset, should it
487 return a predicted URI or not?
489 Returns
490 -------
491 primary : `ButlerURI`
492 The URI to the primary artifact associated with this dataset.
493 If the dataset was disassembled within the datastore this
494 may be `None`.
495 components : `dict`
496 URIs to any components associated with the dataset artifact.
497 Can be empty if there are no components.
499 Notes
500 -----
501 The returned URI is from the first datastore in the list that has
502 the dataset with preference given to the first dataset coming from
503 a permanent datastore. If no datastores have the dataset and prediction
504 is allowed, the predicted URI for the first datastore in the list will
505 be returned.
506 """
507 DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
508 log.debug("Requesting URIs for %s", ref)
509 predictedUri: Optional[DatastoreURIs] = None
510 predictedEphemeralUri: Optional[DatastoreURIs] = None
511 firstEphemeralUri: Optional[DatastoreURIs] = None
512 for datastore in self.datastores:
513 if datastore.exists(ref):
514 if not datastore.isEphemeral:
515 uri = datastore.getURIs(ref)
516 log.debug("Retrieved non-ephemeral URI: %s", uri)
517 return uri
518 elif not firstEphemeralUri:
519 firstEphemeralUri = datastore.getURIs(ref)
520 elif predict:
521 if not predictedUri and not datastore.isEphemeral:
522 predictedUri = datastore.getURIs(ref, predict)
523 elif not predictedEphemeralUri and datastore.isEphemeral:
524 predictedEphemeralUri = datastore.getURIs(ref, predict)
526 if firstEphemeralUri:
527 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
528 return firstEphemeralUri
530 if predictedUri:
531 log.debug("Retrieved predicted URI: %s", predictedUri)
532 return predictedUri
534 if predictedEphemeralUri:
535 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
536 return predictedEphemeralUri
538 raise FileNotFoundError("Dataset {} not in any datastore".format(ref))
540 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
541 """URI to the Dataset.
543 The returned URI is from the first datastore in the list that has
544 the dataset with preference given to the first dataset coming from
545 a permanent datastore. If no datastores have the dataset and prediction
546 is allowed, the predicted URI for the first datastore in the list will
547 be returned.
549 Parameters
550 ----------
551 ref : `DatasetRef`
552 Reference to the required Dataset.
553 predict : `bool`
554 If `True`, allow URIs to be returned of datasets that have not
555 been written.
557 Returns
558 -------
559 uri : `ButlerURI`
560 URI pointing to the dataset within the datastore. If the
561 dataset does not exist in the datastore, and if ``predict`` is
562 `True`, the URI will be a prediction and will include a URI
563 fragment "#predicted".
565 Notes
566 -----
567 If the datastore does not have entities that relate well
568 to the concept of a URI the returned URI string will be
569 descriptive. The returned URI is not guaranteed to be obtainable.
571 Raises
572 ------
573 FileNotFoundError
574 A URI has been requested for a dataset that does not exist and
575 guessing is not allowed.
576 RuntimeError
577 Raised if a request is made for a single URI but multiple URIs
578 are associated with this dataset.
579 """
580 log.debug("Requesting URI for %s", ref)
581 primary, components = self.getURIs(ref, predict)
582 if primary is None or components:  [branch 582 ↛ 583 never taken: the condition on line 582 was never true]
583 raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
584 "Use Dataastore.getURIs() instead.")
585 return primary
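
# Example (hedged): prefer getURIs() when a dataset may have been disassembled;
# getURI() raises RuntimeError in that case. "chained_datastore" and "ref" are
# placeholders.
primary, components = chained_datastore.getURIs(ref, predict=False)
if primary is None:
    log.debug("Dataset %s was disassembled; components: %s", ref, list(components))
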
587 def retrieveArtifacts(self, refs: Iterable[DatasetRef],
588 destination: ButlerURI, transfer: str = "auto",
589 preserve_path: bool = True,
590 overwrite: bool = False) -> List[ButlerURI]:
591 """Retrieve the file artifacts associated with the supplied refs.
593 Parameters
594 ----------
595 refs : iterable of `DatasetRef`
596 The datasets for which file artifacts are to be retrieved.
597 A single ref can result in multiple files. The refs must
598 be resolved.
599 destination : `ButlerURI`
600 Location to write the file artifacts.
601 transfer : `str`, optional
602 Method to use to transfer the artifacts. Must be one of the options
603 supported by `ButlerURI.transfer_from()`. "move" is not allowed.
604 preserve_path : `bool`, optional
605 If `True` the full path of the file artifact within the datastore
606 is preserved. If `False` the final file component of the path
607 is used.
608 overwrite : `bool`, optional
609 If `True` allow transfers to overwrite existing files at the
610 destination.
612 Returns
613 -------
614 targets : `list` of `ButlerURI`
615 URIs of file artifacts in destination location. Order is not
616 preserved.
617 """
618 if not destination.isdir():  [branch 618 ↛ 619 never taken: the condition on line 618 was never true]
619 raise ValueError(f"Destination location must refer to a directory. Given {destination}")
621 # Using getURIs is not feasible since it becomes difficult to
622 # determine the path within the datastore later on. For now
623 # follow getURIs implementation approach.
625 pending = set(refs)
627 # There is a question as to whether an exception should be raised
628 # early if some of the refs are missing, or whether files should be
629 # transferred until a problem is hit. Prefer to complain up front.
630 # Use the datastore integer as primary key.
631 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}
633 for number, datastore in enumerate(self.datastores):
634 if datastore.isEphemeral:
635 # In the future we will want to distinguish in-memory from
636 # caching datastore since using an on-disk local
637 # cache is exactly what we should be doing.
638 continue
639 datastore_refs = {ref for ref in pending if datastore.exists(ref)}
641 if datastore_refs:
642 grouped_by_datastore[number] = datastore_refs
644 # Remove these from the pending list so that we do not bother
645 # looking for them any more.
646 pending = pending - datastore_refs
648 if pending:  [branch 648 ↛ 649 never taken: the condition on line 648 was never true]
649 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")
651 # Now do the transfer.
652 targets: List[ButlerURI] = []
653 for number, datastore_refs in grouped_by_datastore.items():
654 targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
655 transfer=transfer,
656 preserve_path=preserve_path,
657 overwrite=overwrite))
659 return targets
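
# Example (hedged) usage of retrieveArtifacts(); the destination must be a
# directory-like ButlerURI and "move" is not an allowed transfer mode. The
# destination path, placeholder names, and the forceDirectory argument are
# assumptions for illustration.
destination = ButlerURI("/tmp/butler-export/", forceDirectory=True)
copied = chained_datastore.retrieveArtifacts(some_refs, destination,
                                             transfer="copy", preserve_path=True)
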
661 def remove(self, ref: DatasetRef) -> None:
662 """Indicate to the datastore that a dataset can be removed.
664 The dataset will be removed from each datastore. The dataset is
665 not required to exist in every child datastore.
667 Parameters
668 ----------
669 ref : `DatasetRef`
670 Reference to the required dataset.
672 Raises
673 ------
674 FileNotFoundError
675 Attempt to remove a dataset that does not exist. Raised if none
676 of the child datastores removed the dataset.
677 """
678 log.debug("Removing %s", ref)
679 self.trash(ref, ignore_errors=False)
680 self.emptyTrash(ignore_errors=False)
682 def forget(self, refs: Iterable[DatasetRef]) -> None:
683 for datastore in tuple(self.datastores):
684 datastore.forget(refs)
686 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
687 if isinstance(ref, DatasetRef):
688 ref_label = str(ref)
689 else:
690 ref_label = "bulk datasets"
692 log.debug("Trashing %s", ref_label)
694 counter = 0
695 for datastore in self.datastores:
696 try:
697 datastore.trash(ref, ignore_errors=ignore_errors)
698 counter += 1
699 except FileNotFoundError:
700 pass
702 if counter == 0:
703 err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
704 if ignore_errors:  [branch 704 ↛ 705 never taken: the condition on line 704 was never true]
705 log.warning(err_msg)
706 else:
707 raise FileNotFoundError(err_msg)
709 def emptyTrash(self, ignore_errors: bool = True) -> None:
710 for datastore in self.datastores:
711 datastore.emptyTrash(ignore_errors=ignore_errors)
713 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
714 """Retrieve a dataset from an input `Datastore`,
715 and store the result in this `Datastore`.
717 Parameters
718 ----------
719 inputDatastore : `Datastore`
720 The external `Datastore` from which to retrieve the Dataset.
721 ref : `DatasetRef`
722 Reference to the required dataset in the input data store.
724 Notes
725 -----
726 Nothing is returned; the dataset retrieved from ``inputDatastore`` is
727 stored in each accepting child datastore via ``put()``.
729 """
730 assert inputDatastore is not self # unless we want it for renames?
731 inMemoryDataset = inputDatastore.get(ref)
732 self.put(inMemoryDataset, ref)
734 def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
735 logFailures: bool = False) -> None:
736 """Validate some of the configuration for this datastore.
738 Parameters
739 ----------
740 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
741 Entities to test against this configuration. Can be differing
742 types.
743 logFailures : `bool`, optional
744 If `True`, output a log message for every validation error
745 detected.
747 Raises
748 ------
749 DatastoreValidationError
750 Raised if there is a validation problem with a configuration.
751 All the problems are reported in a single exception.
753 Notes
754 -----
755 This method checks each datastore in turn.
756 """
758 # Need to catch each of the datastore outputs and ensure that
759 # all are tested.
760 failures = []
761 for datastore in self.datastores:
762 try:
763 datastore.validateConfiguration(entities, logFailures=logFailures)
764 except DatastoreValidationError as e:
765 if logFailures:  [branch 765 ↛ 767 never taken: the condition on line 765 was never false]
766 log.critical("Datastore %s failed validation", datastore.name)
767 failures.append(f"Datastore {self.name}: {e}")
769 if failures:
770 msg = ";\n".join(failures)
771 raise DatastoreValidationError(msg)
773 def validateKey(self, lookupKey: LookupKey,
774 entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
775 # Docstring is inherited from base class
776 failures = []
777 for datastore in self.datastores:
778 try:
779 datastore.validateKey(lookupKey, entity)
780 except DatastoreValidationError as e:
781 failures.append(f"Datastore {self.name}: {e}")
783 if failures:
784 msg = ";\n".join(failures)
785 raise DatastoreValidationError(msg)
787 def getLookupKeys(self) -> Set[LookupKey]:
788 # Docstring is inherited from base class
789 keys = set()
790 for datastore in self.datastores:
791 keys.update(datastore.getLookupKeys())
793 keys.update(self.constraints.getLookupKeys())
794 for p in self.datastoreConstraints:
795 if p is not None:  [branch 795 ↛ 796 never taken: the condition on line 795 was never true]
796 keys.update(p.getLookupKeys())
798 return keys
800 def needs_expanded_data_ids(
801 self,
802 transfer: Optional[str],
803 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
804 ) -> bool:
805 # Docstring inherited.
806 # We can't safely use `self.datastoreConstraints` with `entity` to
807 # check whether a child datastore would even want to ingest this
808 # dataset, because we don't want to filter out datastores that might
809 # need an expanded data ID based on incomplete information (e.g. we
810 # pass a StorageClass, but the constraint dispatches on DatasetType).
811 # So we pessimistically check if any datastore would need an expanded
812 # data ID for this transfer mode.
813 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  [813 ↛ exit: the generator expression on line 813 never finished]