Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 90%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Chained datastore."""
26__all__ = ("ChainedDatastore",)
28import itertools
29import logging
30import time
31import warnings
32from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union
34from lsst.daf.butler import (
35 ButlerURI,
36 Constraints,
37 DatasetRef,
38 DatasetTypeNotSupportedError,
39 Datastore,
40 DatastoreConfig,
41 DatastoreValidationError,
42 FileDataset,
43)
44from lsst.utils import doImportType
46 if TYPE_CHECKING:  [46 ↛ 47 never taken]
47 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
48 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager
50log = logging.getLogger(__name__)
53class _IngestPrepData(Datastore.IngestPrepData):
54 """Helper class for ChainedDatastore ingest implementation.
56 Parameters
57 ----------
58 children : `list` of `tuple`
59 Pairs of `Datastore`, `IngestPrepData` for all child datastores.
60 """
62 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
63 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
64 self.children = children
67class ChainedDatastore(Datastore):
68 """Chained Datastores to allow read and writes from multiple datastores.
70 A ChainedDatastore is configured with multiple datastore configurations.
71 A ``put()`` is sent to each datastore, subject to any per-datastore
72 constraints. A ``get()`` operation is tried on each datastore in turn and
73 the first datastore to return a valid dataset is used.
75 Parameters
76 ----------
77 config : `DatastoreConfig` or `str`
78 Configuration. This configuration must include a ``datastores`` field
79 as a sequence of datastore configurations. The order in this sequence
80 indicates the order to use for read operations.
81 bridgeManager : `DatastoreRegistryBridgeManager`
82 Object that manages the interface between `Registry` and datastores.
83 butlerRoot : `str`, optional
84 New datastore root to use to override the configuration value. This
85 root is sent to each child datastore.
87 Notes
88 -----
89 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
90 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
91 and `"hardlink"` if and only if all its child datastores do.
92 """
94 defaultConfigFile = "datastores/chainedDatastore.yaml"
95 """Path to configuration defaults. Accessed within the ``configs`` resource
96 or relative to a search path. Can be None if no defaults specified.
97 """
99 containerKey = "datastores"
100 """Key to specify where child datastores are configured."""
102 datastores: List[Datastore]
103 """All the child datastores known to this datastore."""
105 datastoreConstraints: Sequence[Optional[Constraints]]
106 """Constraints to be applied to each of the child datastores."""
108 @classmethod
109 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
110 """Set any filesystem-dependent config options for child Datastores to
111 be appropriate for a new empty repository with the given root.
113 Parameters
114 ----------
115 root : `str`
116 Filesystem path to the root of the data repository.
117 config : `Config`
118 A `Config` to update. Only the subset understood by
119 this component will be updated. Will not expand
120 defaults.
121 full : `Config`
122 A complete config with all defaults expanded that can be
123 converted to a `DatastoreConfig`. Read-only and will not be
124 modified by this method.
125 Repository-specific options that should not be obtained
126 from defaults when Butler instances are constructed
127 should be copied from ``full`` to ``config``.
128 overwrite : `bool`, optional
129 If `False`, do not modify a value in ``config`` if the value
130 already exists. Default is always to overwrite with the provided
131 ``root``.
133 Notes
134 -----
135 If a keyword is explicitly defined in the supplied ``config`` it
136 will not be overridden by this method if ``overwrite`` is `False`.
137 This allows explicit values set in external configs to be retained.
138 """
140 # Extract the part of the config we care about updating
141 datastoreConfig = DatastoreConfig(config, mergeDefaults=False)
143 # And the subset of the full config that we can use for reference.
144 # Do not bother with defaults because we are told this already has
145 # them.
146 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)
148 # Loop over each datastore config and pass the subsets to the
149 # child datastores to process.
151 containerKey = cls.containerKey
152 for idx, (child, fullChild) in enumerate(
153 zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey])
154 ):
155 childConfig = DatastoreConfig(child, mergeDefaults=False)
156 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
157 datastoreClass = doImportType(fullChildConfig["cls"])
158 if not issubclass(datastoreClass, Datastore):  [158 ↛ 159 never taken]
159 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore")
160 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
161 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)
163 # Reattach to parent
164 datastoreConfig[containerKey, idx] = childConfig
166 # Reattach modified datastore config to parent
167 # If this has a datastore key we attach there, otherwise we assume
168 # this information goes at the top of the config hierarchy.
169 if DatastoreConfig.component in config:
170 config[DatastoreConfig.component] = datastoreConfig
171 else:
172 config.update(datastoreConfig)
174 return
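# Illustration of the child-root naming applied above (the class names are
# hypothetical): for root="/repo", each child receives
# "{root}/{class qualname}_{index}" as its new root.
example_roots = [
    "{}/{}_{}".format("/repo", name, i)
    for i, name in enumerate(["FileDatastore", "InMemoryDatastore"])
]
# example_roots == ["/repo/FileDatastore_0", "/repo/InMemoryDatastore_1"]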
176 def __init__(
177 self,
178 config: Union[Config, str],
179 bridgeManager: DatastoreRegistryBridgeManager,
180 butlerRoot: Optional[str] = None,
181 ):
182 super().__init__(config, bridgeManager)
184 # Scan for child datastores and instantiate them with the same registry
185 self.datastores = []
186 for c in self.config["datastores"]:
187 c = DatastoreConfig(c)
188 datastoreType = doImportType(c["cls"])
189 if not issubclass(datastoreType, Datastore):  [189 ↛ 190 never taken]
190 raise TypeError(f"Imported child class {c['cls']} is not a Datastore")
191 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
192 log.debug("Creating child datastore %s", datastore.name)
193 self.datastores.append(datastore)
195 # Name ourself based on our children
196 if self.datastores:  [196 ↛ 201 never taken]
197 # We must set the names explicitly
198 self._names = [d.name for d in self.datastores]
199 childNames = ",".join(self.names)
200 else:
201 childNames = "(empty@{})".format(time.time())
202 self._names = [childNames]
203 self.name = "{}[{}]".format(type(self).__qualname__, childNames)
205 # We declare we are ephemeral if all our child datastores declare
206 # they are ephemeral
207 isEphemeral = True
208 for d in self.datastores:
209 if not d.isEphemeral:
210 isEphemeral = False
211 break
212 self.isEphemeral = isEphemeral
214 # per-datastore override constraints
215 if "datastore_constraints" in self.config:
216 overrides = self.config["datastore_constraints"]
218 if len(overrides) != len(self.datastores):  [218 ↛ 219 never taken]
219 raise DatastoreValidationError(
220 f"Number of registered datastores ({len(self.datastores)})"
221 " differs from number of constraints overrides"
222 f" {len(overrides)}"
223 )
225 self.datastoreConstraints = [
226 Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides
227 ]
229 else:
230 self.datastoreConstraints = (None,) * len(self.datastores)
232 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))
234 @property
235 def names(self) -> Tuple[str, ...]:
236 return tuple(self._names)
238 def __str__(self) -> str:
239 chainName = ", ".join(str(ds) for ds in self.datastores)
240 return chainName
242 def knows(self, ref: DatasetRef) -> bool:
243 """Check if the dataset is known to any of the datastores.
245 Does not check for existence of any artifact.
247 Parameters
248 ----------
249 ref : `DatasetRef`
250 Reference to the required dataset.
252 Returns
253 -------
254 exists : `bool`
255 `True` if the dataset is known to the datastore.
256 """
257 for datastore in self.datastores:
258 if datastore.knows(ref):
259 log.debug("%s known to datastore %s", ref, datastore.name)
260 return True
261 return False
263 def mexists(
264 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ButlerURI, bool]] = None
265 ) -> Dict[DatasetRef, bool]:
266 """Check the existence of multiple datasets at once.
268 Parameters
269 ----------
270 refs : iterable of `DatasetRef`
271 The datasets to be checked.
272 artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
273 Mapping of datastore artifact to existence. Updated by this
274 method with details of all artifacts tested. Can be `None`
275 if the caller is not interested.
277 Returns
278 -------
279 existence : `dict` of [`DatasetRef`, `bool`]
280 Mapping from dataset to boolean indicating existence in any
281 of the child datastores.
282 """
283 dataset_existence: Dict[DatasetRef, bool] = {}
284 for datastore in self.datastores:
285 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence))
287 # For next datastore no point asking about ones we know
288 # exist already. No special exemption for ephemeral datastores.
289 refs = [ref for ref, exists in dataset_existence.items() if not exists]
291 return dataset_existence
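# Usage sketch (``chained`` and ``refs`` are assumed objects, not defined in
# this module): check many refs at once and keep the artifact-existence cache
# so repeated calls avoid re-checking the same URIs.
artifact_cache: Dict[ButlerURI, bool] = {}
existence = chained.mexists(refs, artifact_existence=artifact_cache)
missing = [ref for ref, found in existence.items() if not found]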
293 def exists(self, ref: DatasetRef) -> bool:
294 """Check if the dataset exists in one of the datastores.
296 Parameters
297 ----------
298 ref : `DatasetRef`
299 Reference to the required dataset.
301 Returns
302 -------
303 exists : `bool`
304 `True` if the entity exists in one of the child datastores.
305 """
306 for datastore in self.datastores:
307 if datastore.exists(ref):
308 log.debug("Found %s in datastore %s", ref, datastore.name)
309 return True
310 return False
312 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
313 """Load an InMemoryDataset from the store.
315 The dataset is returned from the first datastore that has
316 the dataset.
318 Parameters
319 ----------
320 ref : `DatasetRef`
321 Reference to the required Dataset.
322 parameters : `dict`
323 `StorageClass`-specific parameters that specify, for example,
324 a slice of the dataset to be loaded.
326 Returns
327 -------
328 inMemoryDataset : `object`
329 Requested dataset or slice thereof as an InMemoryDataset.
331 Raises
332 ------
333 FileNotFoundError
334 Requested dataset can not be retrieved.
335 TypeError
336 Return value from formatter has unexpected type.
337 ValueError
338 Formatter failed to process the dataset.
339 """
341 for datastore in self.datastores:
342 try:
343 inMemoryObject = datastore.get(ref, parameters)
344 log.debug("Found dataset %s in datastore %s", ref, datastore.name)
345 return inMemoryObject
346 except FileNotFoundError:
347 pass
349 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))
351 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
352 """Write a InMemoryDataset with a given `DatasetRef` to each
353 datastore.
355 The put() to child datastores can fail with
356 `DatasetTypeNotSupportedError`. The put() for this datastore will be
357 deemed to have succeeded so long as at least one child datastore
358 accepted the inMemoryDataset.
360 Parameters
361 ----------
362 inMemoryDataset : `object`
363 The dataset to store.
364 ref : `DatasetRef`
365 Reference to the associated Dataset.
367 Raises
368 ------
369 TypeError
370 Supplied object and storage class are inconsistent.
371 DatasetTypeNotSupportedError
372 All datastores reported `DatasetTypeNotSupportedError`.
373 """
374 log.debug("Put %s", ref)
376 # Confirm that we can accept this dataset
377 if not self.constraints.isAcceptable(ref):
378 # Raise rather than use boolean return value.
379 raise DatasetTypeNotSupportedError(
380 f"Dataset {ref} has been rejected by this datastore via configuration."
381 )
383 isPermanent = False
384 nsuccess = 0
385 npermanent = 0
386 nephemeral = 0
387 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
388 if constraints is not None and not constraints.isAcceptable(ref):
389 log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref)
390 continue
392 if datastore.isEphemeral:
393 nephemeral += 1
394 else:
395 npermanent += 1
396 try:
397 datastore.put(inMemoryDataset, ref)
398 nsuccess += 1
399 if not datastore.isEphemeral:
400 isPermanent = True
401 except DatasetTypeNotSupportedError:
402 pass
404 if nsuccess == 0:
405 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")
407 if not isPermanent and npermanent > 0:  [407 ↛ 408 never taken]
408 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)
410 if self._transaction is not None:
411 self._transaction.registerUndo("put", self.remove, ref)
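# Usage sketch (``chained``, ``in_memory_dataset`` and ``ref`` are assumed): a
# put() rejected by every child surfaces as DatasetTypeNotSupportedError, which
# callers may want to handle explicitly.
try:
    chained.put(in_memory_dataset, ref)
except DatasetTypeNotSupportedError:
    log.warning("No child datastore accepted %s", ref)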
413 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
414 # Docstring inherited from base class.
415 if transfer != "auto":
416 return transfer
417 # Ask each datastore what they think auto means
418 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}
420 # Remove any untranslated "auto" values
421 transfers.discard(transfer)
423 if len(transfers) == 1:  [423 ↛ 424 never taken]
424 return transfers.pop()
425 if not transfers:  [425 ↛ 429 never taken]
426 # Everything reported "auto"
427 return transfer
429 raise RuntimeError(
430 "Chained datastore does not yet support different transfer modes"
431 f" from 'auto' in each child datastore (wanted {transfers})"
432 )
434 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
435 # Docstring inherited from Datastore._prepIngest.
436 if transfer is None or transfer == "move":
437 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")
439 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
440 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
441 if not acceptable:
442 log.debug(
443 "Datastore %s skipping ingest via configuration for refs %s",
444 name,
445 ", ".join(str(ref) for ref in dataset.refs),
446 )
447 return False
448 else:
449 return True
451 # Filter down to just datasets the chained datastore's own
452 # configuration accepts.
453 okForParent: List[FileDataset] = [
454 dataset
455 for dataset in datasets
456 if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints)
457 ]
459 # Iterate over nested datastores and call _prepIngest on each.
460 # Save the results to a list:
461 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
462 # ...and remember whether all of the failures are due to
463 # NotImplementedError being raised.
464 allFailuresAreNotImplementedError = True
465 for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
466 okForChild: List[FileDataset]
467 if constraints is not None:
468 okForChild = [
469 dataset
470 for dataset in okForParent
471 if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints)
472 ]
473 else:
474 okForChild = okForParent
475 try:
476 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
477 except NotImplementedError:
478 log.debug(
479 "Skipping ingest for datastore %s because transfer mode %s is not supported.",
480 datastore.name,
481 transfer,
482 )
483 continue
484 allFailuresAreNotImplementedError = False
485 children.append((datastore, prepDataForChild))
486 if allFailuresAreNotImplementedError:
487 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
488 return _IngestPrepData(children=children)
490 def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
491 # Docstring inherited from Datastore._finishIngest.
492 for datastore, prepDataForChild in prepData.children:
493 datastore._finishIngest(prepDataForChild, transfer=transfer)
495 def getURIs(
496 self, ref: DatasetRef, predict: bool = False
497 ) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
498 """Return URIs associated with dataset.
500 Parameters
501 ----------
502 ref : `DatasetRef`
503 Reference to the required dataset.
504 predict : `bool`, optional
505 If the datastore does not know about the dataset, should it
506 return a predicted URI or not?
508 Returns
509 -------
510 primary : `ButlerURI`
511 The URI to the primary artifact associated with this dataset.
512 If the dataset was disassembled within the datastore this
513 may be `None`.
514 components : `dict`
515 URIs to any components associated with the dataset artifact.
516 Can be empty if there are no components.
518 Notes
519 -----
520 The returned URI is from the first datastore in the list that has
521 the dataset, with preference given to permanent datastores over
522 ephemeral ones. If no datastore has the dataset and prediction
523 is allowed, the predicted URI for the first datastore in the list will
524 be returned.
525 """
526 DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
527 log.debug("Requesting URIs for %s", ref)
528 predictedUri: Optional[DatastoreURIs] = None
529 predictedEphemeralUri: Optional[DatastoreURIs] = None
530 firstEphemeralUri: Optional[DatastoreURIs] = None
531 for datastore in self.datastores:
532 if datastore.exists(ref):
533 if not datastore.isEphemeral:
534 uri = datastore.getURIs(ref)
535 log.debug("Retrieved non-ephemeral URI: %s", uri)
536 return uri
537 elif not firstEphemeralUri:
538 firstEphemeralUri = datastore.getURIs(ref)
539 elif predict:
540 if not predictedUri and not datastore.isEphemeral:
541 predictedUri = datastore.getURIs(ref, predict)
542 elif not predictedEphemeralUri and datastore.isEphemeral:
543 predictedEphemeralUri = datastore.getURIs(ref, predict)
545 if firstEphemeralUri:
546 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
547 return firstEphemeralUri
549 if predictedUri:
550 log.debug("Retrieved predicted URI: %s", predictedUri)
551 return predictedUri
553 if predictedEphemeralUri:
554 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
555 return predictedEphemeralUri
557 raise FileNotFoundError("Dataset {} not in any datastore".format(ref))
559 def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
560 """URI to the Dataset.
562 The returned URI is from the first datastore in the list that has
563 the dataset, with preference given to permanent datastores over
564 ephemeral ones. If no datastore has the dataset and prediction
565 is allowed, the predicted URI for the first datastore in the list will
566 be returned.
568 Parameters
569 ----------
570 ref : `DatasetRef`
571 Reference to the required Dataset.
572 predict : `bool`
573 If `True`, allow URIs to be returned of datasets that have not
574 been written.
576 Returns
577 -------
578 uri : `ButlerURI`
579 URI pointing to the dataset within the datastore. If the
580 dataset does not exist in the datastore, and if ``predict`` is
581 `True`, the URI will be a prediction and will include a URI
582 fragment "#predicted".
584 Notes
585 -----
586 If the datastore does not have entities that relate well
587 to the concept of a URI the returned URI string will be
588 descriptive. The returned URI is not guaranteed to be obtainable.
590 Raises
591 ------
592 FileNotFoundError
593 A URI has been requested for a dataset that does not exist and
594 guessing is not allowed.
595 RuntimeError
596 Raised if a request is made for a single URI but multiple URIs
597 are associated with this dataset.
598 """
599 log.debug("Requesting URI for %s", ref)
600 primary, components = self.getURIs(ref, predict)
601 if primary is None or components:  [601 ↛ 602 never taken]
602 raise RuntimeError(
603 f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead."
604 )
605 return primary
607 def retrieveArtifacts(
608 self,
609 refs: Iterable[DatasetRef],
610 destination: ButlerURI,
611 transfer: str = "auto",
612 preserve_path: bool = True,
613 overwrite: bool = False,
614 ) -> List[ButlerURI]:
615 """Retrieve the file artifacts associated with the supplied refs.
617 Parameters
618 ----------
619 refs : iterable of `DatasetRef`
620 The datasets for which file artifacts are to be retrieved.
621 A single ref can result in multiple files. The refs must
622 be resolved.
623 destination : `ButlerURI`
624 Location to write the file artifacts.
625 transfer : `str`, optional
626 Method to use to transfer the artifacts. Must be one of the options
627 supported by `ButlerURI.transfer_from()`. "move" is not allowed.
628 preserve_path : `bool`, optional
629 If `True` the full path of the file artifact within the datastore
630 is preserved. If `False` the final file component of the path
631 is used.
632 overwrite : `bool`, optional
633 If `True` allow transfers to overwrite existing files at the
634 destination.
636 Returns
637 -------
638 targets : `list` of `ButlerURI`
639 URIs of file artifacts in destination location. Order is not
640 preserved.
641 """
642 if not destination.isdir():  [642 ↛ 643 never taken]
643 raise ValueError(f"Destination location must refer to a directory. Given {destination}")
645 # Using getURIs is not feasible since it becomes difficult to
646 # determine the path within the datastore later on. For now
647 # follow getURIs implementation approach.
649 pending = set(refs)
651 # There is a question as to whether an exception should be raised
652 # early if some of the refs are missing, or whether files should be
653 # transferred until a problem is hit. Prefer to complain up front.
654 # Key the grouping by the datastore's position in the chain.
655 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}
657 for number, datastore in enumerate(self.datastores):
658 if datastore.isEphemeral:
659 # In the future we will want to distinguish in-memory from
660 # caching datastore since using an on-disk local
661 # cache is exactly what we should be doing.
662 continue
663 datastore_refs = {ref for ref in pending if datastore.exists(ref)}
665 if datastore_refs:
666 grouped_by_datastore[number] = datastore_refs
668 # Remove these from the pending list so that we do not bother
669 # looking for them any more.
670 pending = pending - datastore_refs
672 if pending:  [672 ↛ 673 never taken]
673 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")
675 # Now do the transfer.
676 targets: List[ButlerURI] = []
677 for number, datastore_refs in grouped_by_datastore.items():
678 targets.extend(
679 self.datastores[number].retrieveArtifacts(
680 datastore_refs,
681 destination,
682 transfer=transfer,
683 preserve_path=preserve_path,
684 overwrite=overwrite,
685 )
686 )
688 return targets
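# Usage sketch (``chained`` and ``refs`` are assumed; the destination path is
# illustrative): copy the file artifacts for some refs into a local directory,
# preserving the datastore-relative paths.
destination = ButlerURI("/tmp/butler_export/", forceDirectory=True)
exported = chained.retrieveArtifacts(refs, destination, transfer="copy", preserve_path=True)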
690 def remove(self, ref: DatasetRef) -> None:
691 """Indicate to the datastore that a dataset can be removed.
693 The dataset will be removed from each datastore. The dataset is
694 not required to exist in every child datastore.
696 Parameters
697 ----------
698 ref : `DatasetRef`
699 Reference to the required dataset.
701 Raises
702 ------
703 FileNotFoundError
704 Attempt to remove a dataset that does not exist. Raised if none
705 of the child datastores removed the dataset.
706 """
707 log.debug("Removing %s", ref)
708 self.trash(ref, ignore_errors=False)
709 self.emptyTrash(ignore_errors=False)
711 def forget(self, refs: Iterable[DatasetRef]) -> None:
712 for datastore in tuple(self.datastores):
713 datastore.forget(refs)
715 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
716 if isinstance(ref, DatasetRef):
717 ref_label = str(ref)
718 else:
719 ref_label = "bulk datasets"
721 log.debug("Trashing %s", ref_label)
723 counter = 0
724 for datastore in self.datastores:
725 try:
726 datastore.trash(ref, ignore_errors=ignore_errors)
727 counter += 1
728 except FileNotFoundError:
729 pass
731 if counter == 0:
732 err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
733 if ignore_errors:  [733 ↛ 734 never taken]
734 log.warning(err_msg)
735 else:
736 raise FileNotFoundError(err_msg)
738 def emptyTrash(self, ignore_errors: bool = True) -> None:
739 for datastore in self.datastores:
740 datastore.emptyTrash(ignore_errors=ignore_errors)
742 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
743 """Retrieve a dataset from an input `Datastore`,
744 and store the result in this `Datastore`.
746 Parameters
747 ----------
748 inputDatastore : `Datastore`
749 The external `Datastore` from which to retrieve the Dataset.
750 ref : `DatasetRef`
751 Reference to the required dataset in the input data store.
758 """
759 assert inputDatastore is not self # unless we want it for renames?
760 inMemoryDataset = inputDatastore.get(ref)
761 self.put(inMemoryDataset, ref)
763 def validateConfiguration(
764 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
765 ) -> None:
766 """Validate some of the configuration for this datastore.
768 Parameters
769 ----------
770 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
771 Entities to test against this configuration. Can be differing
772 types.
773 logFailures : `bool`, optional
774 If `True`, output a log message for every validation error
775 detected.
777 Raises
778 ------
779 DatastoreValidationError
780 Raised if there is a validation problem with a configuration.
781 All the problems are reported in a single exception.
783 Notes
784 -----
785 This method checks each datastore in turn.
786 """
788 # Need to catch each of the datastore outputs and ensure that
789 # all are tested.
790 failures = []
791 for datastore in self.datastores:
792 try:
793 datastore.validateConfiguration(entities, logFailures=logFailures)
794 except DatastoreValidationError as e:
795 if logFailures:  [795 ↛ 797 never taken]
796 log.critical("Datastore %s failed validation", datastore.name)
797 failures.append(f"Datastore {self.name}: {e}")
799 if failures:
800 msg = ";\n".join(failures)
801 raise DatastoreValidationError(msg)
803 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
804 # Docstring is inherited from base class
805 failures = []
806 for datastore in self.datastores:
807 try:
808 datastore.validateKey(lookupKey, entity)
809 except DatastoreValidationError as e:
810 failures.append(f"Datastore {self.name}: {e}")
812 if failures:
813 msg = ";\n".join(failures)
814 raise DatastoreValidationError(msg)
816 def getLookupKeys(self) -> Set[LookupKey]:
817 # Docstring is inherited from base class
818 keys = set()
819 for datastore in self.datastores:
820 keys.update(datastore.getLookupKeys())
822 keys.update(self.constraints.getLookupKeys())
823 for p in self.datastoreConstraints:
824 if p is not None:  [824 ↛ 825 never taken]
825 keys.update(p.getLookupKeys())
827 return keys
829 def needs_expanded_data_ids(
830 self,
831 transfer: Optional[str],
832 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
833 ) -> bool:
834 # Docstring inherited.
835 # We can't safely use `self.datastoreConstraints` with `entity` to
836 # check whether a child datastore would even want to ingest this
837 # dataset, because we don't want to filter out datastores that might
838 # need an expanded data ID based on incomplete information (e.g. we
839 # pass a StorageClass, but the constraint dispatches on DatasetType).
840 # So we pessimistically check if any datastore would need an expanded
841 # data ID for this transfer mode.
842 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  [842 ↛ exit: generator expression never finished]
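# End-to-end usage sketch (``chained`` is an already constructed
# ChainedDatastore, ``obj`` an in-memory dataset and ``ref`` a resolved
# DatasetRef; all three are assumed rather than defined in this module):
chained.put(obj, ref)          # offered to every child; one acceptance suffices
assert chained.exists(ref)     # True if any child datastore holds the dataset
round_trip = chained.get(ref)  # the first child holding the dataset answers
uri = chained.getURI(ref)      # permanent children preferred over ephemeral ones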