Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py : 91%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Chained datastore."""

__all__ = ("ChainedDatastore",)

import time
import logging
import warnings
import itertools
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Iterable,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
)

from lsst.utils import doImport
from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
    DatastoreValidationError, Constraints, FileDataset, DatasetRef

if TYPE_CHECKING:
    from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class _IngestPrepData(Datastore.IngestPrepData):
    """Helper class for ChainedDatastore ingest implementation.

    Parameters
    ----------
    children : `list` of `tuple`
        Pairs of `Datastore`, `IngestPrepData` for all child datastores.
    """
    def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
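        # Initialize the base class with the union of all refs handled by
        # the child datastores; ``children`` pairs each datastore with its
        # own prep data.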
        super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
        self.children = children


class ChainedDatastore(Datastore):
    """Chained datastores to allow reads and writes from multiple datastores.

    A ChainedDatastore is configured with multiple datastore configurations.
    A ``put()`` is always sent to each datastore. A ``get()``
    operation is sent to each datastore in turn and the first datastore
    to return a valid dataset is used.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. This configuration must include a ``datastores`` field
        as a sequence of datastore configurations. The order in this sequence
        indicates the order to use for read operations.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value. This
        root is sent to each child datastore.

    Notes
    -----
    ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
    mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
    and `"hardlink"` if and only if all its child datastores do.
95 """
97 defaultConfigFile = "datastores/chainedDatastore.yaml"
98 """Path to configuration defaults. Accessed within the ``configs`` resource
99 or relative to a search path. Can be None if no defaults specified.
100 """
102 containerKey = "datastores"
103 """Key to specify where child datastores are configured."""
105 datastores: List[Datastore]
106 """All the child datastores known to this datastore."""
108 datastoreConstraints: Sequence[Optional[Constraints]]
109 """Constraints to be applied to each of the child datastores."""

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set any filesystem-dependent config options for child Datastores to
        be appropriate for a new empty repository with the given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """

        # Extract the part of the config we care about updating
        datastoreConfig = DatastoreConfig(config, mergeDefaults=False)

        # And the subset of the full config that we can use for reference.
        # Do not bother with defaults because we are told this already has
        # them.
        fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)

        # Loop over each datastore config and pass the subsets to the
        # child datastores to process.

        containerKey = cls.containerKey
        for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey],
                                                     fullDatastoreConfig[containerKey])):
            childConfig = DatastoreConfig(child, mergeDefaults=False)
            fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
            datastoreClass = doImport(fullChildConfig["cls"])
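            # Derive a distinct root for each child from its class name and
            # its position in the chain, e.g. "<root>/PosixDatastore_1"
            # (illustrative; the exact name depends on the child class).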
            newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
            datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)

            # Reattach to parent
            datastoreConfig[containerKey, idx] = childConfig

        # Reattach modified datastore config to parent
        # If this has a datastore key we attach there, otherwise we assume
        # this information goes at the top of the config hierarchy.
        if DatastoreConfig.component in config:
            config[DatastoreConfig.component] = datastoreConfig
        else:
            config.update(datastoreConfig)

        return

    def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager)

        # Scan for child datastores and instantiate them with the same registry
        self.datastores = []
        for c in self.config["datastores"]:
            c = DatastoreConfig(c)
            datastoreType = doImport(c["cls"])
            datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
            log.debug("Creating child datastore %s", datastore.name)
            self.datastores.append(datastore)

        # Name ourself based on our children
        if self.datastores:
            # We must set the names explicitly
            self._names = [d.name for d in self.datastores]
            childNames = ",".join(self.names)
        else:
            childNames = "(empty@{})".format(time.time())
            self._names = [childNames]
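        # The chained datastore's own name embeds the child names,
        # e.g. "ChainedDatastore[<child1>,<child2>]".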
        self.name = "{}[{}]".format(type(self).__qualname__, childNames)

        # We declare we are ephemeral if all our child datastores declare
        # they are ephemeral
        isEphemeral = True
        for d in self.datastores:
            if not d.isEphemeral:
                isEphemeral = False
                break
        self.isEphemeral = isEphemeral

        # per-datastore override constraints
        if "datastore_constraints" in self.config:
            overrides = self.config["datastore_constraints"]

            if len(overrides) != len(self.datastores):
                raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
                                               " differs from number of constraints overrides"
                                               f" {len(overrides)}")

            self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
                                         for c in overrides]

        else:
            self.datastoreConstraints = (None,) * len(self.datastores)

        log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))

    @property
    def names(self) -> Tuple[str, ...]:
        return tuple(self._names)

    def __str__(self) -> str:
        chainName = ", ".join(str(ds) for ds in self.datastores)
        return chainName

    def exists(self, ref: DatasetRef) -> bool:
        """Check if the dataset exists in one of the datastores.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in one of the child datastores.
        """
        for datastore in self.datastores:
            if datastore.exists(ref):
                log.debug("Found %s in datastore %s", ref, datastore.name)
                return True
        return False

    def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an InMemoryDataset from the store.

        The dataset is returned from the first datastore that has
        the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset can not be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """

        for datastore in self.datastores:
            try:
                inMemoryObject = datastore.get(ref, parameters)
                log.debug("Found dataset %s in datastore %s", ref, datastore.name)
                return inMemoryObject
            except FileNotFoundError:
                pass

        raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))

    def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Write an InMemoryDataset with a given `DatasetRef` to each
        datastore.

        The put() to child datastores can fail with
        `DatasetTypeNotSupportedError`. The put() for this datastore will be
        deemed to have succeeded so long as at least one child datastore
        accepted the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            All datastores reported `DatasetTypeNotSupportedError`.
        """
        log.debug("Put %s", ref)

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")
        isPermanent = False
        nsuccess = 0
        npermanent = 0
        nephemeral = 0
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            if constraints is not None and not constraints.isAcceptable(ref):
                log.debug("Datastore %s skipping put via configuration for ref %s",
                          datastore.name, ref)
                continue

            if datastore.isEphemeral:
                nephemeral += 1
            else:
                npermanent += 1
            try:
                datastore.put(inMemoryDataset, ref)
                nsuccess += 1
                if not datastore.isEphemeral:
                    isPermanent = True
            except DatasetTypeNotSupportedError:
                pass

        if nsuccess == 0:
            raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")

        if not isPermanent and npermanent > 0:
            warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)

        if self._transaction is not None:
            self._transaction.registerUndo('put', self.remove, ref)

    def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class.
        if transfer != "auto":
            return transfer
        # Ask each datastore what they think auto means
        transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}

        # Remove any untranslated "auto" values
        transfers.discard(transfer)

        if len(transfers) == 1:
            return transfers.pop()
        if not transfers:
            # Everything reported "auto"
            return transfer

        raise RuntimeError("Chained datastore does not yet support different transfer modes"
                           f" from 'auto' in each child datastore (wanted {transfers})")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
        # Docstring inherited from Datastore._prepIngest.
        if transfer is None or transfer == "move":
            raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")

        def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
            acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
            if not acceptable:
                log.debug("Datastore %s skipping ingest via configuration for refs %s",
                          name, ", ".join(str(ref) for ref in dataset.refs))
                return False
            else:
                return True

        # Filter down to just datasets the chained datastore's own
        # configuration accepts.
        okForParent: List[FileDataset] = [dataset for dataset in datasets
                                          if isDatasetAcceptable(dataset, name=self.name,
                                                                 constraints=self.constraints)]

        # Iterate over nested datastores and call _prepIngest on each.
        # Save the results to a list:
        children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
        # ...and remember whether all of the failures are due to
        # NotImplementedError being raised.
        allFailuresAreNotImplementedError = True
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            okForChild: List[FileDataset]
            if constraints is not None:
                okForChild = [dataset for dataset in okForParent
                              if isDatasetAcceptable(dataset, name=datastore.name,
                                                     constraints=constraints)]
            else:
                okForChild = okForParent
            try:
                prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
            except NotImplementedError:
                log.debug("Skipping ingest for datastore %s because transfer "
                          "mode %s is not supported.", datastore.name, transfer)
                continue
            allFailuresAreNotImplementedError = False
            children.append((datastore, prepDataForChild))
        if allFailuresAreNotImplementedError:
            raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
        return _IngestPrepData(children=children)

    def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
        # Docstring inherited from Datastore._finishIngest.
        for datastore, prepDataForChild in prepData.children:
            datastore._finishIngest(prepDataForChild, transfer=transfer)

    def getURIs(self, ref: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.

        Notes
        -----
        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.
        """
        DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
        log.debug("Requesting URIs for %s", ref)
        predictedUri: Optional[DatastoreURIs] = None
        predictedEphemeralUri: Optional[DatastoreURIs] = None
        firstEphemeralUri: Optional[DatastoreURIs] = None
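        # Preference order: an existing artifact in a permanent datastore,
        # then an existing artifact in an ephemeral datastore, then (if
        # prediction is allowed) a predicted permanent URI, then a predicted
        # ephemeral URI.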
        for datastore in self.datastores:
            if datastore.exists(ref):
                if not datastore.isEphemeral:
                    uri = datastore.getURIs(ref)
                    log.debug("Retrieved non-ephemeral URI: %s", uri)
                    return uri
                elif not firstEphemeralUri:
                    firstEphemeralUri = datastore.getURIs(ref)
            elif predict:
                if not predictedUri and not datastore.isEphemeral:
                    predictedUri = datastore.getURIs(ref, predict)
                elif not predictedEphemeralUri and datastore.isEphemeral:
                    predictedEphemeralUri = datastore.getURIs(ref, predict)

        if firstEphemeralUri:
            log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
            return firstEphemeralUri

        if predictedUri:
            log.debug("Retrieved predicted URI: %s", predictedUri)
            return predictedUri

        if predictedEphemeralUri:
            log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
            return predictedEphemeralUri

        raise FileNotFoundError("Dataset {} not in any datastore".format(ref))

    def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".

        Notes
        -----
        If the datastore does not have entities that relate well
        to the concept of a URI the returned URI string will be
        descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        RuntimeError
            Raised if a request is made for a single URI but multiple URIs
            are associated with this dataset.
        """
        log.debug("Requesting URI for %s", ref)
        primary, components = self.getURIs(ref, predict)
        if primary is None or components:
            raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
                               "Use Datastore.getURIs() instead.")
        return primary

    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the file artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which file artifacts are to be retrieved.
            A single ref can result in multiple files. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the file artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the file artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.
        """
        if not destination.isdir():
            raise ValueError(f"Destination location must refer to a directory. Given {destination}")

        # Using getURIs is not feasible since it becomes difficult to
        # determine the path within the datastore later on. For now
        # follow getURIs implementation approach.

        pending = set(refs)

        # There is a question as to whether an exception should be raised
        # early if some of the refs are missing, or whether files should be
        # transferred until a problem is hit. Prefer to complain up front.
        # Use the datastore integer as primary key.
        grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}

        for number, datastore in enumerate(self.datastores):
            if datastore.isEphemeral:
                # In the future we will want to distinguish in-memory from
                # caching datastore since using an on-disk local
                # cache is exactly what we should be doing.
                continue
            datastore_refs = {ref for ref in pending if datastore.exists(ref)}

            if datastore_refs:
                grouped_by_datastore[number] = datastore_refs

                # Remove these from the pending list so that we do not bother
                # looking for them any more.
                pending = pending - datastore_refs

        if pending:
            raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")

        # Now do the transfer.
        targets: List[ButlerURI] = []
        for number, datastore_refs in grouped_by_datastore.items():
            targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
                                                                     transfer=transfer,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=overwrite))

        return targets

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the datastore that a dataset can be removed.

        The dataset will be removed from each datastore. The dataset is
        not required to exist in every child datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist. Raised if none
            of the child datastores removed the dataset.
        """
        log.debug("Removing %s", ref)
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        for datastore in tuple(self.datastores):
            datastore.forget(refs)

    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        if isinstance(ref, DatasetRef):
            ref_label = str(ref)
        else:
            ref_label = "bulk datasets"

        log.debug("Trashing %s", ref_label)
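
        # Count how many child datastores accept the trash request so that a
        # complete failure can be reported (or merely logged) below.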
        counter = 0
        for datastore in self.datastores:
            try:
                datastore.trash(ref, ignore_errors=ignore_errors)
                counter += 1
            except FileNotFoundError:
                pass

        if counter == 0:
            err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
            if ignore_errors:
                log.warning(err_msg)
            else:
                raise FileNotFoundError(err_msg)

    def emptyTrash(self, ignore_errors: bool = True) -> None:
        for datastore in self.datastores:
            datastore.emptyTrash(ignore_errors=ignore_errors)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        self.put(inMemoryDataset, ref)

    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks each datastore in turn.
        """

        # Need to catch each of the datastore outputs and ensure that
        # all are tested.
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateConfiguration(entities, logFailures=logFailures)
            except DatastoreValidationError as e:
                if logFailures:
                    log.critical("Datastore %s failed validation", datastore.name)
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def validateKey(self, lookupKey: LookupKey,
                    entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        # Docstring is inherited from base class
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateKey(lookupKey, entity)
            except DatastoreValidationError as e:
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        # Docstring is inherited from base class
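        # Combine the lookup keys understood by every child datastore with
        # those used by this datastore's own constraints and by any
        # per-child constraint overrides.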
        keys = set()
        for datastore in self.datastores:
            keys.update(datastore.getLookupKeys())

        keys.update(self.constraints.getLookupKeys())
        for p in self.datastoreConstraints:
            if p is not None:
                keys.update(p.getLookupKeys())

        return keys

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        # Docstring inherited.
        # We can't safely use `self.datastoreConstraints` with `entity` to
        # check whether a child datastore would even want to ingest this
        # dataset, because we don't want to filter out datastores that might
        # need an expanded data ID based on incomplete information (e.g. we
        # pass a StorageClass, but the constraint dispatches on DatasetType).
        # So we pessimistically check if any datastore would need an expanded
        # data ID for this transfer mode.
        return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)