# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""In-memory datastore."""

__all__ = ("StoredMemoryItemInfo", "InMemoryDatastore")

import time
import logging
from dataclasses import dataclass
from typing import Dict, Optional, Any

from lsst.daf.butler import StoredDatastoreItemInfo, StorageClass
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge
from .genericDatastore import GenericBaseDatastore

log = logging.getLogger(__name__)


@dataclass(frozen=True)
class StoredMemoryItemInfo(StoredDatastoreItemInfo):
    """Internal InMemoryDatastore metadata associated with a stored
    DatasetRef.
    """
    __slots__ = {"timestamp", "storageClass", "parentID"}

    timestamp: float
    """Unix timestamp indicating the time the dataset was stored."""

    storageClass: StorageClass
    """StorageClass associated with the dataset."""

    parentID: Optional[int]
    """ID of the parent `DatasetRef` if this entry is a concrete
    composite. Not used if the dataset being stored is not a
    virtual component of a composite.
    """


class InMemoryDatastore(GenericBaseDatastore):
    """Basic Datastore for writing to an in-memory cache.

    This datastore is ephemeral in that the contents of the datastore
    disappear when the Python process completes. This also means that
    other processes cannot access this datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        Unused parameter.

    Notes
    -----
    InMemoryDatastore does not support any file-based ingest.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    datasets: Dict[int, Any]
    """Internal storage of datasets indexed by dataset ID."""

    records: Dict[int, StoredMemoryItemInfo]
    """Internal records about stored datasets."""

    def __init__(self, config, bridgeManager, butlerRoot=None):
        super().__init__(config, bridgeManager)

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = "{}@{}".format(type(self).__name__, time.time())
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets = {}

        # Records is distinct in order to track concrete composite components
        # where we register multiple components for a single dataset.
        self.records = {}

        # Related records that share the same parent
        self.related = {}

        self._bridge = bridgeManager.register(self.name, ephemeral=True)
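
    # Illustrative note, not part of the module: this datastore is normally
    # reached through a Butler whose repository configuration selects it.
    # A minimal sketch (the repository path and run name are hypothetical,
    # and the YAML key layout is an assumption based on other datastore
    # configs rather than something defined in this file):
    #
    #     # butler.yaml (excerpt)
    #     # datastore:
    #     #   cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
    #
    #     # from lsst.daf.butler import Butler
    #     # butler = Butler("/path/to/repo", run="demo")
    #
    # Each construction produces a uniquely named, process-local datastore,
    # so nothing persists once the Python process exits.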

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        return

    @property
    def bridge(self) -> DatastoreRegistryBridge:
        # Docstring inherited from GenericBaseDatastore.
        return self._bridge

    def addStoredItemInfo(self, refs, infos):
        # Docstring inherited from GenericBaseDatastore.
        for ref, info in zip(refs, infos):
            self.records[ref.id] = info
            self.related.setdefault(info.parentID, set()).add(ref.id)

    def getStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        return self.records[ref.id]

    def getStoredItemsInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        return [self.getStoredItemInfo(ref)]

    def removeStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        # If a component has been removed previously then we can sometimes
        # be asked to remove it again. Other datastores ignore this
        # so also ignore here
        if ref.id not in self.records:
            return
        record = self.records[ref.id]
        del self.records[ref.id]
        self.related[record.parentID].remove(ref.id)
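
    # Illustrative sketch of the bookkeeping above (the dataset IDs are
    # hypothetical).  After a put() of dataset 42, which records itself as
    # its own parent, the maps look roughly like:
    #
    #     # self.records == {42: StoredMemoryItemInfo(..., parentID=42)}
    #     # self.related == {42: {42}}
    #
    # If a component ref with its own ID (say 43) is later registered with
    # parentID=42, it gains its own entry in ``self.records`` while
    # ``self.related[42]`` grows to {42, 43}, which is what emptyTrash()
    # uses to decide when the shared object can finally be deleted.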

    def _get_dataset_info(self, ref):
        """Check that the dataset is present and return the real ID and
        associated information.

        Parameters
        ----------
        ref : `DatasetRef`
            Target `DatasetRef`.

        Returns
        -------
        realID : `int`
            The dataset ID associated with this ref that should be used. This
            could either be the ID of the supplied `DatasetRef` or the parent.
        storageInfo : `StoredMemoryItemInfo`
            Associated storage information.

        Raises
        ------
        FileNotFoundError
            Raised if the dataset is not present in this datastore.
        """
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            raise FileNotFoundError(f"No such file dataset in memory: {ref}") from None
        realID = ref.id
        if storedItemInfo.parentID is not None:
            realID = storedItemInfo.parentID

        if realID not in self.datasets:
            raise FileNotFoundError(f"No such file dataset in memory: {ref}")

        return realID, storedItemInfo
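
    # Illustrative example of the parent resolution above (IDs hypothetical):
    # if a composite was stored under dataset ID 7 and a component ref with
    # ID 12 carries a record with parentID=7, this method returns realID == 7,
    # so the subsequent lookup in ``self.datasets`` always finds the object
    # that was actually handed to put().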

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        try:
            self._get_dataset_info(ref)
        except FileNotFoundError:
            return False
        return True

    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """

        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        realID, storedItemInfo = self._get_dataset_info(ref)

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        writeStorageClass = storedItemInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        inMemoryDataset = self.datasets[realID]

        component = ref.datasetType.component()

        # Different storage classes imply a component request
        if readStorageClass != writeStorageClass:

            if component is None:
                raise ValueError("Storage class inconsistency ({} vs {}) but no"
                                 " component requested".format(readStorageClass.name,
                                                               writeStorageClass.name))

            # Concrete composite written as a single object (we hope)
            inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)

        # Since there is no formatter to process parameters, they all must be
        # passed to the assembler.
        return self._post_process_get(inMemoryDataset, readStorageClass, parameters,
                                      isComponent=component is not None)
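
    # Illustrative sketch of the component path above (the dataset and
    # component names are hypothetical, not defined in this module): if a
    # composite was put with write storage class "Exposure" and the caller
    # asks for its "wcs" component, the read storage class of the component
    # ref differs from the write storage class, so the write storage class's
    # assembler is asked for getComponent(composite, "wcs") before any
    # parameters are applied by _post_process_get().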

    def put(self, inMemoryDataset, ref):
        """Write an InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`. The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """

        self._validate_put_parameters(inMemoryDataset, ref)

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # Store time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass,
                                        parentID=ref.id)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries
        # TODO: Add to ephemeral part of registry
        self._register_datasets([(ref, itemInfo)])

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)
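
    # Illustrative round trip through a Butler backed by this datastore
    # (the repository path, dataset type, and data ID are hypothetical):
    #
    #     # from lsst.daf.butler import Butler
    #     # butler = Butler("/path/to/in_memory_repo", run="demo")
    #     # butler.put({"a": 1}, "my_dict", instrument="X", visit=1)
    #     # assert butler.get("my_dict", instrument="X", visit=1) == {"a": 1}
    #
    # Because the object itself is held in ``self.datasets``, a plain get()
    # with the same storage class and no parameters hands back the stored
    # object rather than a copy read from disk.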

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """

        # if this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
            name = "{}#predicted".format(ref.datasetType.name)
        else:
            realID, _ = self._get_dataset_info(ref)
            name = '{}'.format(id(self.datasets[realID]))

        return "mem://{}".format(name)

    def trash(self, ref, ignore_errors=False):
        """Indicate to the Datastore that a dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        ignore_errors : `bool`, optional
            Indicate that errors should be ignored.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        Concurrency should not normally be an issue for the in-memory
        datastore since all internal changes are isolated to solely this
        process and the registry only changes rows associated with this
        process.
        """

        log.debug("Trash %s in datastore %s", ref, self.name)

        # Check that this dataset is known to datastore
        try:
            self._get_dataset_info(ref)

            # Move datasets to trash table
            self._move_to_trash_in_registry(ref)
        except Exception as e:
            if ignore_errors:
                log.warning("Error encountered moving dataset %s to trash in datastore %s: %s",
                            ref, self.name, e)
            else:
                raise

    def emptyTrash(self, ignore_errors=False):
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Ignore errors.

        Notes
        -----
        The internal tracking of datasets is affected by this method and
        transaction handling is not supported if there is a problem before
        the datasets themselves are deleted.

        Concurrency should not normally be an issue for the in-memory
        datastore since all internal changes are isolated to solely this
        process and the registry only changes rows associated with this
        process.
        """
        log.debug("Emptying trash in datastore %s", self.name)
        with self._bridge.emptyTrash() as trashed:
            for ref in trashed:
                try:
                    realID, _ = self._get_dataset_info(ref)
                except Exception as e:
                    if ignore_errors:
                        log.warning("Emptying trash in datastore %s but encountered an "
                                    "error with dataset %s: %s",
                                    self.name, ref.id, e)
                        continue
                    else:
                        raise

                # Determine whether all references to this dataset have been
                # removed and we can delete the dataset itself
                allRefs = self.related[realID]
                theseRefs = {r.id for r in ref.flatten([ref])}
                remainingRefs = allRefs - theseRefs
                if not remainingRefs:
                    log.debug("Removing artifact %s from datastore %s", realID, self.name)
                    del self.datasets[realID]

                # Remove this entry
                self.removeStoredItemInfo(ref)
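
    # Illustrative note on the reference counting above (IDs hypothetical):
    # if the object stored under realID 7 is shared by records {7, 12, 13}
    # in ``self.related[7]``, emptying the trash for ref 12 alone leaves
    # remainingRefs == {7, 13}, so ``self.datasets[7]`` is kept; the object
    # is only deleted once the last remaining reference has been trashed
    # and emptied.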

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method is a no-op.
        """
        return

    def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> str:
        # Docstring is inherited from base class
        return transfer

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class
        return

    def getLookupKeys(self):
        # Docstring is inherited from base class
        return self.constraints.getLookupKeys()