Coverage for python/lsst/daf/butler/datastores/inMemoryDatastore.py : 90%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""In-memory datastore."""
24__all__ = ("StoredMemoryItemInfo", "InMemoryDatastore")
26import time
27import logging
28from dataclasses import dataclass
29from typing import Dict, Optional, Any
31from lsst.daf.butler import StoredDatastoreItemInfo, StorageClass
32from .genericDatastore import GenericBaseDatastore
34log = logging.getLogger(__name__)
37@dataclass(frozen=True)
38class StoredMemoryItemInfo(StoredDatastoreItemInfo):
39 """Internal InMemoryDatastore Metadata associated with a stored
40 DatasetRef.
41 """
42 __slots__ = {"timestamp", "storageClass", "parentID"}
44 timestamp: float
45 """Unix timestamp indicating the time the dataset was stored."""
47 storageClass: StorageClass
48 """StorageClass associated with the dataset."""
50 parentID: Optional[int]
51 """ID of the parent `DatasetRef` if this entry is a concrete
52 composite. Not used if the dataset being stored is not a
53 virtual component of a composite.
54 """
57class InMemoryDatastore(GenericBaseDatastore):
58 """Basic Datastore for writing to an in memory cache.
60 This datastore is ephemeral in that the contents of the datastore
61 disappear when the Python process completes. This also means that
62 other processes cannot access this datastore.
64 Parameters
65 ----------
66 config : `DatastoreConfig` or `str`
67 Configuration.
68 registry : `Registry`, optional
69 Unused parameter.
70 butlerRoot : `str`, optional
71 Unused parameter.
73 Notes
74 -----
75 InMemoryDatastore does not support any file-based ingest.
76 """
78 defaultConfigFile = "datastores/inMemoryDatastore.yaml"
79 """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
80 an absolute path. Can be None if no defaults are specified.
81 """
83 isEphemeral = True
84 """A new datastore is created every time and datasets disappear when
85 the process shuts down."""
87 datasets: Dict[int, Any]
88 """Internal storage of datasets indexed by dataset ID."""
90 records: Dict[int, StoredMemoryItemInfo]
91 """Internal records about stored datasets."""
93 def __init__(self, config, registry=None, butlerRoot=None):
94 super().__init__(config, registry)
96 # Name ourselves with the timestamp at which the datastore
97 # was created.
98 self.name = "{}@{}".format(type(self).__name__, time.time())
99 log.debug("Creating datastore %s", self.name)
101 # Storage of datasets, keyed by dataset_id
102 self.datasets = {}
104 # The records dict is kept distinct in order to track concrete composite
105 # components, where we register multiple components for a single dataset.
106 self.records = {}
108 # Related records that share the same parent
109 self.related = {}
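# --- Editorial example, not part of the covered module ----------------------
# A minimal construction sketch. "config" is assumed to be a valid
# DatastoreConfig (normally built by Butler from its datastore configuration)
# and "registry" an existing Registry; both names are hypothetical here.
store = InMemoryDatastore(config, registry=registry)
assert store.isEphemeral
assert store.name.startswith("InMemoryDatastore@")   # name embeds time.time()
# -----------------------------------------------------------------------------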
111 @classmethod
112 def setConfigRoot(cls, root, config, full, overwrite=True):
113 """Set any filesystem-dependent config options for this Datastore to
114 be appropriate for a new empty repository with the given root.
116 Does nothing in this implementation.
118 Parameters
119 ----------
120 root : `str`
121 Filesystem path to the root of the data repository.
122 config : `Config`
123 A `Config` to update. Only the subset understood by
124 this component will be updated. Will not expand
125 defaults.
126 full : `Config`
127 A complete config with all defaults expanded that can be
128 converted to a `DatastoreConfig`. Read-only and will not be
129 modified by this method.
130 Repository-specific options that should not be obtained
131 from defaults when Butler instances are constructed
132 should be copied from ``full`` to ``config``.
133 overwrite : `bool`, optional
134 If `False`, do not modify a value in ``config`` if the value
135 already exists. Default is always to overwrite with the provided
136 ``root``.
138 Notes
139 -----
140 If a keyword is explicitly defined in the supplied ``config`` it
141 will not be overridden by this method if ``overwrite`` is `False`.
142 This allows explicit values set in external configs to be retained.
143 """
144 return
146 def addStoredItemInfo(self, refs, infos):
147 # Docstring inherited from GenericBaseDatastore.
148 for ref, info in zip(refs, infos):
149 self.records[ref.id] = info
150 self.related.setdefault(info.parentID, set()).add(ref.id)
152 def getStoredItemInfo(self, ref):
153 # Docstring inherited from GenericBaseDatastore.
154 return self.records[ref.id]
156 def removeStoredItemInfo(self, ref):
157 # Docstring inherited from GenericBaseDatastore.
158 # If a component has been removed previously then we can sometimes
159 # be asked to remove it again. Other datastores ignore this
160 # so also ignore here
161 if ref.id not in self.records: 161 ↛ 162 (line 161 didn't jump to line 162, because the condition on line 161 was never true)
162 return
163 record = self.records[ref.id]
164 del self.records[ref.id]
165 self.related[record.parentID].remove(ref.id)
167 def _get_dataset_info(self, ref):
168 """Check that the dataset is present and return the real ID and
169 associated information.
171 Parameters
172 ----------
173 ref : `DatasetRef`
174 Target `DatasetRef`
176 Returns
177 -------
178 realID : `int`
179 The dataset ID associated with this ref that should be used. This
180 could be either the ID of the supplied `DatasetRef` or that of its parent.
181 storageInfo : `StoredMemoryItemInfo`
182 Associated storage information.
184 Raises
185 ------
186 FileNotFoundError
187 Raised if the dataset is not present in this datastore.
188 """
189 try:
190 storedItemInfo = self.getStoredItemInfo(ref)
191 except KeyError:
192 raise FileNotFoundError(f"No such file dataset in memory: {ref}") from None
193 realID = ref.id
194 if storedItemInfo.parentID is not None: 194 ↛ 197 (line 194 didn't jump to line 197, because the condition on line 194 was never false)
195 realID = storedItemInfo.parentID
197 if realID not in self.datasets: 197 ↛ 198 (line 197 didn't jump to line 198, because the condition on line 197 was never true)
198 raise FileNotFoundError(f"No such file dataset in memory: {ref}")
200 return realID, storedItemInfo
202 def exists(self, ref):
203 """Check if the dataset exists in the datastore.
205 Parameters
206 ----------
207 ref : `DatasetRef`
208 Reference to the required dataset.
210 Returns
211 -------
212 exists : `bool`
213 `True` if the entity exists in the `Datastore`.
214 """
215 try:
216 self._get_dataset_info(ref)
217 except FileNotFoundError:
218 return False
219 return True
221 def get(self, ref, parameters=None):
222 """Load an InMemoryDataset from the store.
224 Parameters
225 ----------
226 ref : `DatasetRef`
227 Reference to the required Dataset.
228 parameters : `dict`
229 `StorageClass`-specific parameters that specify, for example,
230 a slice of the dataset to be loaded.
232 Returns
233 -------
234 inMemoryDataset : `object`
235 Requested dataset or slice thereof as an InMemoryDataset.
237 Raises
238 ------
239 FileNotFoundError
240 Requested dataset cannot be retrieved.
241 TypeError
242 Return value from formatter has unexpected type.
243 ValueError
244 Formatter failed to process the dataset.
245 """
247 log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)
249 realID, storedItemInfo = self._get_dataset_info(ref)
251 # We have a write storage class and a read storage class and they
252 # can be different for concrete composites.
253 readStorageClass = ref.datasetType.storageClass
254 writeStorageClass = storedItemInfo.storageClass
256 # Check that the supplied parameters are suitable for the type read
257 readStorageClass.validateParameters(parameters)
259 inMemoryDataset = self.datasets[realID]
261 component = ref.datasetType.component()
263 # Different storage classes imply a component request
264 if readStorageClass != writeStorageClass:
266 if component is None: 266 ↛ 267 (line 266 didn't jump to line 267, because the condition on line 266 was never true)
267 raise ValueError("Storage class inconsistency ({} vs {}) but no"
268 " component requested".format(readStorageClass.name,
269 writeStorageClass.name))
271 # Concrete composite written as a single object (we hope)
272 inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)
274 # Since there is no formatter to process parameters, they all must be
275 # passed to the assembler.
276 return self._post_process_get(inMemoryDataset, readStorageClass, parameters,
277 isComponent=component is not None)
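# --- Editorial example, not part of the covered module ----------------------
# get() raises FileNotFoundError for anything that was never stored here.
# "missing_ref" stands in for a resolved DatasetRef this datastore has never
# seen; any parameters passed are validated against the read StorageClass.
try:
    store.get(missing_ref)
except FileNotFoundError:
    pass  # expected: no record for that dataset ID in this datastore
# -----------------------------------------------------------------------------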
279 def put(self, inMemoryDataset, ref):
280 """Write a InMemoryDataset with a given `DatasetRef` to the store.
282 Parameters
283 ----------
284 inMemoryDataset : `object`
285 The dataset to store.
286 ref : `DatasetRef`
287 Reference to the associated Dataset.
289 Raises
290 ------
291 TypeError
292 Supplied object and storage class are inconsistent.
293 DatasetTypeNotSupportedError
294 The associated `DatasetType` is not handled by this datastore.
296 Notes
297 -----
298 If the datastore is configured to reject certain dataset types, it
299 is possible that the put will fail and raise a
300 `DatasetTypeNotSupportedError`. The main use case for this is to
301 allow `ChainedDatastore` to put to multiple datastores without
302 requiring that every datastore accepts the dataset.
303 """
305 self._validate_put_parameters(inMemoryDataset, ref)
307 self.datasets[ref.id] = inMemoryDataset
308 log.debug("Store %s in %s", ref, self.name)
310 # Store the time we received this content, to allow us to optionally
311 # expire it. Instead of storing a filename here, we include the
312 # ID of this datasetRef so we can find it from components.
313 itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass,
314 parentID=ref.id)
316 # We have to register this content with registry.
317 # Currently this assumes we have a file so we need to use stub entries
318 # TODO: Add to ephemeral part of registry
319 self._register_datasets([(ref, itemInfo)])
321 if self._transaction is not None:
322 self._transaction.registerUndo("put", self.remove, ref)
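# --- Editorial example, not part of the covered module ----------------------
# A put()/get() round trip. "ref" stands in for a resolved DatasetRef whose
# DatasetType uses a dict-like StorageClass; the object is stored directly,
# without serialization, so no copy is made.
payload = {"a": 1, "b": 2}
store.put(payload, ref)
assert store.exists(ref)
assert store.get(ref) == payload
# -----------------------------------------------------------------------------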
324 def getUri(self, ref, predict=False):
325 """URI to the Dataset.
327 Always uses the "mem://" URI prefix.
329 Parameters
330 ----------
331 ref : `DatasetRef`
332 Reference to the required Dataset.
333 predict : `bool`, optional
334 If `True`, allow URIs to be returned for datasets that have not
335 been written.
337 Returns
338 -------
339 uri : `str`
340 URI string pointing to the dataset within the datastore. If the
341 dataset does not exist in the datastore, and if ``predict`` is
342 `True`, the URI will be a prediction and will include a URI
343 fragment "#predicted".
344 If the datastore does not have entities that relate well
345 to the concept of a URI, the returned URI string will be
346 descriptive. The returned URI is not guaranteed to be obtainable.
348 Raises
349 ------
350 FileNotFoundError
351 A URI has been requested for a dataset that does not exist and
352 guessing is not allowed.
354 """
356 # if this has never been written then we have to guess
357 if not self.exists(ref):
358 if not predict:
359 raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
360 name = "{}#predicted".format(ref.datasetType.name)
361 else:
362 name = '{}'.format(id(self.datasets[ref.id]))
364 return "mem://{}".format(name)
366 def trash(self, ref, ignore_errors=False):
367 """Indicate to the Datastore that a dataset can be removed.
369 Parameters
370 ----------
371 ref : `DatasetRef`
372 Reference to the required Dataset.
373 ignore_errors : `bool`, optional
374 Indicate that errors should be ignored.
376 Raises
377 ------
378 FileNotFoundError
379 Attempt to remove a dataset that does not exist.
381 Notes
382 -----
383 Concurrency should not normally be an issue for the in-memory datastore
384 since all internal changes are isolated to this process alone and
385 the registry only changes rows associated with this process.
386 """
388 log.debug("Trash %s in datastore %s", ref, self.name)
390 # Check that this dataset is known to datastore
391 try:
392 self._get_dataset_info(ref)
394 # Move datasets to trash table
395 self._move_to_trash_in_registry(ref)
396 except Exception as e:
397 if ignore_errors:
398 log.warning("Error encountered moving dataset %s to trash in datastore %s: %s",
399 ref, self.name, e)
400 else:
401 raise
403 def emptyTrash(self, ignore_errors=False):
404 """Remove all datasets from the trash.
406 Parameters
407 ----------
408 ignore_errors : `bool`, optional
409 Ignore errors.
411 Notes
412 -----
413 This method modifies the internal tracking of datasets directly, and
414 those changes cannot be rolled back by transaction handling if a
415 problem occurs before the datasets themselves are deleted.
417 Concurrency should not normally be an issue for the in-memory datastore
418 since all internal changes are isolated to this process alone and
419 the registry only changes rows associated with this process.
420 """
421 log.debug("Emptying trash in datastore %s", self.name)
422 trashed = self.registry.getTrashedDatasets(self.name)
424 for ref in trashed:
425 try:
426 realID, _ = self._get_dataset_info(ref)
427 except Exception as e:
428 if ignore_errors:
429 log.warning("Emptying trash in datastore %s but encountered an error with dataset %s: %s",
430 self.name, ref.id, e)
431 continue
432 else:
433 raise
435 # Determine whether all references to this dataset have been
436 # removed and we can delete the dataset itself
437 allRefs = self.related[realID]
438 theseRefs = {r.id for r in ref.flatten([ref])}
439 remainingRefs = allRefs - theseRefs
440 if not remainingRefs:
441 log.debug("Removing artifact %s from datastore %s", realID, self.name)
442 del self.datasets[realID]
444 # Remove this entry
445 self.removeStoredItemInfo(ref)
447 # Inform registry that we have handled these items
448 # This should work even if another process is clearing out those rows
449 self.registry.emptyDatasetLocationsTrash(self.name, trashed)
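# --- Editorial example, not part of the covered module ----------------------
# Removal is two-phase: trash() only records the dataset in the Registry's
# trash table, while emptyTrash() later deletes the in-memory object and its
# stored-item record. "ref" is a hypothetical DatasetRef that was put() here.
store.trash(ref)
assert store.exists(ref)      # still retrievable until the trash is emptied
store.emptyTrash()
assert not store.exists(ref)
# -----------------------------------------------------------------------------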
451 def validateConfiguration(self, entities, logFailures=False):
452 """Validate some of the configuration for this datastore.
454 Parameters
455 ----------
456 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
457 Entities to test against this configuration. Can be differing
458 types.
459 logFailures : `bool`, optional
460 If `True`, output a log message for every validation error
461 detected.
463 Raises
464 ------
465 DatastoreValidationError
466 Raised if there is a validation problem with a configuration.
467 All the problems are reported in a single exception.
469 Notes
470 -----
471 This method is a no-op.
472 """
473 return
475 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> str:
476 # Docstring is inherited from base class
477 return transfer
479 def validateKey(self, lookupKey, entity):
480 # Docstring is inherited from base class
481 return
483 def getLookupKeys(self):
484 # Docstring is inherited from base class
485 return self.constraints.getLookupKeys()