Coverage for python/lsst/daf/butler/datastores/inMemoryDatastore.py : 94%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""In-memory datastore."""
24__all__ = ("StoredMemoryItemInfo", "InMemoryDatastore")
26import time
27import logging
28import itertools
29from dataclasses import dataclass
30from typing import Dict, Optional, Any
32from lsst.daf.butler import StoredDatastoreItemInfo, StorageClass
33from .genericDatastore import GenericBaseDatastore
35log = logging.getLogger(__name__)


@dataclass(frozen=True)
class StoredMemoryItemInfo(StoredDatastoreItemInfo):
    """Internal InMemoryDatastore metadata associated with a stored
    DatasetRef.
    """

    __slots__ = {"timestamp", "storageClass", "parentID"}

    timestamp: float
    """Unix timestamp indicating the time the dataset was stored."""

    storageClass: StorageClass
    """StorageClass associated with the dataset."""

    parentID: Optional[int]
    """ID of the parent `DatasetRef` if this entry is a concrete
    composite. Not used if the dataset being stored is not a
    virtual component of a composite.
    """


class InMemoryDatastore(GenericBaseDatastore):
    """Basic Datastore for writing to an in-memory cache.

    This datastore is ephemeral in that the contents of the datastore
    disappear when the Python process completes. This also means that
    other processes cannot access this datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    registry : `Registry`, optional
        Unused parameter.
    butlerRoot : `str`, optional
        Unused parameter.

    Notes
    -----
    InMemoryDatastore does not support any file-based ingest.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    datasets: Dict[int, Any]
    """Internal storage of datasets indexed by dataset ID."""

    records: Dict[int, StoredMemoryItemInfo]
    """Internal records about stored datasets."""

    def __init__(self, config, registry=None, butlerRoot=None):
        super().__init__(config, registry)

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = f"{type(self).__name__}@{time.time()}"
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets = {}

        # Records is distinct in order to track concrete composite components
        # where we register multiple components for a single dataset.
        self.records = {}

        # Related records that share the same parent
        self.related = {}

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        return

    def addStoredItemInfo(self, refs, infos):
        # Docstring inherited from GenericBaseDatastore.
        for ref, info in zip(refs, infos):
            self.records[ref.id] = info
            self.related.setdefault(info.parentID, set()).add(ref.id)

    def getStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        return self.records[ref.id]

    def removeStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        # If a component has been removed previously then we can sometimes
        # be asked to remove it again. Other datastores ignore this,
        # so we also ignore it here.
        if ref.id not in self.records:
            return
        record = self.records[ref.id]
        del self.records[ref.id]
        self.related[record.parentID].remove(ref.id)
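
    # Illustrative sketch of the internal bookkeeping (comments only, not
    # from the original source): after two hypothetical component refs with
    # IDs 2 and 3 are registered against a composite parent with ID 1, the
    # maps would look like
    #
    #     self.records == {2: info2, 3: info3}
    #     self.related == {1: {2, 3}}
    #
    # so the parent's in-memory object in ``self.datasets[1]`` is only
    # dropped once every record that shares it has been removed.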

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        # Get the stored information (this will fail if no dataset)
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            return False

        # The actual ID for the requested dataset might be that of a parent
        # if this is a composite
        thisref = ref.id
        if storedItemInfo.parentID is not None:  # coverage: never false in tests
            thisref = storedItemInfo.parentID
        return thisref in self.datasets

    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        if not self.exists(ref):
            raise FileNotFoundError(f"Could not retrieve Dataset {ref}")

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        storedItemInfo = self.getStoredItemInfo(ref)
        writeStorageClass = storedItemInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        # We might need a parent if we are being asked for a component
        # of a concrete composite
        thisID = ref.id
        if storedItemInfo.parentID is not None:  # coverage: never false in tests
            thisID = storedItemInfo.parentID
        inMemoryDataset = self.datasets[thisID]

        # Different storage classes imply a component request
        if readStorageClass != writeStorageClass:

            component = ref.datasetType.component()

            if component is None:  # coverage: never true in tests
                raise ValueError("Storage class inconsistency "
                                 f"({readStorageClass.name} vs "
                                 f"{writeStorageClass.name}) but no "
                                 "component requested")

            # Concrete composite written as a single object (we hope)
            inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)

        # Since there is no formatter to process parameters, they all must be
        # passed to the assembler.
        return self._post_process_get(inMemoryDataset, readStorageClass, parameters)
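
    # Illustrative note, not from the original source: the component path in
    # ``get`` above is exercised when a composite was ``put`` once as a
    # single object and a caller later asks for one of its components. The
    # component ref's read storage class then differs from the storage class
    # recorded at write time, so the writer's assembler is asked to extract
    # just that component from the stored composite object.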

    def put(self, inMemoryDataset, ref):
        """Write an InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`. The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        self._validate_put_parameters(inMemoryDataset, ref)

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # Store time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass,
                                        parentID=ref.id)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries
        # TODO: Add to ephemeral part of registry
        self._register_datasets([(ref, itemInfo)])

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        # If this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError(f"Dataset {ref} not in this datastore")
            name = f"{ref.datasetType.name}#predicted"
        else:
            # Resolve the parent ID so the lookup also works for components
            # of concrete composites, matching the logic in ``get``.
            thisID = ref.id
            storedItemInfo = self.getStoredItemInfo(ref)
            if storedItemInfo.parentID is not None:
                thisID = storedItemInfo.parentID
            name = f"{id(self.datasets[thisID])}"

        return f"mem://{name}"

    def remove(self, ref):
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            raise FileNotFoundError(f"No such dataset in memory: {ref}") from None
        thisID = ref.id
        if storedItemInfo.parentID is not None:  # coverage: never false in tests
            thisID = storedItemInfo.parentID

        if thisID not in self.datasets:  # coverage: never true in tests
            raise FileNotFoundError(f"No such dataset in memory: {ref}")

        # Only delete the stored object when no other refs still share it
        allRefs = self.related[thisID]
        theseRefs = {r.id for r in itertools.chain([ref], ref.components.values())}
        remainingRefs = allRefs - theseRefs
        if not remainingRefs:
            del self.datasets[thisID]

        # Remove rows from registries
        self._remove_from_registry(ref)

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method is a no-op.
        """
        return

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class
        return

    def getLookupKeys(self):
        # Docstring is inherited from base class
        return self.constraints.getLookupKeys()
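

# Illustrative sketch only, not part of the original module: the basic
# lifecycle of a dataset in an InMemoryDatastore. ``datastore`` is assumed
# to be a fully constructed InMemoryDatastore, ``ref`` a valid resolved
# `DatasetRef`, and ``dataset`` any Python object consistent with the ref's
# storage class; all three are hypothetical stand-ins supplied by the caller.
def _exampleLifecycle(datastore, dataset, ref):
    datastore.put(dataset, ref)       # store the object, keyed by ref.id
    assert datastore.exists(ref)      # visible until the process exits
    retrieved = datastore.get(ref)    # the stored object, no serialization
    uri = datastore.getUri(ref)       # e.g. "mem://140233945690128"
    datastore.remove(ref)             # drop the object and its records
    return retrieved, uri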