# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Cache management for a datastore."""

from __future__ import annotations
26__all__ = ("AbstractDatastoreCacheManager",
27 "DatastoreDisabledCacheManager",
28 "DatastoreCacheManager",
29 "DatastoreCacheManagerConfig",
30 )

from typing import (
    TYPE_CHECKING,
    Optional,
    Union,
)

from abc import ABC, abstractmethod
import logging
import tempfile

from .configSupport import processLookupConfigs
from .config import ConfigSubset
from ._butlerUri import ButlerURI

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass
    from .configSupport import LookupKey

log = logging.getLogger(__name__)


class DatastoreCacheManagerConfig(ConfigSubset):
    """Configuration information for `DatastoreCacheManager`."""

    component = "cached"
    requiredKeys = ("cacheable",)
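
# An illustrative sketch of the configuration this class validates. The
# layout is an assumption based on ``component`` and ``requiredKeys``
# above, and the storage class names are hypothetical:
#
#     cached:
#       root: null
#       cacheable:
#         StructuredDataDict: true
#         ExposureF: false
#
# ``root`` optionally names a pre-existing cache directory; ``cacheable``
# maps lookup keys (dataset type name, storage class name, ...) to
# booleans. Entities matching no entry are not cached (see
# ``should_be_cached`` below).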


class AbstractDatastoreCacheManager(ABC):
    """An abstract base class for managing caching in a Datastore.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        if not isinstance(config, DatastoreCacheManagerConfig):
            config = DatastoreCacheManagerConfig(config)
        assert isinstance(config, DatastoreCacheManagerConfig)
        self.config = config

    @abstractmethod
    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        This is relevant when reading or writing.

        Parameters
        ----------
        entity : `StorageClass` or `DatasetType` or `DatasetRef`
            Thing to test against the configuration. The ``name`` property
            is used to determine a match. A `DatasetType` will first check
            its name, before checking its `StorageClass`. If there are no
            matches the default will be returned.

        Returns
        -------
        should_cache : `bool`
            Returns `True` if the dataset should be cached; `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move a file to the cache.

        Move the given file into the cache, using the supplied `DatasetRef`
        for naming. `should_be_cached()` is consulted first, and `None` is
        returned if the `DatasetRef` is not accepted.

        Parameters
        ----------
        uri : `ButlerURI`
            Location of the file to be relocated to the cache. Will be moved.
        ref : `DatasetRef`
            Ref associated with this file. Will be used to determine the name
            of the file within the cache.

        Returns
        -------
        new : `ButlerURI` or `None`
            URI to the file within the cache, or `None` if the dataset
            was not accepted by the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to locate in the cache.
        extension : `str`
            File extension expected.

        Returns
        -------
        uri : `ButlerURI` or `None`
            The URI to the cached file, or `None` if the file has not been
            cached.
        """
        raise NotImplementedError()


class DatastoreCacheManager(AbstractDatastoreCacheManager):
    """A class for managing caching in a Datastore using local files.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        super().__init__(config, universe)

        # Set cache directory if it pre-exists, else defer creation until
        # requested.
        root = self.config.get("root")
        self._cache_directory = ButlerURI(root, forceAbsolute=True) if root is not None else None

        # Calculate the caching lookup table.
        self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)

    @property
    def cache_directory(self) -> ButlerURI:
        """Directory in which datasets are cached, created on first use."""
        if self._cache_directory is None:
            # Create on demand.
            self._cache_directory = ButlerURI(tempfile.mkdtemp(prefix="butler-"), forceDirectory=True,
                                              isTemporary=True)
        return self._cache_directory

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        # Docstring inherited
        matchName: Union[LookupKey, str] = "{} (via default)".format(entity)
        should_cache = False

        for key in entity._lookupNames():
            if key in self._lut:
                # Do not coerce with bool() here; the isinstance check below
                # is what rejects non-boolean config values.
                should_cache = self._lut[key]
                matchName = key
                break

        if not isinstance(should_cache, bool):
            raise TypeError(
                f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
            )

        log.debug("%s (match: %s) should%s be cached", entity, matchName, "" if should_cache else " not")
        return should_cache
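
    # A minimal sketch of the lookup precedence above (hypothetical names,
    # not from a real configuration): given a DatasetType "calexp" with
    # storage class "ExposureF" and a ``cacheable`` section of
    #
    #     cacheable:
    #       calexp: false
    #       ExposureF: true
    #
    # the dataset type name is checked first via ``_lookupNames()``, so the
    # result is `False` even though the storage class entry says `True`.
    # With no matching entry at all, the hard-coded default of `False`
    # applies.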

    def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ButlerURI:
        """Construct the name to use for this dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to look up in or write to the cache.
        extension : `str`
            File extension to use for this file.

        Returns
        -------
        uri : `ButlerURI`
            URI to use for this dataset in the cache.
        """
        return self.cache_directory.join(f"{ref.id}{extension}")
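
    # For example, a resolved ref whose ``id`` is 42 (a made-up value) and
    # an extension of ".fits" would map to ``<cache_directory>/42.fits``.
    # Keying purely on the dataset ID means the same cached file is found
    # however the dataset was originally looked up.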

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        # Docstring inherited
        if ref.id is None:
            raise ValueError(f"Cannot cache a file associated with an unresolved reference ({ref})")

        if not self.should_be_cached(ref):
            return None

        # Name the file in the cache using the id of the dataset ref and the
        # original file extension.
        cached_location = self._construct_cache_name(ref, uri.getExtension())

        # Move into the cache. This will complain if something is already
        # in the cache for this file.
        cached_location.transfer_from(uri, transfer="move")
        log.debug("Cached dataset %s to %s", ref, cached_location)

        return cached_location
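
    # A write-path usage sketch (illustrative only; ``cache_manager``,
    # ``local_uri`` and ``ref`` stand in for objects a datastore would
    # already hold):
    #
    #     cached = cache_manager.move_to_cache(local_uri, ref)
    #     if cached is not None:
    #         # local_uri no longer exists; read from the cache instead.
    #         local_uri = cached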

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        # Docstring inherited
        cached_location = self._construct_cache_name(ref, extension)
        if cached_location.exists():
            log.debug("Retrieved cached file %s for dataset %s.", cached_location, ref)
            return cached_location
        log.debug("Dataset %s not found in cache.", ref)
        return None
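
    # A matching read-path sketch (hypothetical names; a real caller would
    # take the extension from its stored file information):
    #
    #     cached = cache_manager.find_in_cache(ref, ".fits")
    #     uri_to_read = cached if cached is not None else remote_uri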


class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
    """A variant of the datastore cache where no cache is enabled.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        # Deliberately ignore the configuration: nothing is ever cached.
        return

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        Always returns `False`.
        """
        return False

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move a dataset to the cache. Always refuses and returns `None`."""
        return None

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Never finds a file.
        """
        return None
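

# A sketch of how a datastore might choose between the two concrete
# managers (``cache_config`` and ``universe`` are assumed inputs; this
# wiring is illustrative, not actual datastore code):
#
#     if cache_config is not None:
#         cache_manager: AbstractDatastoreCacheManager = DatastoreCacheManager(
#             cache_config, universe=universe)
#     else:
#         cache_manager = DatastoreDisabledCacheManager("", universe=universe)
#
# Callers then use only the AbstractDatastoreCacheManager interface, so a
# disabled cache behaves as a cache that never accepts or finds anything.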