Coverage for python/lsst/daf/butler/core/datastoreCacheManager.py : 34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Cache management for a datastore."""
26__all__ = ("AbstractDatastoreCacheManager",
27 "DatastoreDisabledCacheManager",
28 "DatastoreCacheManager",
29 "DatastoreCacheManagerConfig",
30 )
32from typing import (
33 TYPE_CHECKING,
34 Optional,
35 Union,
36)
38from abc import ABC, abstractmethod
39import logging
40import tempfile
42from .configSupport import processLookupConfigs
43from .config import ConfigSubset
44from ._butlerUri import ButlerURI
if TYPE_CHECKING:
47 from .dimensions import DimensionUniverse
48 from .datasets import DatasetType, DatasetRef
49 from .storageClass import StorageClass
50 from .configSupport import LookupKey
52log = logging.getLogger(__name__)
class DatastoreCacheManagerConfig(ConfigSubset):
    """Configuration subset controlling `DatastoreCacheManager` behavior."""
class AbstractDatastoreCacheManager(ABC):
    """Abstract base class defining the Datastore caching interface.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        # Accept either a config object or something (e.g. a file path)
        # that the config class can be constructed from.
        if isinstance(config, DatastoreCacheManagerConfig):
            self.config = config
        else:
            self.config = DatastoreCacheManagerConfig(config)

    @abstractmethod
    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Report whether this entity is eligible for caching.

        Relevant for both read and write paths.

        Parameters
        ----------
        entity : `StorageClass` or `DatasetType` or `DatasetRef`
            Thing to test against the configuration. The ``name`` property
            is used to determine a match. A `DatasetType` will first check
            its name, before checking its `StorageClass`. If there are no
            matches the default will be returned.

        Returns
        -------
        should_cache : `bool`
            `True` if the dataset should be cached; `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move a file into the cache.

        The supplied `DatasetRef` determines the name of the cached file.
        `should_be_cached()` is consulted first; if the ref is rejected,
        `None` is returned and the file is not moved.

        Parameters
        ----------
        uri : `ButlerURI`
            Location of the file to be relocated to the cache. Will be moved.
        ref : `DatasetRef`
            Ref associated with this file. Will be used to determine the name
            of the file within the cache.

        Returns
        -------
        new : `ButlerURI` or `None`
            URI to the file within the cache, or `None` if the dataset
            was not accepted by the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look up a dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to locate in the cache.
        extension : `str`
            File extension expected.

        Returns
        -------
        uri : `ButlerURI` or `None`
            The URI to the cached file, or `None` if the file has not been
            cached.
        """
        raise NotImplementedError()
class DatastoreCacheManager(AbstractDatastoreCacheManager):
    """A class for managing caching in a Datastore using local files.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        super().__init__(config, universe)

        if (root := self.config.get("root")):
            self.cache_directory = ButlerURI(root, forceAbsolute=True)
        else:
            # No explicit root configured: fall back to a fresh temporary
            # directory. NOTE(review): nothing here removes this directory
            # later; presumably the isTemporary flag triggers cleanup
            # elsewhere — confirm.
            self.cache_directory = ButlerURI(tempfile.mkdtemp(prefix="butler-"), forceDirectory=True,
                                             isTemporary=True)

        # Calculate the caching lookup table.
        self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        # Docstring inherited
        matchName: Union[LookupKey, str] = "{} (via default)".format(entity)
        should_cache = False

        for key in entity._lookupNames():
            if key in self._lut:
                # Take the raw configuration value. Do NOT coerce with
                # bool() here: that would make the type validation below
                # unreachable and silently accept non-bool config entries
                # (e.g. the string "no" would become True).
                should_cache = self._lut[key]
                matchName = key
                break

        if not isinstance(should_cache, bool):
            raise TypeError(
                f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
            )

        log.debug("%s (match: %s) should%s be cached", entity, matchName, "" if should_cache else " not")
        return should_cache

    def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ButlerURI:
        """Construct the name to use for this dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to look up in or write to the cache.
        extension : `str`
            File extension to use for this file.

        Returns
        -------
        uri : `ButlerURI`
            URI to use for this dataset in the cache.
        """
        # Cached files are keyed solely by dataset ID plus extension.
        return self.cache_directory.join(f"{ref.id}{extension}")

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        # Docstring inherited
        if ref.id is None:
            raise ValueError(f"Can not cache a file associated with an unresolved reference ({ref})")

        if not self.should_be_cached(ref):
            return None

        # Write the file using the id of the dataset ref and the file
        # extension.
        cached_location = self._construct_cache_name(ref, uri.getExtension())

        # Move into the cache. This will complain if something is already
        # in the cache for this file.
        cached_location.transfer_from(uri, transfer="move")
        log.debug("Cached dataset %s to %s", ref, cached_location)

        return cached_location

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        # Docstring inherited
        cached_location = self._construct_cache_name(ref, extension)
        if cached_location.exists():
            log.debug("Retrieved cached file %s for dataset %s.", cached_location, ref)
            return cached_location
        log.debug("Dataset %s not found in cache.", ref)
        return None
class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
    """A datastore cache manager variant with caching turned off entirely.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        # Deliberately do not call the base-class initializer: a disabled
        # cache has no configuration to parse or store.
        return

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Report whether the entity should be cached.

        Always returns `False`.
        """
        return False

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Refuse to cache the dataset; always returns `None`."""
        return None

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache.

        Never finds a file.
        """
        return None