# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Cache management for a datastore."""

from __future__ import annotations

__all__ = ("AbstractDatastoreCacheManager",
           "DatastoreDisabledCacheManager",
           "DatastoreCacheManager",
           "DatastoreCacheManagerConfig",
           )

from typing import (
    TYPE_CHECKING,
    Optional,
    Union,
)

from abc import ABC, abstractmethod
import logging
import tempfile

from .configSupport import processLookupConfigs
from .config import ConfigSubset
from ._butlerUri import ButlerURI

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass
    from .configSupport import LookupKey

log = logging.getLogger(__name__)


class DatastoreCacheManagerConfig(ConfigSubset):
    """Configuration information for `DatastoreCacheManager`."""


class AbstractDatastoreCacheManager(ABC):
    """An abstract base class for managing caching in a Datastore.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        if not isinstance(config, DatastoreCacheManagerConfig):
            config = DatastoreCacheManagerConfig(config)
        assert isinstance(config, DatastoreCacheManagerConfig)
        self.config = config

    @abstractmethod
    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        This is relevant when reading or writing.

        Parameters
        ----------
        entity : `StorageClass` or `DatasetType` or `DatasetRef`
            Thing to test against the configuration. The ``name`` property
            is used to determine a match. A `DatasetType` will first check
            its name before checking its `StorageClass`. If there are no
            matches, the default will be returned.

        Returns
        -------
        should_cache : `bool`
            Returns `True` if the dataset should be cached; `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move a file to the cache.

        Move the given file into the cache, using the supplied `DatasetRef`
        for naming. A call is made to `should_be_cached()`; if the dataset
        should not be cached, `None` is returned.

        Parameters
        ----------
        uri : `ButlerURI`
            Location of the file to be relocated to the cache. Will be moved.
        ref : `DatasetRef`
            Ref associated with this file. Will be used to determine the name
            of the file within the cache.

        Returns
        -------
        new : `ButlerURI` or `None`
            URI to the file within the cache, or `None` if the dataset
            was not accepted by the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to locate in the cache.
        extension : `str`
            File extension expected.

        Returns
        -------
        uri : `ButlerURI` or `None`
            The URI to the cached file, or `None` if the file has not been
            cached.
        """
        raise NotImplementedError()


class DatastoreCacheManager(AbstractDatastoreCacheManager):
    """A class for managing caching in a Datastore using local files.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        super().__init__(config, universe)

        if root := self.config.get("root"):
            self.cache_directory = ButlerURI(root, forceAbsolute=True)
        else:
            self.cache_directory = ButlerURI(tempfile.mkdtemp(prefix="butler-"), forceDirectory=True,
                                             isTemporary=True)

        # Calculate the caching lookup table.
        self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)
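
        # A hypothetical sketch of the configuration shape consumed above
        # (illustrative only: the entries under "cacheable" are examples;
        # valid lookup keys are whatever ``processLookupConfigs`` accepts,
        # such as dataset type or storage class names mapped to booleans):
        #
        #     root: null
        #     cacheable:
        #       default: false
        #       Exposure: true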

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        # Docstring inherited
        matchName: Union[LookupKey, str] = f"{entity} (via default)"
        should_cache = False

        for key in entity._lookupNames():
            if key in self._lut:
                # Do not coerce to bool here; the isinstance check below
                # must be able to catch non-boolean config values.
                should_cache = self._lut[key]
                matchName = key
                break

        if not isinstance(should_cache, bool):
            raise TypeError(
                f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
            )

        log.debug("%s (match: %s) should%s be cached", entity, matchName, "" if should_cache else " not")
        return should_cache

    def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ButlerURI:
        """Construct the name to use for this dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to look up in or write to the cache.
        extension : `str`
            File extension to use for this file.

        Returns
        -------
        uri : `ButlerURI`
            URI to use for this dataset in the cache.
        """
        return self.cache_directory.join(f"{ref.id}{extension}")
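
    # Note: the cache file name is derived solely from the dataset ID plus
    # the file extension; for example (illustrative values), a ref whose ID
    # is 42 with extension ".fits" is cached as <cache_directory>/42.fits.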

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        # Docstring inherited
        if ref.id is None:
            raise ValueError(f"Cannot cache a file associated with an unresolved reference ({ref})")

        if not self.should_be_cached(ref):
            return None

        # Write the file using the id of the dataset ref and the file
        # extension.
        cached_location = self._construct_cache_name(ref, uri.getExtension())

        # Move into the cache. This will complain if something is already
        # in the cache for this file.
        cached_location.transfer_from(uri, transfer="move")
        log.debug("Cached dataset %s to %s", ref, cached_location)

        return cached_location

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        # Docstring inherited
        cached_location = self._construct_cache_name(ref, extension)
        if cached_location.exists():
            log.debug("Retrieved cached file %s for dataset %s.", cached_location, ref)
            return cached_location
        log.debug("Dataset %s not found in cache.", ref)
        return None
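
# A minimal end-to-end sketch of how a datastore might drive this cache.
# The names ``config``, ``universe``, ``local_uri``, ``ref``, and
# ``use_local_copy`` are hypothetical stand-ins, not part of this module:
#
#     cache = DatastoreCacheManager(config, universe)
#
#     # Write path: relocate a freshly written local file into the cache.
#     if cached := cache.move_to_cache(local_uri, ref):
#         log.debug("Dataset now cached at %s", cached)
#
#     # Read path: consult the cache before fetching from remote storage.
#     if cached := cache.find_in_cache(ref, ".fits"):
#         use_local_copy(cached)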


class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
    """A variant of the datastore cache where no cache is enabled.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        return

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        Always returns `False`.
        """
        return False

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move dataset to cache but always refuse and return `None`."""
        return None

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Never finds a file.
        """
        return None
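
# A minimal selection sketch (hypothetical; the real wiring between a
# datastore and its cache manager lives elsewhere). Because the disabled
# variant implements the same interface while refusing every request, a
# datastore can swap it in when caching is turned off:
#
#     def make_cache_manager(config, universe) -> AbstractDatastoreCacheManager:
#         if "cacheable" in config:
#             return DatastoreCacheManager(config, universe)
#         return DatastoreDisabledCacheManager(config, universe)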