# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Cache management for a datastore."""

from __future__ import annotations

__all__ = ("AbstractDatastoreCacheManager",
           "DatastoreDisabledCacheManager",
           "DatastoreCacheManager",
           "DatastoreCacheManagerConfig",
           )

from typing import (
    TYPE_CHECKING,
    Optional,
    Union,
)

from abc import ABC, abstractmethod
import logging
import tempfile

from .configSupport import processLookupConfigs
from .config import ConfigSubset
from ._butlerUri import ButlerURI

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass
    from .configSupport import LookupKey

log = logging.getLogger(__name__)


class DatastoreCacheManagerConfig(ConfigSubset):
    """Configuration information for `DatastoreCacheManager`."""

    component = "cached"
    requiredKeys = ("cacheable",)
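
# An illustrative configuration fragment for this component (a sketch;
# the names and values under ``cacheable`` are hypothetical, not defaults
# shipped with daf_butler — keys are dataset type or storage class names
# and values are booleans, see ``AbstractDatastoreCacheManager.should_be_cached``):
#
#   cached:
#     root: null
#     cacheable:
#       raw: true
#       StructuredDataDict: false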


class AbstractDatastoreCacheManager(ABC):
    """An abstract base class for managing caching in a Datastore.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any
        dimensions that appear in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        if not isinstance(config, DatastoreCacheManagerConfig):
            config = DatastoreCacheManagerConfig(config)
        assert isinstance(config, DatastoreCacheManagerConfig)
        self.config = config

    @abstractmethod
    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        This is relevant when reading or writing.

        Parameters
        ----------
        entity : `StorageClass` or `DatasetType` or `DatasetRef`
            Thing to test against the configuration. The ``name`` property
            is used to determine a match. A `DatasetType` is checked first
            by name and then by its `StorageClass`. If there is no match,
            the default is returned.

        Returns
        -------
        should_cache : `bool`
            Returns `True` if the dataset should be cached; `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move a file to the cache.

        Move the given file into the cache, using the supplied `DatasetRef`
        for naming. A call is made to `should_be_cached()`; if the dataset
        is not accepted by the cache, `None` is returned.

        Parameters
        ----------
        uri : `ButlerURI`
            Location of the file to be relocated to the cache. Will be moved.
        ref : `DatasetRef`
            Ref associated with this file. Will be used to determine the name
            of the file within the cache.

        Returns
        -------
        new : `ButlerURI` or `None`
            URI to the file within the cache, or `None` if the dataset
            was not accepted by the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to locate in the cache.
        extension : `str`
            File extension expected.

        Returns
        -------
        uri : `ButlerURI` or `None`
            The URI to the cached file, or `None` if the file has not been
            cached.
        """
        raise NotImplementedError()


class DatastoreCacheManager(AbstractDatastoreCacheManager):
    """A class for managing caching in a Datastore using local files.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any
        dimensions that appear in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        super().__init__(config, universe)

        # Set the cache directory if one has been specified in the
        # configuration, else defer creation until first requested.
        root = self.config.get("root")
        self._cache_directory = ButlerURI(root, forceAbsolute=True) if root is not None else None

        # Calculate the caching lookup table.
        self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)
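        # The resulting table maps `LookupKey` entries derived from the
        # ``cacheable`` section to the configured values; it is consulted
        # by `should_be_cached` below.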

    @property
    def cache_directory(self) -> ButlerURI:
        """Root of the local cache directory, created on first use if no
        root was specified in the configuration.
        """
        if self._cache_directory is None:
            # Create on demand.
            self._cache_directory = ButlerURI(tempfile.mkdtemp(prefix="butler-"),
                                              forceDirectory=True,
                                              isTemporary=True)
        return self._cache_directory

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        # Docstring inherited
        matchName: Union[LookupKey, str] = "{} (via default)".format(entity)
        should_cache = False

        for key in entity._lookupNames():
            if key in self._lut:
                # Use the raw configured value so that a non-boolean entry
                # is caught by the type check below rather than silently
                # coerced to `bool`.
                should_cache = self._lut[key]
                matchName = key
                break

        if not isinstance(should_cache, bool):
            raise TypeError(
                f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
            )

        log.debug("%s (match: %s) should%s be cached", entity, matchName,
                  "" if should_cache else " not")
        return should_cache

    def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ButlerURI:
        """Construct the name to use for this dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to look up in or write to the cache.
        extension : `str`
            File extension to use for this file.

        Returns
        -------
        uri : `ButlerURI`
            URI to use for this dataset in the cache.
        """
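        # For an assumed ref with ``id == 1234`` and extension ``".fits"``,
        # this yields ``<cache_directory>/1234.fits``.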

        return self.cache_directory.join(f"{ref.id}{extension}")

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        # Docstring inherited
        if ref.id is None:
            raise ValueError(f"Cannot cache a file associated with an unresolved reference ({ref})")

        if not self.should_be_cached(ref):
            return None

        # Name the cached file using the id of the dataset ref and the
        # file extension of the source file.
        cached_location = self._construct_cache_name(ref, uri.getExtension())

        # Move into the cache. This will complain if something is already
        # in the cache for this file.
        cached_location.transfer_from(uri, transfer="move")
        log.debug("Cached dataset %s to %s", ref, cached_location)

        return cached_location

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        # Docstring inherited
        cached_location = self._construct_cache_name(ref, extension)
        if cached_location.exists():
            log.debug("Retrieved cached file %s for dataset %s.", cached_location, ref)
            return cached_location
        log.debug("Dataset %s not found in cache.", ref)
        return None
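
# An illustrative round trip with the cache (a sketch; ``universe``,
# ``ref`` and ``local_uri`` are assumed to come from the surrounding
# datastore code and the config path is hypothetical):
#
#   cache = DatastoreCacheManager("datastores/cached.yaml", universe=universe)
#   cached = cache.find_in_cache(ref, ".fits")
#   if cached is None:
#       cached = cache.move_to_cache(local_uri, ref)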


class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
    """A variant of the datastore cache where no cache is enabled.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any
        dimensions that appear in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig],
                 universe: DimensionUniverse):
        # Caching is disabled, so the configuration and universe are
        # deliberately ignored.
        return

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        Always returns `False`.
        """
        return False

    def move_to_cache(self, uri: ButlerURI, ref: DatasetRef) -> Optional[ButlerURI]:
        """Move dataset to cache but always refuse, returning `None`."""
        return None

    def find_in_cache(self, ref: DatasetRef, extension: str) -> Optional[ButlerURI]:
        """Look for a dataset in the cache and return its location.

        Never finds a file.
        """
        return None
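
# A datastore might pick an implementation from configuration (a sketch;
# the ``enabled`` key here is hypothetical and not part of this module):
#
#   manager: AbstractDatastoreCacheManager
#   if config.get("enabled", True):
#       manager = DatastoreCacheManager(config, universe)
#   else:
#       manager = DatastoreDisabledCacheManager(config, universe)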