# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

22"""In-memory datastore.""" 

23 

24__all__ = ("StoredMemoryItemInfo", "InMemoryDatastore") 

25 

26import time 

27import logging 

28import itertools 

29from dataclasses import dataclass 

30from typing import Dict, Optional, Any 

31 

32from lsst.daf.butler import StoredDatastoreItemInfo, StorageClass 

33from .genericDatastore import GenericBaseDatastore 

34 

35log = logging.getLogger(__name__) 

36 

37 


@dataclass(frozen=True)
class StoredMemoryItemInfo(StoredDatastoreItemInfo):
    """Internal InMemoryDatastore metadata associated with a stored
    DatasetRef.
    """

    __slots__ = {"timestamp", "storageClass", "parentID"}

    timestamp: float
    """Unix timestamp indicating the time the dataset was stored."""

    storageClass: StorageClass
    """StorageClass associated with the dataset."""

    parentID: Optional[int]
    """ID of the parent `DatasetRef` if this entry is a concrete
    composite. Not used if the dataset being stored is not a
    virtual component of a composite.
    """
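
# A minimal sketch of how such a record is built (mirroring ``put`` below;
# ``ref`` stands in for a hypothetical `DatasetRef` with an integer ``id``):
#
#   info = StoredMemoryItemInfo(timestamp=time.time(),
#                               storageClass=ref.datasetType.storageClass,
#                               parentID=ref.id)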


class InMemoryDatastore(GenericBaseDatastore):
    """Basic Datastore for writing to an in-memory cache.

    This datastore is ephemeral: its contents disappear when the Python
    process completes. This also means that other processes cannot
    access this datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    registry : `Registry`, optional
        Unused parameter.
    butlerRoot : `str`, optional
        Unused parameter.

    Notes
    -----
    InMemoryDatastore does not support any file-based ingest.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    datasets: Dict[int, Any]
    """Internal storage of datasets indexed by dataset ID."""

    records: Dict[int, StoredMemoryItemInfo]
    """Internal records about stored datasets."""

    related: Dict[Optional[int], Set[int]]
    """Mapping from parent dataset ID to the IDs of all stored datasets
    sharing that parent; used to track components of concrete composites."""

    def __init__(self, config, registry=None, butlerRoot=None):
        super().__init__(config, registry)

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = "{}@{}".format(type(self).__name__, time.time())
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets = {}

        # Records is kept distinct in order to track concrete composite
        # components, where we register multiple components for a single
        # dataset.
        self.records = {}

        # Related records that share the same parent
        self.related = {}

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        return

    def addStoredItemInfo(self, refs, infos):
        # Docstring inherited from GenericBaseDatastore.
        for ref, info in zip(refs, infos):
            self.records[ref.id] = info
            self.related.setdefault(info.parentID, set()).add(ref.id)

    def getStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        return self.records[ref.id]

    def removeStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        # If a component has been removed previously then we can sometimes
        # be asked to remove it again. Other datastores ignore this, so
        # ignore it here as well.
        if ref.id not in self.records:
            return
        record = self.records[ref.id]
        del self.records[ref.id]
        self.related[record.parentID].remove(ref.id)
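
    # Bookkeeping sketch with hypothetical IDs: after registering a composite
    # whose parent has ID 1 and whose components have IDs 2 and 3, all with
    # ``parentID=1``, the internal maps would hold
    #
    #   records == {1: <info>, 2: <info>, 3: <info>}
    #   related == {1: {1, 2, 3}}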

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        # Get the stored information (this will fail if no dataset)
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            return False

        # The actual ID for the requested dataset might be that of a parent
        # if this is a composite.
        thisID = ref.id
        if storedItemInfo.parentID is not None:
            thisID = storedItemInfo.parentID
        return thisID in self.datasets

    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        if not self.exists(ref):
            raise FileNotFoundError(f"Could not retrieve Dataset {ref}")

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        storedItemInfo = self.getStoredItemInfo(ref)
        writeStorageClass = storedItemInfo.storageClass

        # Check that the supplied parameters are suitable for the type read.
        readStorageClass.validateParameters(parameters)

        # We might need a parent if we are being asked for a component
        # of a concrete composite.
        thisID = ref.id
        if storedItemInfo.parentID is not None:
            thisID = storedItemInfo.parentID
        inMemoryDataset = self.datasets[thisID]

        # Differing storage classes imply a component request.
        if readStorageClass != writeStorageClass:
            component = ref.datasetType.component()
            if component is None:
                raise ValueError("Storage class inconsistency ({} vs {}) but no"
                                 " component requested".format(readStorageClass.name,
                                                               writeStorageClass.name))

            # Concrete composite written as a single object (we hope).
            inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)

        # Since there is no formatter to process parameters, they all must be
        # passed to the assembler.
        return self._post_process_get(inMemoryDataset, readStorageClass, parameters)
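
    # Component-read sketch (names hypothetical): a ref whose dataset type is
    # a component such as "exposure.wcs" carries a read storage class that
    # differs from the stored composite's, so the branch above extracts just
    # that component from the parent object:
    #
    #   wcs = datastore.get(wcsComponentRef)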

    def put(self, inMemoryDataset, ref):
        """Write an InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`. The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        self._validate_put_parameters(inMemoryDataset, ref)

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # Store time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass,
                                        parentID=ref.id)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries.
        # TODO: Add to ephemeral part of registry.
        self._register_datasets([(ref, itemInfo)])

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)
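
    # Round-trip sketch (``ref`` is a hypothetical resolved `DatasetRef`):
    #
    #   datastore.put(myObject, ref)
    #   assert datastore.exists(ref)
    #   retrieved = datastore.get(ref)   # no serialization occurs; typically
    #                                    # the very same object is returned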

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        # If this has never been written then we have to guess.
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
            name = "{}#predicted".format(ref.datasetType.name)
        else:
            # Resolve the parent ID for components of concrete composites,
            # matching the lookup logic in ``get`` and ``remove``.
            storedItemInfo = self.getStoredItemInfo(ref)
            thisID = ref.id
            if storedItemInfo.parentID is not None:
                thisID = storedItemInfo.parentID
            name = "{}".format(id(self.datasets[thisID]))

        return "mem://{}".format(name)

    def remove(self, ref):
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            raise FileNotFoundError(f"No such dataset in memory: {ref}") from None
        thisID = ref.id
        if storedItemInfo.parentID is not None:
            thisID = storedItemInfo.parentID

        if thisID not in self.datasets:
            raise FileNotFoundError(f"No such dataset in memory: {ref}")

        # Only delete if this is the only dataset associated with this data.
        allRefs = self.related[thisID]
        theseRefs = {r.id for r in itertools.chain([ref], ref.components.values())}
        remainingRefs = allRefs - theseRefs
        if not remainingRefs:
            del self.datasets[thisID]

        # Remove rows from registries.
        self._remove_from_registry(ref)
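
    # Removal sketch (``compositeRef`` hypothetical): the parent and its
    # components share one stored object, which is deleted only once no ref
    # in ``related[parentID]`` remains.
    #
    #   datastore.remove(compositeRef)
    #   datastore.exists(compositeRef)   # now False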

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method is a no-op.
        """
        return

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class.
        return

    def getLookupKeys(self):
        # Docstring is inherited from base class.
        return self.constraints.getLookupKeys()