Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

# This file is part of daf_butler. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

 

"""In-memory datastore.""" 

 

# Public names exported by ``from <this module> import *``: the record
# dataclass and the datastore class, both defined below.
__all__ = ("StoredMemoryItemInfo", "InMemoryDatastore")

 

import time 

import logging 

import itertools 

from dataclasses import dataclass 

from typing import Dict, Optional, Any 

 

from lsst.daf.butler import StoredDatastoreItemInfo, StorageClass 

from .genericDatastore import GenericBaseDatastore 

 

# Module-level logger, named after this module; used for the debug
# messages emitted when a datastore is created and on put/get.
log = logging.getLogger(__name__)

 

 

@dataclass(frozen=True)
class StoredMemoryItemInfo(StoredDatastoreItemInfo):
    """Internal InMemoryDatastore Metadata associated with a stored
    DatasetRef.

    Instances are immutable (frozen dataclass).  The fields are declared in
    the order ``timestamp``, ``storageClass``, ``parentID``, which is also
    the positional order of the generated constructor.
    """

    # Restrict the attributes declared by this class to the three fields.
    __slots__ = {"timestamp", "storageClass", "parentID"}

    timestamp: float
    """Unix timestamp indicating the time the dataset was stored."""

    storageClass: StorageClass
    """StorageClass associated with the dataset."""

    # NOTE(review): the wording below is hard to follow.  From the code in
    # this file: `InMemoryDatastore.put` always sets this to the ID of the
    # ref being stored, and `exists`/`get`/`remove` use it (when not `None`)
    # as the key into ``InMemoryDatastore.datasets`` in place of the ref's
    # own ID.  Confirm the intended meaning with the component-handling code.
    parentID: Optional[int]
    """ID of the parent `DatasetRef` if this entry is a concrete
    composite. Not used if the dataset being stored is not a
    virtual component of a composite
    """

 

 

class InMemoryDatastore(GenericBaseDatastore):
    """Basic Datastore for writing to an in memory cache.

    This datastore is ephemeral in that the contents of the datastore
    disappear when the Python process completes.  This also means that
    other processes can not access this datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    registry : `Registry`, optional
        Forwarded to the `GenericBaseDatastore` constructor; not otherwise
        referenced by this class.
    butlerRoot : `str`, optional
        Unused parameter.

    Notes
    -----
    InMemoryDatastore does not support any file-based ingest.

    Three dictionaries hold the state of the datastore:

    - ``datasets`` maps a dataset ID to the stored Python object;
    - ``records`` maps a dataset ID to its `StoredMemoryItemInfo`;
    - ``related`` maps a ``parentID`` to the set of dataset IDs whose
      records carry that ``parentID``.

    The object for a ref is stored under the ``parentID`` of its record
    when that is not `None`, and under the ref's own ID otherwise.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    datasets: Dict[int, Any]
    """Internal storage of datasets indexed by dataset ID."""

    records: Dict[int, StoredMemoryItemInfo]
    """Internal records about stored datasets."""

    related: Dict[Optional[int], set]
    """Dataset IDs of the records sharing a ``parentID``, keyed by that
    ``parentID``."""

    def __init__(self, config, registry=None, butlerRoot=None):
        super().__init__(config, registry)

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = "{}@{}".format(type(self).__name__, time.time())
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets = {}

        # Records is distinct in order to track concrete composite components
        # where we register multiple components for a single dataset.
        self.records = {}

        # Related records that share the same parent
        self.related = {}

    @classmethod
    def setConfigRoot(cls, root, config, full, overwrite=True):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists.  Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        return

    def addStoredItemInfo(self, refs, infos):
        # Docstring inherited from GenericBaseDatastore.
        for ref, info in zip(refs, infos):
            self.records[ref.id] = info
            # Group the IDs by parentID so that remove() can tell whether
            # other records still refer to the same stored object.
            self.related.setdefault(info.parentID, set()).add(ref.id)

    def getStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        # Raises KeyError if there is no record for this ref.
        return self.records[ref.id]

    def removeStoredItemInfo(self, ref):
        # Docstring inherited from GenericBaseDatastore.
        # If a component has been removed previously then we can sometimes
        # be asked to remove it again. Other datastores ignore this
        # so also ignore here
        try:
            record = self.records.pop(ref.id)
        except KeyError:
            return
        self.related[record.parentID].remove(ref.id)

    def _resolve_stored_dataset(self, ref):
        """Find the record for a ref and the key of its stored object.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        storedItemInfo : `StoredMemoryItemInfo`
            The record returned by `getStoredItemInfo`.
        realID : `int`
            Key to use with ``self.datasets``: the ``parentID`` of the
            record if that is not `None`, otherwise ``ref.id``.

        Raises
        ------
        KeyError
            Raised if there is no record for this ref.
        """
        storedItemInfo = self.getStoredItemInfo(ref)
        parentID = storedItemInfo.parentID
        realID = ref.id if parentID is None else parentID
        return storedItemInfo, realID

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        # A ref without a record is, by definition, not in this datastore.
        try:
            _, realID = self._resolve_stored_dataset(ref)
        except KeyError:
            return False

        # The actual ID for the requested dataset might be that of a parent
        # if this is a composite
        return realID in self.datasets

    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset can not be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        if not self.exists(ref):
            raise FileNotFoundError(f"Could not retrieve Dataset {ref}")

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        storedItemInfo, realID = self._resolve_stored_dataset(ref)
        writeStorageClass = storedItemInfo.storageClass

        # Check that the supplied parameters are suitable for the type read
        readStorageClass.validateParameters(parameters)

        # We might need a parent if we are being asked for a component
        # of a concrete composite
        inMemoryDataset = self.datasets[realID]

        # Different storage classes implies a component request
        if readStorageClass != writeStorageClass:
            component = ref.datasetType.component()
            if component is None:
                raise ValueError(
                    f"Storage class inconsistency ({readStorageClass.name} vs"
                    f" {writeStorageClass.name}) but no component requested"
                )

            # Concrete composite written as a single object (we hope)
            inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)

        # Since there is no formatter to process parameters, they all must be
        # passed to the assembler.
        return self._post_process_get(inMemoryDataset, readStorageClass, parameters)

    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`.  The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        self._validate_put_parameters(inMemoryDataset, ref)

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # Store time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass,
                                        parentID=ref.id)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries
        # TODO: Add to ephemeral part of registry
        self._register_datasets([(ref, itemInfo)])

        # Let an active transaction undo this put by removing the dataset.
        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        # if this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
            name = "{}#predicted".format(ref.datasetType.name)
        else:
            # Look the object up under the same key that exists() checked;
            # indexing by ref.id directly fails with KeyError when the
            # record points at a different parent ID.
            _, realID = self._resolve_stored_dataset(ref)
            name = '{}'.format(id(self.datasets[realID]))

        return "mem://{}".format(name)

    def remove(self, ref):
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        try:
            _, realID = self._resolve_stored_dataset(ref)
        except KeyError:
            raise FileNotFoundError(f"No such file dataset in memory: {ref}") from None

        if realID not in self.datasets:
            raise FileNotFoundError("No such file dataset in memory: {}".format(ref))

        # Only delete if this is the only dataset associated with this data.
        # ``related`` is keyed by parentID, so a record whose parentID is
        # None has no entry under its own ID; treat that as "no other refs"
        # rather than failing with KeyError.
        allRefs = self.related.get(realID, set())
        theseRefs = {r.id for r in itertools.chain([ref], ref.components.values())}
        remainingRefs = allRefs - theseRefs
        if not remainingRefs:
            del self.datasets[realID]

        # Remove rows from registries
        self._remove_from_registry(ref)

    def validateConfiguration(self, entities, logFailures=False):
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration.  Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method is a no-op.
        """
        return

    def validateKey(self, lookupKey, entity):
        # Docstring is inherited from base class
        # Nothing is validated here; always returns None.
        return

    def getLookupKeys(self):
        # Docstring is inherited from base class
        return self.constraints.getLookupKeys()