# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""In-memory datastore."""

import time
import logging

from lsst.daf.butler.core.datastore import Datastore
from lsst.daf.butler.core.storageClass import StorageClassFactory
from lsst.daf.butler.core.storageInfo import StorageInfo

log = logging.getLogger(__name__)

__all__ = ("StoredItemInfo", "InMemoryDatastore")

 

 

class StoredItemInfo:
    """Internal metadata associated with a `DatasetRef`.

    Parameters
    ----------
    timestamp : `float`
        Unix timestamp indicating the time the dataset was stored.
    storageClass : `StorageClass`
        StorageClass associated with the dataset.
    parentID : `int`, optional
        ID of the parent `DatasetRef` if this entry is a concrete
        composite. Not used if the dataset being stored is not a
        virtual component of a composite.
    """

    def __init__(self, timestamp, storageClass, parentID=None):
        self.timestamp = timestamp
        self.storageClass = storageClass
        self.parentID = parentID

    def __str__(self):
        return "StoredItemInfo({}, {}, parent={})".format(self.timestamp, self.storageClass.name,
                                                          self.parentID)

    def __repr__(self):
        return "StoredItemInfo({!r}, {!r}, parent={})".format(self.timestamp, self.storageClass,
                                                              self.parentID)
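
# Illustrative only: the kind of record ``InMemoryDatastore.put`` creates for
# each stored dataset. The timestamp, storage class, and parent ID below are
# made up, not taken from a real repository.
#
#     info = StoredItemInfo(time.time(), storageClass, parentID=42)
#     str(info)   # -> "StoredItemInfo(1525000000.0, ExposureF, parent=42)"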

 

 

class InMemoryDatastore(Datastore):
    """Basic Datastore for writing to an in-memory cache.

    This datastore is ephemeral in that the contents of the datastore
    disappear when the Python process completes. This also means that
    other processes cannot access this datastore.

    Attributes
    ----------
    config : `DatastoreConfig`
        Configuration used to create Datastore.
    storageClassFactory : `StorageClassFactory`
        Factory for creating storage class instances from name.
    name : `str`
        Label associated with this Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    def __init__(self, config, registry=None):
        super().__init__(config, registry)

        self.storageClassFactory = StorageClassFactory()

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = "InMemoryDatastore@{}".format(time.time())
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets = {}

        # Records is distinct in order to track concrete composite components
        # where we register multiple components for a single dataset.
        self.records = {}

    def __str__(self):
        return "InMemory"

 

    @classmethod
    def setConfigRoot(cls, root, config, full):
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        """
        return

 

    def addStoredItemInfo(self, ref, info):
        """Record internal storage information associated with this
        `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that has been stored.
        info : `StoredItemInfo`
            Metadata associated with the stored Dataset.

        Raises
        ------
        KeyError
            An entry with this DatasetRef already exists.
        """
        if ref.id in self.records:
            raise KeyError("Attempt to store item info with ID {}"
                           " when that ID exists as '{}'".format(ref.id, self.records[ref.id]))
        self.records[ref.id] = info

 

    def removeStoredItemInfo(self, ref):
        """Remove information about the object associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that has been removed.
        """
        del self.records[ref.id]

 

    def getStoredItemInfo(self, ref):
        """Retrieve information associated with an object stored in this
        `Datastore`.

        Parameters
        ----------
        ref : `DatasetRef`
            The Dataset that is to be queried.

        Returns
        -------
        info : `StoredItemInfo`
            Stored information about the dataset.

        Raises
        ------
        KeyError
            Dataset with that id can not be found.
        """
        record = self.records.get(ref.id, None)
        if record is None:
            raise KeyError("Unable to retrieve information associated with Dataset {}".format(ref.id))
        return record

 

    def exists(self, ref):
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        # Get the stored information (this will fail if no dataset)
        try:
            storedItemInfo = self.getStoredItemInfo(ref)
        except KeyError:
            return False

        # The actual ID for the requested dataset might be that of a parent
        # if this is a composite
        thisref = ref.id
        if storedItemInfo.parentID is not None:
            thisref = storedItemInfo.parentID
        return thisref in self.datasets

 

    def get(self, ref, parameters=None):
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset can not be retrieved.
        TypeError
            Retrieved object has an unexpected Python type.
        ValueError
            Read and write storage classes differ but no component
            was requested.
        """

        log.debug("Retrieve %s from %s", ref, self.name)

        if not self.exists(ref):
            raise FileNotFoundError("Could not retrieve Dataset {}".format(ref))

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites.
        readStorageClass = ref.datasetType.storageClass
        storedItemInfo = self.getStoredItemInfo(ref)
        writeStorageClass = storedItemInfo.storageClass

        # We might need a parent if we are being asked for a component
        # of a concrete composite
        thisID = ref.id
        if storedItemInfo.parentID is not None:
            thisID = storedItemInfo.parentID
        inMemoryDataset = self.datasets[thisID]

        # Different storage classes implies a component request
        if readStorageClass != writeStorageClass:

            component = ref.datasetType.component()

            if component is None:
                raise ValueError("Storage class inconsistency ({} vs {}) but no"
                                 " component requested".format(readStorageClass.name,
                                                               writeStorageClass.name))

            # Concrete composite written as a single object (we hope)
            inMemoryDataset = writeStorageClass.assembler().getComponent(inMemoryDataset, component)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype
        if pytype and not isinstance(inMemoryDataset, pytype):
            raise TypeError("Got Python type {} (datasetType '{}')"
                            " but expected {}".format(type(inMemoryDataset),
                                                      ref.datasetType.name, pytype))

        return inMemoryDataset
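
    # Illustrative use of the two paths through ``get`` above. Both refs are
    # hypothetical; they are not defined in this module.
    #
    #     exposure = datastore.get(exposureRef)     # storage classes match:
    #                                               # stored object returned as-is
    #     wcs = datastore.get(wcsComponentRef)      # component of a concrete
    #                                               # composite: extracted via the
    #                                               # write storage class assembler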

 

    def put(self, inMemoryDataset, ref):
        """Write an InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        """

        datasetType = ref.datasetType
        storageClass = datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset), storageClass.pytype))

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries
        # TODO: Add to ephemeral part of registry
        checksum = str(id(inMemoryDataset))
        size = 0
        info = StorageInfo(self.name, checksum, size)
        self.registry.addStorageInfo(ref, info)

        # Store time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredItemInfo(time.time(), ref.datasetType.storageClass, parentID=ref.id)
        self.addStoredItemInfo(ref, itemInfo)

        # Register all components with same information
        for compRef in ref.components.values():
            self.registry.addStorageInfo(compRef, info)
            self.addStoredItemInfo(compRef, itemInfo)

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)

 

    def getUri(self, ref, predict=False):
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """

        # if this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
            name = "{}#predicted".format(ref.datasetType.name)
        else:
            name = '{}'.format(id(self.datasets[ref.id]))

        return "mem://{}".format(name)

 

    def remove(self, ref):
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.
        """
        if ref.id not in self.datasets:
            raise FileNotFoundError("No such file dataset in memory: {}".format(ref))
        del self.datasets[ref.id]

        # Remove rows from registries
        self.removeStoredItemInfo(ref)
        self.registry.removeStorageInfo(self.name, ref)
        for compRef in ref.components.values():
            self.registry.removeStorageInfo(self.name, compRef)
            self.removeStoredItemInfo(compRef)

 

    def transfer(self, inputDatastore, ref):
        """Retrieve a Dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required Dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)