Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

#!/usr/bin/env python 

 

# 

# LSST Data Management System 

# Copyright 2016 LSST Corporation. 

# 

# This product includes software developed by the 

# LSST Project (http://www.lsst.org/). 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the LSST License Statement and 

# the GNU General Public License along with this program. If not, 

# see <http://www.lsstcorp.org/LegalNotices/>. 

# 

import copy 

import inspect 

import os 

 

from lsst.daf.persistence import Storage, listify, doImport, Policy 

 

 

class RepositoryArgs:
    """Arguments passed into a Butler that are used to instantiate a repository.

    This includes arguments that can be used to create a new repository (cfgRoot, root, mapper,
    mapperArgs, policy) and are persisted along with the new repository's configuration file. These
    arguments can also describe how a new or existing repository is to be used (cfgRoot or root, tags,
    mode). When indicating an existing repository it is better to not specify unnecessary arguments, as
    if they conflict with the persisted repository configuration then a RuntimeError will be raised
    during Butler init.

    A RepositoryArgs class can be initialized from a dict, if the first argument to the initializer is
    a dict.

    Parameters
    ----------
    cfgRoot : URI or dict, optional
        If dict, the initializer is re-called with the expanded dict; every other argument passed
        alongside it is ignored.
        If URI, this is the location where the RepositoryCfg should be found (existing repo) or put
        (new repo).
    root : URI, optional
        If different than cfgRoot then this is the location where the repository should exist. A
        RepositoryCfg will be put at cfgRoot and its root will be a path to root.
    mapper : string or class object, optional
        The mapper to use with this repository. If string, should refer to an importable object. If
        class object, should be a mapper to be instantiated by the Butler during Butler init.
    mapperArgs : dict
        Arguments & values to pass to the mapper when initializing it.
    tags : list or object, optional
        One or more unique identifiers to uniquely identify this repository and its parents when
        performing Butler.get.
    mode : string, optional
        Should be one of 'r', 'w', or 'rw', for 'read', 'write', or 'read-write'. Can be omitted; input
        repositories will default to 'r', output repositories will default to 'w'. 'w' on an input
        repository will raise a RuntimeError during Butler init, although 'rw' works and is equivalent
        to 'r'. Output repositories may be 'w' or 'rw'; 'r' for an output repository will raise a
        RuntimeError during Butler init.
    policy : dict
        Policy associated with this repository, overrides all other policy data (which may be loaded
        from policies in derived packages).

    Raises
    ------
    TypeError
        If cfgRoot is a dict whose keys are not valid initializer argument names.
    """

    def __init__(self, cfgRoot=None, root=None, mapper=None, mapperArgs=None, tags=None,
                 mode=None, policy=None):
        # Anything that can be expanded with ``**`` exposes ``keys``; None and URI strings do not.
        # Testing for it explicitly (instead of catching TypeError around the recursive call) lets a
        # TypeError caused by a bad key in the dict reach the caller instead of being swallowed.
        if hasattr(cfgRoot, "keys"):
            self.__init__(**cfgRoot)
            return
        # Empty/None locations are stored unchanged; anything else is resolved against the current
        # working directory after trailing path separators are stripped.
        self._root = Storage.absolutePath(os.getcwd(), root.rstrip(os.sep)) if root else root
        self._cfgRoot = Storage.absolutePath(os.getcwd(), cfgRoot.rstrip(os.sep)) if cfgRoot else cfgRoot
        self._mapper = mapper
        self.mapperArgs = mapperArgs
        self.tags = set(listify(tags))
        self.mode = mode
        self.policy = Policy(policy) if policy is not None else None

    def __repr__(self):
        # Note: 'root' is reported through the property, so it shows cfgRoot when no root was given.
        return "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)" % (
            self.__class__.__name__, self.root, self._cfgRoot, self._mapper, self.mapperArgs, self.tags,
            self.mode, self.policy)

    @property
    def mapper(self):
        """The mapper (string, class object or None) given for this repository."""
        return self._mapper

    @mapper.setter
    def mapper(self, mapper):
        # An already-set mapper may only be replaced after it has been reset to None.
        if mapper is not None and self._mapper:
            raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
        self._mapper = mapper

    @property
    def cfgRoot(self):
        """Location of the RepositoryCfg; falls back to root when no cfgRoot was given."""
        return self._cfgRoot if self._cfgRoot is not None else self._root

    @property
    def root(self):
        """Location of the repository; falls back to cfgRoot when no root was given."""
        return self._root if self._root is not None else self._cfgRoot

    @staticmethod
    def inputRepo(storage, tags=None):
        """Build a RepositoryArgs with `storage` as its cfgRoot and the given tags.

        Arguments are forwarded by keyword so that `tags` is not mistaken for `root`.
        """
        return RepositoryArgs(cfgRoot=storage, tags=tags)

    @staticmethod
    def outputRepo(storage, mapper=None, mapperArgs=None, tags=None, mode=None):
        """Build a RepositoryArgs with `storage` as its cfgRoot plus mapper, mapperArgs, tags and mode.

        Arguments are forwarded by keyword so that each one reaches the initializer parameter of the
        same name.
        """
        return RepositoryArgs(cfgRoot=storage, mapper=mapper, mapperArgs=mapperArgs, tags=tags,
                              mode=mode)

    def tag(self, tag):
        """Add a tag to the repository cfg.

        Parameters
        ----------
        tag : string, iterable or object
            A string is added as a single tag. Any other iterable contributes each of its items as a
            tag. A non-iterable object is added as a single tag.
        """
        if isinstance(tag, str):
            # Strings are iterable; add them whole rather than character by character.
            self.tags.add(tag)
        else:
            try:
                self.tags.update(tag)
            except TypeError:
                # Not iterable: treat it as one tag.
                self.tags.add(tag)

 

 

class Repository:
    """A repository of persisted data, with methods to access that data through a storage and a mapper.
    """

    def __init__(self, repoData):
        """Set up the storage and the mapper described by a RepoData.

        Parameters
        ----------
        repoData : RepoData
            Object that contains the parameters with which to init the Repository.
        """
        cfg = repoData.cfg
        self._storage = Storage.makeFromURI(cfg.root)
        # Persist the cfg only when it is dirty, the repository is not a V1 repository, and the cfg
        # did not originate from a nested repository.
        if cfg.dirty and not repoData.isV1Repository and repoData.cfgOrigin != 'nested':
            self._storage.putRepositoryCfg(cfg, repoData.cfgRoot)
        self._mapperArgs = cfg.mapperArgs  # keep for reference in matchesArgs
        self._initMapper(repoData)

    def _initMapper(self, repoData):
        """Resolve the configured mapper and store it in a member variable.

        The mapper from the repository cfg is handled according to what it is:
        - a string: it is imported, and the imported object is then handled as below.
        - a class object: it is instantiated with mapperArgs.
        - anything else (e.g. an instance or None): it is used as is.

        Parameters
        ----------
        repoData : RepoData
            The RepoData with the properties of this Repository.
        """
        cfg = repoData.cfg
        candidate = cfg.mapper

        if isinstance(candidate, str):
            candidate = doImport(candidate)

        if inspect.isclass(candidate):
            # Work on a copy so the cfg's own mapperArgs are not modified.
            ctorArgs = copy.copy(cfg.mapperArgs)
            if ctorArgs is None:
                ctorArgs = {}
            if 'root' not in ctorArgs:
                ctorArgs['root'] = cfg.root
            candidate = candidate(parentRegistry=repoData.parentRegistry,
                                  repositoryCfg=cfg,
                                  **ctorArgs)

        self._mapper = candidate

    # todo want a way to make a repository read-only
    def write(self, butlerLocation, obj):
        """Write a dataset to Storage.

        The storage carried by butlerLocation is used when it has one, otherwise this repository's
        storage is used.

        :param butlerLocation: Contains the details needed to find the desired dataset.
        :param obj: The dataset to be written.
        :return: Whatever the storage's write returns.
        """
        target = butlerLocation.getStorage() or self._storage
        return target.write(butlerLocation, obj)

    def read(self, butlerLocation):
        """Read a dataset from Storage.

        The storage carried by butlerLocation is used when it has one, otherwise this repository's
        storage is used.

        :param butlerLocation: Contains the details needed to find the desired dataset.
        :return: An instance of the dataset requested by butlerLocation.
        """
        source = butlerLocation.getStorage() or self._storage
        return source.read(butlerLocation)

    #################
    # Mapper Access #

    def mappers(self):
        """Return a one-element tuple holding this repository's mapper."""
        return (self._mapper, )

    def getRegistry(self):
        """Get the registry from the mapper.

        Returns
        -------
        Registry or None
            The registry from the mapper or None if the mapper does not have one.
        """
        if self._mapper is not None:
            return self._mapper.getRegistry()
        return None

    def getKeys(self, *args, **kwargs):
        """Get the keys available in the repository/repositories.

        :param args: arguments to be passed on to mapper.getKeys
        :param kwargs: keyword arguments to be passed on to mapper.getKeys
        :return: A dict of {key:valueType}, or None if there is no mapper.
        """
        # todo: getKeys is not in the mapper API
        if self._mapper is not None:
            return self._mapper.getKeys(*args, **kwargs)
        return None

    def map(self, *args, **kwargs):
        """Find a butler location for the given arguments.

        See mapper.map for more information about args and kwargs. A location that is found gets this
        repository set on it before it is returned.

        :param args: arguments to be passed on to mapper.map
        :param kwargs: keyword arguments to be passed on to mapper.map
        :return: The type of item is dependent on the mapper being used but is typically a
                 ButlerLocation. None if the mapper returned nothing.
        :raises RuntimeError: if this repository has no mapper.
        """
        if self._mapper is None:
            raise RuntimeError("No mapper assigned to Repository")
        found = self._mapper.map(*args, **kwargs)
        if found:
            found.setRepository(self)
            return found
        return None

    def queryMetadata(self, *args, **kwargs):
        """Get possible values for keys given a partial data id.

        See mapper documentation for more explanation about queryMetadata.

        :param args: arguments to be passed on to mapper.queryMetadata
        :param kwargs: keyword arguments to be passed on to mapper.queryMetadata
        :return: The type of item is dependent on the mapper being used but is typically a set that
                 contains available values for the keys in the format input argument. None if there
                 is no mapper.
        """
        if self._mapper is not None:
            return self._mapper.queryMetadata(*args, **kwargs)
        return None

    def backup(self, *args, **kwargs):
        """Perform mapper.backup; does nothing if there is no mapper.

        See mapper.backup for more information about args and kwargs.

        :param args: arguments to be passed on to mapper.backup
        :param kwargs: keyword arguments to be passed on to mapper.backup
        :return: None
        """
        if self._mapper is not None:
            self._mapper.backup(*args, **kwargs)

    def getMapperDefaultLevel(self):
        """Get the default level of the mapper.

        This is typically used if no level is passed into butler methods that call repository.getKeys
        and/or repository.queryMetadata. There is a bug in that code because it gets the default level
        from this repository but then uses that value when searching all repositories. If this and
        other repositories have dissimilar data, the default level value will be nonsensical. A good
        example of this issue is in Butler.subset; it needs refactoring.

        :return: The mapper's default level, or None if there is no mapper.
        """
        if self._mapper is not None:
            return self._mapper.getDefaultLevel()
        return None

    def exists(self, location):
        """Check if location exists in storage.

        The storage carried by location is used when it has one, otherwise this repository's storage
        is used.

        Parameters
        ----------
        location : ButlerLocation
            Describes a location in storage to look for.

        Returns
        -------
        bool
            True if location exists, False if not.
        """
        backend = location.getStorage() or self._storage
        return backend.exists(location)