Coverage for python/lsst/daf/persistence/repository.py: 23%

111 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-12 02:38 -0700

1#!/usr/bin/env python 

2 

3# 

4# LSST Data Management System 

5# Copyright 2016 LSST Corporation. 

6# 

7# This product includes software developed by the 

8# LSST Project (http://www.lsst.org/). 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the LSST License Statement and 

21# the GNU General Public License along with this program. If not, 

22# see <http://www.lsstcorp.org/LegalNotices/>. 

23# 

24import copy 

25import inspect 

26import os 

27 

28from lsst.daf.persistence import Storage, listify, doImport, Policy 

29 

30 

class RepositoryArgs:
    """Arguments passed into a Butler that are used to instantiate a repository.

    This includes arguments that can be used to create a new repository (cfgRoot, root, mapper, mapperArgs,
    policy) and are persisted along with the new repository's configuration file. These arguments can also
    describe how a new or existing repository is to be used (cfgRoot or root, tags, mode). When indicating
    an existing repository it is better to not specify unnecessary arguments, as if they conflict with the
    persisted repository configuration then a RuntimeError will be raised during Butler init.

    A RepositoryArgs can be initialized from a dict, if the first argument to the initializer is a dict.

    Parameters
    ----------
    cfgRoot : URI or dict, optional
        If dict (or any other mapping), the initializer is re-called with the expanded dict as keyword
        arguments; every other argument passed alongside it is ignored.
        If URI, this is the location where the RepositoryCfg should be found (existing repo) or put (new
        repo).
    root : URI, optional
        If different than cfgRoot then this is the location where the repository should exist. A
        RepositoryCfg will be put at cfgRoot and its root will be a path to root.
    mapper : string or class object, optional
        The mapper to use with this repository. If string, should refer to an importable object. If class
        object, should be a mapper to be instantiated by the Butler during Butler init.
    mapperArgs : dict
        Arguments & values to pass to the mapper when initializing it.
    tags : list or object, optional
        One or more unique identifiers to uniquely identify this repository and its parents when
        performing Butler.get.
    mode : string, optional
        Should be one of 'r', 'w', or 'rw', for 'read', 'write', or 'read-write'. Can be omitted; input
        repositories will default to 'r', output repositories will default to 'w'. 'w' on an input
        repository will raise a RuntimeError during Butler init, although 'rw' works and is equivalent to
        'r'. Output repositories may be 'w' or 'rw'; 'r' for an output repository will raise a
        RuntimeError during Butler init.
    policy : dict
        Policy associated with this repository, overrides all other policy data (which may be loaded from
        policies in derived packages).

    Raises
    ------
    TypeError
        If cfgRoot is a mapping whose keys are not valid initializer argument names.
    """

    def __init__(self, cfgRoot=None, root=None, mapper=None, mapperArgs=None, tags=None,
                 mode=None, policy=None):
        # Imported locally so this class does not depend on a change to the module's import block.
        from collections.abc import Mapping

        if isinstance(cfgRoot, Mapping):
            # Dict-style init. An explicit type check (instead of "try **cfgRoot, except TypeError")
            # lets a genuine TypeError, e.g. an unknown key, reach the caller instead of being
            # swallowed and followed by a confusing failure in the path-handling code below.
            self.__init__(**cfgRoot)
            return

        # Non-empty locations are passed through Storage.absolutePath together with the current working
        # directory, after stripping trailing path separators; None/empty values are stored unchanged.
        self._root = Storage.absolutePath(os.getcwd(), root.rstrip(os.sep)) if root else root
        self._cfgRoot = Storage.absolutePath(os.getcwd(), cfgRoot.rstrip(os.sep)) if cfgRoot else cfgRoot
        self._mapper = mapper
        self.mapperArgs = mapperArgs
        self.tags = set(listify(tags))
        self.mode = mode
        self.policy = Policy(policy) if policy is not None else None

    def __repr__(self):
        return "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)" % (
            self.__class__.__name__, self.root, self._cfgRoot, self._mapper, self.mapperArgs, self.tags,
            self.mode, self.policy)

    @property
    def mapper(self):
        """The mapper (string, class object or instance) given for this repository, or None."""
        return self._mapper

    @mapper.setter
    def mapper(self, mapper):
        # An already-set mapper may only be replaced after it has been explicitly reset to None.
        if mapper is not None and self._mapper:
            raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
        self._mapper = mapper

    @property
    def cfgRoot(self):
        """Location of the repository configuration; falls back to root when cfgRoot was not given."""
        return self._cfgRoot if self._cfgRoot is not None else self._root

    @property
    def root(self):
        """Location of the repository; falls back to cfgRoot when root was not given."""
        return self._root if self._root is not None else self._cfgRoot

    @staticmethod
    def inputRepo(storage, tags=None):
        """Create a RepositoryArgs for an input repository.

        Parameters
        ----------
        storage : URI or dict
            Passed to the initializer as cfgRoot.
        tags : list or object, optional
            Passed to the initializer as tags.

        Returns
        -------
        RepositoryArgs
        """
        # Keyword arguments are required here: the second positional parameter of the initializer is
        # ``root``, not ``tags``.
        return RepositoryArgs(cfgRoot=storage, tags=tags)

    @staticmethod
    def outputRepo(storage, mapper=None, mapperArgs=None, tags=None, mode=None):
        """Create a RepositoryArgs for an output repository.

        Parameters
        ----------
        storage : URI or dict
            Passed to the initializer as cfgRoot.
        mapper, mapperArgs, tags, mode : optional
            Passed to the initializer arguments of the same name.

        Returns
        -------
        RepositoryArgs
        """
        # Keyword arguments are required here: passing these positionally would shift each value into
        # the wrong initializer parameter (``root`` sits between ``cfgRoot`` and ``mapper``).
        return RepositoryArgs(cfgRoot=storage, mapper=mapper, mapperArgs=mapperArgs, tags=tags, mode=mode)

    def tag(self, tag):
        """Add a tag, or every tag of an iterable of tags, to the repository args.

        Parameters
        ----------
        tag : string, iterable or object
            A string is added as a single tag. Any other iterable has each of its items added. A
            non-iterable object is added as a single tag.
        """
        if isinstance(tag, str):
            # Strings are iterable; add them whole rather than character by character.
            self.tags.add(tag)
        else:
            try:
                self.tags.update(tag)
            except TypeError:
                # Not iterable: treat it as a single tag.
                self.tags.add(tag)

122 

123 

class Repository:
    """A repository of persisted data, with methods to access that data.

    Dataset I/O is delegated to a Storage, and dataset lookup is delegated to a mapper.
    """

    def __init__(self, repoData):
        """Initialize a Repository with parameters input via RepoData.

        Parameters
        ----------
        repoData : RepoData
            Object that contains the parameters with which to init the Repository.
        """
        cfg = repoData.cfg
        self._storage = Storage.makeFromURI(cfg.root)
        # A modified cfg is written out, except for V1 repositories and cfgs whose origin is 'nested'.
        shouldPersistCfg = cfg.dirty and not repoData.isV1Repository and repoData.cfgOrigin != 'nested'
        if shouldPersistCfg:
            self._storage.putRepositoryCfg(cfg, repoData.cfgRoot)
        # keep for reference in matchesArgs
        self._mapperArgs = cfg.mapperArgs
        self._initMapper(repoData)

    def _initMapper(self, repoData):
        """Initialize and keep the mapper in a member var.

        The mapper named by the repository cfg is resolved as follows:

        - a string is imported, and the imported object is then treated as below;
        - a class object is instantiated with the cfg's mapperArgs;
        - anything else (e.g. a mapper instance, or None) is used as is.

        Parameters
        ----------
        repoData : RepoData
            The RepoData with the properties of this Repository.
        """
        cfg = repoData.cfg
        resolved = cfg.mapper

        if isinstance(resolved, str):
            resolved = doImport(resolved)

        if inspect.isclass(resolved):
            # Work on a copy so that the cfg's own mapperArgs are not modified by the 'root' default.
            ctorArgs = {} if cfg.mapperArgs is None else copy.copy(cfg.mapperArgs)
            if 'root' not in ctorArgs:
                ctorArgs['root'] = cfg.root
            resolved = resolved(parentRegistry=repoData.parentRegistry,
                                repositoryCfg=cfg,
                                **ctorArgs)

        self._mapper = resolved

    def _selectStorage(self, location):
        """Return the storage carried by ``location`` if it has one, else this repository's storage."""
        override = location.getStorage()
        return override if override else self._storage

    # todo want a way to make a repository read-only
    def write(self, butlerLocation, obj):
        """Write a dataset to Storage.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            Contains the details needed to find the desired dataset.
        obj : object
            The dataset to be written.

        Returns
        -------
        object
            Whatever the storage's write method returns.
        """
        return self._selectStorage(butlerLocation).write(butlerLocation, obj)

    def read(self, butlerLocation):
        """Read a dataset from Storage.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            Contains the details needed to find the desired dataset.

        Returns
        -------
        object
            An instance of the dataset requested by butlerLocation.
        """
        return self._selectStorage(butlerLocation).read(butlerLocation)

    #################
    # Mapper Access #

    def mappers(self):
        """Return a 1-tuple holding this repository's mapper."""
        return (self._mapper, )

    def getRegistry(self):
        """Get the registry from the mapper.

        Returns
        -------
        Registry or None
            The registry from the mapper or None if the mapper does not have one.
        """
        return None if self._mapper is None else self._mapper.getRegistry()

    def getKeys(self, *args, **kwargs):
        """Get the keys available in the repository/repositories.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.getKeys.

        Returns
        -------
        dict or None
            A dict of {key:valueType}, or None if this repository has no mapper.
        """
        # todo: getKeys is not in the mapper API
        return None if self._mapper is None else self._mapper.getKeys(*args, **kwargs)

    def map(self, *args, **kwargs):
        """Find a butler location for the given arguments.

        See mapper.map for more information about args and kwargs.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.map.

        Returns
        -------
        object or None
            The type of item is dependent on the mapper being used but is typically a ButlerLocation.
            None if the mapper returned a false value.

        Raises
        ------
        RuntimeError
            If this repository has no mapper.
        """
        mapper = self._mapper
        if mapper is None:
            raise RuntimeError("No mapper assigned to Repository")
        location = mapper.map(*args, **kwargs)
        if location:
            # Record on the location which repository produced it.
            location.setRepository(self)
            return location
        return None

    def queryMetadata(self, *args, **kwargs):
        """Get possible values for keys given a partial data id.

        See mapper documentation for more explanation about queryMetadata.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.queryMetadata.

        Returns
        -------
        object or None
            The type of item is dependent on the mapper being used but is typically a set that contains
            available values for the keys in the format input argument. None if this repository has no
            mapper.
        """
        return None if self._mapper is None else self._mapper.queryMetadata(*args, **kwargs)

    def backup(self, *args, **kwargs):
        """Perform mapper.backup.

        See mapper.backup for more information about args and kwargs. Does nothing if this repository
        has no mapper.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.backup.

        Returns
        -------
        None
        """
        if self._mapper is not None:
            self._mapper.backup(*args, **kwargs)

    def getMapperDefaultLevel(self):
        """Get the default level of the mapper.

        This is typically used if no level is passed into butler methods that call repository.getKeys
        and/or repository.queryMetadata. There is a bug in that code because it gets the default level
        from this repository but then uses that value when searching all repositories. If this and other
        repositories have dissimilar data, the default level value will be nonsensical. A good example of
        this issue is in Butler.subset; it needs refactoring.

        Returns
        -------
        object or None
            The mapper's default level, or None if this repository has no mapper.
        """
        return None if self._mapper is None else self._mapper.getDefaultLevel()

    def exists(self, location):
        """Check if location exists in storage.

        Parameters
        ----------
        location : ButlerLocation
            Describes a location in storage to look for.

        Returns
        -------
        bool
            True if location exists, False if not.
        """
        return self._selectStorage(location).exists(location)

305 return self._storage.exists(location)