Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2 

3# 

4# LSST Data Management System 

5# Copyright 2008, 2009, 2010 LSST Corporation. 

6# 

7# This product includes software developed by the 

8# LSST Project (http://www.lsst.org/). 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the LSST License Statement and 

21# the GNU General Public License along with this program. If not, 

22# see <http://www.lsstcorp.org/LegalNotices/>. 

23# 

24 

25# -*- python -*- 

26 

27"""This module defines the ButlerSubset class and the ButlerDataRefs contained 

28within it as well as an iterator over the subset.""" 

29 

30from . import DataId 

31 

32 

class ButlerSubset:

    """ButlerSubset is a container for ButlerDataRefs.  It represents a
    collection of data ids that can be used to obtain datasets of the type
    used when creating the collection or a compatible dataset type.  It can be
    thought of as the result of a query for datasets matching a partial data
    id.

    The ButlerDataRefs are generated at a specified level of the data id
    hierarchy.  If that is not the level at which datasets are specified, the
    ButlerDataRef.subItems() method may be used to dive further into the
    ButlerDataRefs.

    ButlerSubsets should generally be created using Butler.subset().

    This mechanism replaces the creation of butlers using partial dataIds.

    Public methods:

    __init__(self, butler, datasetType, level, dataId)

    __len__(self)

    __iter__(self)

    """

    GENERATION = 2
    """This is a Generation 2 ButlerSubset.
    """

    def __init__(self, butler, datasetType, level, dataId):
        """
        Create a ButlerSubset by querying a butler for data ids matching a
        given partial data id for a given dataset type at a given hierarchy
        level.

        The matching data ids are stored in self.cache:
        - nothing is stored if butler.getKeys() returns None;
        - the dataId argument itself is stored (without querying) if it
          already contains every key reported by butler.getKeys();
        - otherwise one dict per row returned by butler.queryMetadata() is
          stored, built from the partial data id plus the queried values.

        @param butler (Butler) butler that is being queried.
        @param datasetType (str) the type of dataset to query.
        @param level (str) the hierarchy level to descend to. if empty string will look up the default
                           level.
        @param dataId (dict) the (partial or complete) data id.
        """
        self.butler = butler
        self.datasetType = datasetType
        self.dataId = DataId(dataId)
        self.cache = []
        self.level = level

        # Take the tag from the normalised self.dataId, not from the raw
        # argument: a plain dict has no ``tag`` attribute and would otherwise
        # raise AttributeError here.
        keys = self.butler.getKeys(datasetType, level, tag=self.dataId.tag)
        if keys is None:
            return
        fmt = list(keys.keys())

        # Don't query if we already have a complete dataId
        if all(key in dataId for key in fmt):
            self.cache.append(dataId)
            return

        idTuples = butler.queryMetadata(self.datasetType, fmt, self.dataId)
        for idTuple in idTuples:
            tempId = dict(self.dataId)
            if len(fmt) == 1:
                # With a single requested key each row is the bare value,
                # not a one-element sequence.
                tempId[fmt[0]] = idTuple
            else:
                # Index explicitly so that a row shorter than fmt still
                # raises IndexError instead of being silently truncated.
                for i, key in enumerate(fmt):
                    tempId[key] = idTuple[i]
            self.cache.append(tempId)

    def __repr__(self):
        return "ButlerSubset(butler=%s, datasetType=%s, dataId=%s, cache=%s, level=%s)" % (
            self.butler, self.datasetType, self.dataId, self.cache, self.level)

    def __len__(self):
        """
        Number of ButlerDataRefs in the ButlerSubset.

        @returns (int)
        """

        return len(self.cache)

    def __iter__(self):
        """
        Iterator over the ButlerDataRefs in the ButlerSubset.

        @returns (ButlerIterator)
        """

        return ButlerSubsetIterator(self)

128 

129 

class ButlerSubsetIterator:
    """
    Iterator handed out by ButlerSubset.__iter__.

    It walks the data ids held in the subset's ``cache`` and wraps each one
    in a ButlerDataRef tied to that subset.
    """

    def __init__(self, butlerSubset):
        """
        @param butlerSubset  object whose ``cache`` attribute is iterated;
                             it is also passed to every ButlerDataRef created.
        """
        self.butlerSubset = butlerSubset
        cachedIds = butlerSubset.cache
        self.iter = iter(cachedIds)

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """
        Produce the ButlerDataRef for the next cached data id.

        StopIteration from the underlying iterator propagates once the cache
        is exhausted.
        """
        nextId = next(self.iter)
        return ButlerDataRef(self.butlerSubset, nextId)

144 

145 

class ButlerDataRef:
    """
    A ButlerDataRef is a reference to a potential dataset or group of datasets
    that is portable between compatible dataset types.  As such, it can be
    used to create or retrieve datasets.

    ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
    Butler.subset().  They are initially specific to the dataset type passed
    to that call, but they may be used with any other compatible dataset type.
    Dataset type compatibility must be determined externally (or by trial and
    error).

    ButlerDataRefs may be created at any level of a data identifier hierarchy.
    If the level is not one at which datasets exist, a ButlerSubset
    with lower-level ButlerDataRefs can be created using
    ButlerDataRef.subItems().

    Public methods:

    get(self, datasetType=None, **rest)

    put(self, obj, datasetType=None, doBackup=False, **rest)

    getUri(self, datasetType=None, write=False, **rest)

    subLevels(self)

    subItems(self, level=None)

    datasetExists(self, datasetType=None, write=False, **rest)

    getButler(self)
    """

    GENERATION = 2
    """This is a Generation 2 DataRef.
    """

    def __init__(self, butlerSubset, dataId):
        """
        For internal use only.  ButlerDataRefs should only be created by
        ButlerSubset and ButlerSubsetIterator.

        @param butlerSubset  the subset this reference belongs to; its butler,
                             datasetType, level and dataId are used by the
                             other methods.
        @param dataId        the data id this reference points at.
        """

        self.butlerSubset = butlerSubset
        self.dataId = dataId

    def __repr__(self):
        return 'ButlerDataRef(butlerSubset=%s, dataId=%s)' % (self.butlerSubset, self.dataId)

    def get(self, datasetType=None, **rest):
        """
        Retrieve a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str)  dataset type to retrieve.
        @param **rest             keyword arguments with data identifiers
        @returns object corresponding to the given dataset type.
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.get(datasetType, self.dataId, **rest)

    def put(self, obj, datasetType=None, doBackup=False, **rest):
        """
        Persist a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param obj                object to persist.
        @param datasetType (str)  dataset type to persist.
        @param doBackup           if True, rename existing instead of overwriting
        @param **rest             keyword arguments with data identifiers

        WARNING: Setting doBackup=True is not safe for parallel processing, as it
        may be subject to race conditions.
        """

        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        self.butlerSubset.butler.put(obj, datasetType, self.dataId, doBackup=doBackup, **rest)

    def getUri(self, datasetType=None, write=False, **rest):
        """Return the URL for a dataset

        .. warning:: This is intended only for debugging. The URI should
        never be used for anything other than printing.

        .. note:: In the event there are multiple URIs, we return only
        the first.

        .. note:: getUri() does not currently support composite datasets.

        Parameters
        ----------
        datasetType : `str`, optional
           The dataset type of interest.
        write : `bool`, optional
           Return the URI for writing?
        rest : `dict`, optional
           Keyword arguments for the data id.

        Returns
        -------
        uri : `str`
           URI for dataset
        """

        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.getUri(datasetType, self.dataId, write=write, **rest)

    def subLevels(self):
        """
        Return the keys of the hierarchy levels below this ButlerDataRef.

        The result is the set of keys the butler reports for the dataset type
        as a whole, minus the keys it reports for the subset's own level.

        @returns (set) level keys (strings); empty if there are none."""

        subset = self.butlerSubset
        allKeys = subset.butler.getKeys(subset.datasetType, tag=subset.dataId.tag)
        # getKeys() can return None (ButlerSubset.__init__ checks for that),
        # so treat None as "no keys" instead of failing on ``None.keys()``.
        allLevels = set() if allKeys is None else set(allKeys.keys())
        ownKeys = subset.butler.getKeys(subset.datasetType, subset.level, tag=subset.dataId.tag)
        ownLevels = set() if ownKeys is None else set(ownKeys.keys())
        return allLevels - ownLevels

    def subItems(self, level=None):
        """
        Generate a ButlerSubset at a lower level of the hierarchy than this
        ButlerDataRef, using it as a partial data id.  If level is None, a
        default lower level for the original ButlerSubset level and dataset
        type is used.

        As currently implemented, the default sublevels for all the
        repositories used by this Butler instance must match for the Butler to
        be able to select a default sublevel to get the subset.

        @param level (str)   the hierarchy level to descend to.
        @returns (ButlerSubset) resulting from the lower-level query or () if
                                there is no lower level.
        @throws RuntimeError if level is None and the repositories report
                             more than one distinct default sublevel.
        """

        if level is None:
            # Collect the default sublevel reported by every repository's
            # mapper; they must all agree.
            levelSet = {
                repoData.repo._mapper.getDefaultSubLevel(self.butlerSubset.level)
                for repoData in self.butlerSubset.butler._repos.all()
            }
            if len(levelSet) > 1:
                raise RuntimeError(
                    "Support for multiple levels not implemented.")
            if not levelSet:
                # No repositories, hence no default sublevel; avoid the
                # KeyError that pop() on an empty set would raise.
                return ()
            level = levelSet.pop()
            if level is None:
                return ()
        return self.butlerSubset.butler.subset(self.butlerSubset.datasetType,
                                               level, self.dataId)

    def datasetExists(self, datasetType=None, write=False, **rest):
        """
        Determine if a dataset exists of the given type (or the type used when
        creating the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to check.
        @param write (bool)      if True, search only in output repositories
        @param **rest            keywords arguments with data identifiers
        @returns bool
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.datasetExists(
            datasetType, self.dataId, write=write, **rest)

    def getButler(self):
        """
        Return the butler associated with this data reference.
        """
        return self.butlerSubset.butler