Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import with_statement 

2from builtins import zip 

3from builtins import range 

4from builtins import object 

5import numpy as np 

6import numpy.lib.recfunctions as recfunctions 

7from lsst.sims.catalogs.db import CompoundCatalogDBObject 

8 

9 

class CompoundInstanceCatalog(object):
    """
    This is essentially an InstanceCatalog class meant to wrap together
    several disparate InstanceCatalog instantiations that will ultimately
    be written to the same output catalog.

    You pass the constructor a list of InstanceCatalog classes, a list of
    CatalogDBObject classes, an ObservationMetaData, and an optional SQL
    constraint.

    The write_catalog method then writes all of the InstanceCatalogs to one
    ASCII file using the same API as InstanceCatalog.write_catalog.

    Note: any member variables of the CompoundInstanceCatalog whose names
    do not begin with '_' will be assigned to the InstanceCatalogs iterated
    over by the CompoundInstanceCatalog.  This allows you to, for example,
    format the outputs of every InstanceCatalog in the CompoundInstanceCatalog
    by setting override_formats in just the CompoundInstanceCatalog, e.g.

    class myCompoundInstanceCatalog(CompoundInstanceCatalog):
        transformations = {'raJ2000': np.degrees, 'decJ2000': np.degrees}

    comCat = myCompoundInstanceCatalog([catClass1, catClass2],
                                       [dbClass1, dbClass2])

    will write raJ2000 and decJ2000 in degrees without having to define
    transformations in catClass1 and catClass2.
    """

    def __init__(self, instanceCatalogClassList, catalogDBObjectClassList,
                 obs_metadata=None, constraint=None, compoundDBclass=None):
        """
        @param [in] instanceCatalogClassList is a list of the InstanceCatalog
        classes to be combined into one output catalog.

        @param [in] catalogDBObjectClassList is a list of the CatalogDBObject
        classes to be associated with the InstanceCatalog classes in
        instanceCatalogClassList.  There should be one CatalogDBObject class
        for each InstanceCatalogClass.

        @param [in] obs_metadata is the ObservationMetaData describing
        the telescope pointing

        @param [in] constraint is an optional SQL constraint to be applied
        to the database query

        @param [in] compoundDBclass is an optional argument specifying what
        CompoundCatalogDBObject class(es) to use to combine InstanceCatalogs
        that query the same table.  This can be either a single
        CompoundCatalogDBObject class, or a list of classes.  The
        CompoundInstanceCatalog will figure out which InstanceCatalog(s) go with
        which CompoundCatalogDBObject class.  If no CompoundCatalogDBObject class
        corresponds to a given group of InstanceCatalogs, then the base
        CompoundCatalogDBObject class will be used.

        Note: compoundDBclass should be a CompoundCatalogDBObject class.
        Not an instantiation of a CompoundCatalogDBObject class.
        """

        self._compoundDBclass = compoundDBclass
        self._obs_metadata = obs_metadata
        self._dbo_list = catalogDBObjectClassList
        self._ic_list = instanceCatalogClassList
        self._constraint = constraint

        # Each entry of _dbObjectGroupList is a list of indices into
        # _dbo_list (and _ic_list) whose CatalogDBObjects query the same
        # table; row[0] is the first member seen and acts as the group's
        # representative for comparisons.
        self._dbObjectGroupList = []

        for ix, dbo in enumerate(self._dbo_list):
            for row in self._dbObjectGroupList:
                if self.areDBObjectsTheSame(dbo, self._dbo_list[row[0]]):
                    row.append(ix)
                    break
            else:
                # no existing group matched; start a new one
                self._dbObjectGroupList.append([ix])

    def areDBObjectsTheSame(self, db1, db2):
        """
        @param [in] db1 is a CatalogDBObject (__init__ passes the classes
        from catalogDBObjectClassList; only attributes are read, so an
        instantiation works as well)

        @param [in] db2 is a CatalogDBObject (see db1)

        @param [out] a boolean stating whether or not db1 and db2
        query the same table of the same database

        tableid and database must exist on both objects (an AttributeError
        is raised otherwise); host, port and driver are optional and are
        treated as None when absent.
        """

        if db1.tableid != db2.tableid:
            return False
        if getattr(db1, 'host', None) != getattr(db2, 'host', None):
            return False
        if db1.database != db2.database:
            return False
        if getattr(db1, 'port', None) != getattr(db2, 'port', None):
            return False
        if getattr(db1, 'driver', None) != getattr(db2, 'driver', None):
            return False
        return True

    def _propagate_public_attributes(self, ic):
        """
        Copy every non-callable attribute of this CompoundInstanceCatalog
        whose name does not begin with '_' onto ic.

        @param [in] ic is an InstanceCatalog instantiation
        """
        # Instance attributes are copied first and class attributes second,
        # so a class attribute wins when both define the same name.  Only the
        # immediate class __dict__ is consulted, not the full MRO.
        for namespace in (self.__dict__, self.__class__.__dict__):
            for kk, value in namespace.items():
                if not kk.startswith('_') and not hasattr(value, '__call__'):
                    setattr(ic, kk, value)

    def _build_compound_dbo(self, dbObjClassList):
        """
        Instantiate the CompoundCatalogDBObject that will query a group of
        CatalogDBObject classes sharing one table.

        @param [in] dbObjClassList is the list of CatalogDBObject classes
        in the group

        @param [out] an instantiation of the chosen CompoundCatalogDBObject
        class, built from dbObjClassList
        """
        requested = self._compoundDBclass

        if requested is None:
            return CompoundCatalogDBObject(dbObjClassList)

        # Anything without __getitem__ is taken to be a single class rather
        # than a list of classes.
        if not hasattr(requested, '__getitem__'):
            try:
                return requested(dbObjClassList)
            except Exception:
                # the requested class rejected this group; use the base class
                return CompoundCatalogDBObject(dbObjClassList)

        # Prefer the first candidate whose _table_restriction contains the
        # tableid of every member of the group.
        for candidate in requested:
            if all(candidate._table_restriction is not None and
                   dbo.tableid in candidate._table_restriction
                   for dbo in dbObjClassList):
                return candidate(dbObjClassList)

        # Otherwise fall back to the first unrestricted candidate ...
        for candidate in requested:
            if candidate._table_restriction is None:
                return candidate(dbObjClassList)

        # ... and finally to the base CompoundCatalogDBObject class.
        return CompoundCatalogDBObject(dbObjClassList)

    def write_catalog(self, filename, chunk_size=None, write_header=True, write_mode='w'):
        """
        Write the stored list of InstanceCatalogs to a single ASCII output catalog.

        @param [in] filename is the name of the file to be written

        @param [in] chunk_size is an optional parameter telling the CompoundInstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header a boolean specifying whether or not to add a header
        to the output catalog (Note: only one header will be written; there will not be
        a header for each InstanceCatalog in the CompoundInstanceCatalog; default True)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'w')
        """

        # first, loop over all of the InstanceCatalog and CatalogDBObject classes,
        # pre-processing them (i.e. verifying that they have access to all of the
        # columns they need)
        instantiated_ic_list = []
        for icClass, dboClass in zip(self._ic_list, self._dbo_list):
            ic = icClass(dboClass(), obs_metadata=self._obs_metadata)
            self._propagate_public_attributes(ic)
            ic._write_pre_process()
            instantiated_ic_list.append(ic)

        # catalogs that are alone on their table are written directly
        for row in self._dbObjectGroupList:
            if len(row) == 1:
                ic = instantiated_ic_list[row[0]]
                ic._query_and_write(filename, chunk_size=chunk_size,
                                    write_header=write_header, write_mode=write_mode,
                                    obs_metadata=self._obs_metadata,
                                    constraint=self._constraint)
                # everything after the first write appends, without a header
                write_mode = 'a'
                write_header = False

        # catalogs that share a table are written through one compound query
        for row in self._dbObjectGroupList:
            if len(row) > 1:
                dbObjClassList = [self._dbo_list[ix] for ix in row]
                catList = [instantiated_ic_list[ix] for ix in row]
                for cat in catList:
                    cat._pre_screen = True

                compound_dbo = self._build_compound_dbo(dbObjClassList)

                self._write_compound(catList, compound_dbo, filename,
                                     chunk_size=chunk_size, write_header=write_header,
                                     write_mode=write_mode)
                write_mode = 'a'
                write_header = False

    def _write_compound(self, catList, compound_dbo, filename,
                        chunk_size=None, write_header=False, write_mode='a'):
        """
        Write out a set of InstanceCatalog instantiations that have been
        determined to query the same database table.

        @param [in] catList is the list of InstanceCatalog instantiations

        @param [in] compound_dbo is the CompoundCatalogDBObject instantiation
        associated with catList

        @param [in] filename is the name of the file to be written

        @param [in] chunk_size is an optional parameter telling the CompoundInstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header a boolean specifying whether or not to add a header
        to the output catalog (Note: only one header will be written, using the first
        InstanceCatalog in catList; default False)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'a')
        """

        colnames = []         # every distinct column to request in the query
        master_colnames = []  # per catalog: the queried column names, in order
        name_map = []         # per catalog: queried column name -> catalog column name
        dbObjNameList = [db.objid for db in compound_dbo._dbObjectClassList]
        for name, cat in zip(dbObjNameList, catList):
            localNames = []
            local_map = {}
            for colName in cat._active_columns:
                prefixed_name = '%s_%s' % (name, colName)
                query_name = compound_dbo.name_map(prefixed_name)
                if query_name not in colnames:
                    colnames.append(query_name)
                localNames.append(query_name)
                local_map[query_name] = colName
            master_colnames.append(localNames)
            name_map.append(local_map)

        master_results = compound_dbo.query_columns(colnames=colnames,
                                                    obs_metadata=self._obs_metadata,
                                                    constraint=self._constraint,
                                                    chunk_size=chunk_size)

        with open(filename, write_mode) as file_handle:
            if write_header:
                catList[0].write_header(file_handle)

            # per catalog: the field names handed to the catalog, computed
            # once from the first chunk and reused for later chunks
            new_dtype_name_list = [None]*len(catList)

            first_chunk = True
            for chunk in master_results:
                for ix, (catName, cat) in enumerate(zip(dbObjNameList, catList)):

                    if first_chunk:
                        # a requested name that is not a field of the returned
                        # chunk is replaced by the catalog's own column name
                        for iy, name in enumerate(master_colnames[ix]):
                            if name not in chunk.dtype.fields:
                                master_colnames[ix][iy] = name_map[ix][name]

                    local_recarray = recfunctions.repack_fields(
                        chunk[master_colnames[ix]].view(np.recarray))

                    local_recarray.flags['WRITEABLE'] = False  # so numpy does not raise a warning
                                                               # because it thinks we may accidentally
                                                               # write to this array
                    if new_dtype_name_list[ix] is None:
                        # drop the '<objid>_' prefix from the field names
                        new_dtype_name_list[ix] = [dd.replace(catName+'_', '')
                                                   for dd in master_colnames[ix]]

                    local_recarray.dtype.names = new_dtype_name_list[ix]
                    cat._write_recarray(local_recarray, file_handle)
                    cat._delete_current_chunk()

                first_chunk = False

318 

319 local_recarray.dtype.names = new_dtype_name_list[ix] 

320 cat._write_recarray(local_recarray, file_handle) 

321 cat._delete_current_chunk() 

322 

323 first_chunk = False