Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from builtins import zip 

2from builtins import str 

3from builtins import range 

4from lsst.sims.catalogs.db import CatalogDBObject 

5 

6__all__ = ["CompoundCatalogDBObject", 

7 "_CompoundCatalogDBObject_mixin"] 

8 

9 

class _CompoundCatalogDBObject_mixin(object):
    """
    This mixin exists to separate out utility methods that we will
    need for the DESC DC2 CompoundCatalogDBObject

    The methods here read self._dbObjectClassList (the classes whose
    columns are being combined) and self._nameList (the name used as a
    column prefix for each of those classes, in the same order); both
    must be set before _make_columns() is called.
    """

    def _register_column_row(self, column_row, column_diagnostic):
        """
        Add column_row to self.columns, unless an identical row is already
        there.

        @param [in] column_row is the row to add; its first element is the
        name under which the column will be queried

        @param [in] column_diagnostic is a dict, keyed on queried column
        name, of the rows already added to self.columns.  It is updated
        in place.

        Raises a RuntimeError if a different row has already been
        registered under the same queried name.
        """
        if column_row[0] in column_diagnostic:
            if column_row != column_diagnostic[column_row[0]]:
                row1 = column_row
                row2 = column_diagnostic[column_row[0]]
                raise RuntimeError("Trying to change definition of columns "
                                   + "\n%s\n%s\n" % (row1, row2))

        if column_row not in self.columns:
            self.columns.append(column_row)
            column_diagnostic[column_row[0]] = column_row

    def _make_columns(self):
        """
        Construct the self.columns member by concatenating the self.columns
        from the input CatalogDBObjects and modifying the names of the returned
        columns to identify them with their specific CatalogDBObjects.

        Also constructs self._compound_dbo_name_map, the dict consulted by
        name_map(), which maps each prefixed column name ('objid_column')
        to the name under which that column appears in self.columns.

        Every class in self._dbObjectClassList is instantiated (with no
        arguments) in order to read its columns.

        Raises a RuntimeError if the same prefixed name would be mapped
        twice, or if two different definitions end up competing for the
        same queried column name.
        """
        processed_column_names = []  # the names as they appear in the CatalogDBObject
        prefix_column_names = []  # the names with the objid added
        raw_column_transform = []  # the full transform applied by the CatalogDBObject
        all_rows = []  # store the raw rows for the last block of code in this method

        for dbo, dbName in zip(self._dbObjectClassList, self._nameList):
            db_inst = dbo()
            for row in db_inst.columns:
                all_rows.append(row)
                processed_column_names.append(row[0])
                prefix_column_names.append('%s_%s' % (dbName, row[0]))
                raw_column_transform.append(row[1:])

        self._compound_dbo_name_map = {}
        self.columns = []
        processed_columns_requiring_prefix = set()
        column_diagnostic = {}

        # Now we need to figure out which of the CatalogDBObject-mapped columns
        # actually need to be kept independent (i.e sedFilename for galaxy bulges will
        # not actually be referencing the same column as sedFilename for galaxy disks)
        # and which can be lumped together (i.e. redshift will be the same database
        # column for galaxy bulges and disks)
        n_columns = len(processed_column_names)
        for i_r1 in range(n_columns):
            processed_name = processed_column_names[i_r1]
            prefixed_name = prefix_column_names[i_r1]
            transform = raw_column_transform[i_r1]

            # if a processed column has already been determined to be degenerate,
            # just acknowledge that
            use_prefix = processed_name in processed_columns_requiring_prefix

            if not use_prefix:
                # If two CatalogDBObjects map different columns in the raw database to
                # the same transformed column name, then we need to keep the two
                # distinct in this CompoundCatalogDBObject; we do that by using the
                # prefix_name, which prepends the objid of the CatalogDBobject to the
                # transformed column.  Only later rows need to be scanned: a conflict
                # with an earlier row would already have put this name in
                # processed_columns_requiring_prefix.
                use_prefix = any(processed_column_names[i_r2] == processed_name and
                                 raw_column_transform[i_r2] != transform
                                 for i_r2 in range(i_r1 + 1, n_columns))

                if use_prefix:
                    processed_columns_requiring_prefix.add(processed_name)

            # under what name will the column actually be queried
            if use_prefix:
                query_name = prefixed_name
            else:
                query_name = processed_name

            if prefixed_name in self._compound_dbo_name_map:
                raise RuntimeError("Trying to put %s in compound_db_name_map twice" %
                                   prefixed_name)

            # build the dict that maps the prefixed column names, which CompoundInstanceCatalog
            # will reference, to the names that are actually going to be queried from
            # this CompoundCatalogDBObject
            self._compound_dbo_name_map[prefixed_name] = query_name

            # build the self.columns member variable of this CompoundCatalogDBObject
            column_row = [query_name] + list(transform)

            # if no transformation was applied, we need to map the column
            # back to the database column of the same name
            if column_row[1] is None:
                column_row[1] = processed_name

            self._register_column_row(tuple(column_row), column_diagnostic)

        # 8 November 2018 (originally 25 August 2015)
        # This is a modification that needs to be made in order for this
        # class to work with GalaxyTileObj.  The column galaxytileid in
        # GalaxyTileObj is removed from the query by query_columns, but
        # somehow injected back in by the query procedure on fatboy.  This
        # leads to confusion if you try to query something like
        # galaxyAgn_galaxytileid.  We deal with that by removing all column
        # names like 'galaxytileid' in query_columns, but leaving 'galaxytileid'
        # un-mangled in self.columns so that self.typeMap knows how to deal
        # with it when it comes back

        for column_row in all_rows:
            if (column_row[0] not in self._compound_dbo_name_map and
                    (column_row[1] is None or column_row[1] == column_row[0])):

                # again: deal with cases where no transformation is applied
                if column_row[1] is None:
                    new_row = list(column_row)
                    new_row[1] = new_row[0]
                    column_row = tuple(new_row)

                self._register_column_row(column_row, column_diagnostic)

                # Defensive invariant check: the guard at the top of this
                # block means no entry exists for this name yet, so this
                # only fires if that guard is ever relaxed.  Any existing
                # entry for an un-mangled name must map the name to itself.
                if column_row[0] in self._compound_dbo_name_map:
                    if column_row[0] != self._compound_dbo_name_map[column_row[0]]:
                        raise RuntimeError("Column name map conflict")

                self._compound_dbo_name_map[column_row[0]] = column_row[0]

    def name_map(self, name):
        """
        Map a column name with the CatalogDBObject's objid prepended to the
        name of the column that will actually be queried from the database

        @param [in] name is the prefixed column name ('objid_column'), or
        an un-mangled column name that _make_columns() mapped to itself

        Raises a RuntimeError if _make_columns() has not been run yet, and
        a KeyError if name is not in the map.
        """
        if not hasattr(self, '_compound_dbo_name_map'):
            raise RuntimeError("This CompoundCatalogDBObject does not have a name_map")
        return self._compound_dbo_name_map[name]

153 

154 

class CompoundCatalogDBObject(_CompoundCatalogDBObject_mixin, CatalogDBObject):
    """
    Combine the queries of several CatalogDBObject daughter classes into
    a single query.

    The classes being combined must query the same table of the same
    database for the same rows.  They may ask for different columns, and
    those columns may be transformed by each class's self.columns member.

    The constructor takes a list of CatalogDBObject daughter classes,
    checks that they do all query the same table of the same database,
    and then builds its own self.columns member (CompoundCatalogDBObject
    is itself a daughter class of CatalogDBObject) combining all of the
    requested data.

    query_columns returns a recarray, as it does for a CatalogDBObject,
    but with modified column names.  A column called 'col1' in a
    CatalogDBObject whose objid is 'catName' is mapped to 'catName_col1',
    'col2' is mapped to 'catName_col2', and so on.  Where the
    CatalogDBObject does not change the name of the column, the column is
    also returned under its original, un-mangled name.

    If a custom query_columns method is required, sub-class this class and
    add the method there.  The sub-class should then set
    _table_restriction to a list of the names of the tables it was
    designed for; an exception is raised if it is used to query any other
    table.  The default, _table_restriction = None, means the class may be
    used with any table.
    """

    # Optional list of the tables that a specific CompoundCatalogDBObject
    # sub-class supports.  None means that any table is supported.
    _table_restriction = None

    def __init__(self, catalogDbObjectClassList, connection=None):
        """
        @param [in] catalogDbObjectClassList is a list of CatalogDBObject
        daughter classes that all query the same database table.  These
        must be the classes themselves, not instantiations of them.

        Instantiating a CatalogDBObject daughter class establishes its
        database connection, so, to avoid creating unnecessary
        connections, CompoundCatalogDBObject takes un-instantiated classes
        and establishes its own connection in this constructor.  All
        connection parameters must therefore be specified in the class
        definitions of the classes in catalogDbObjectClassList.

        @param [in] connection is an optional instantiation of DBConnection
        representing an active connection to the database required by
        this CompoundCatalogDBObject (prevents the CompoundCatalogDBObject
        from opening a redundant connection)
        """
        self._dbObjectClassList = catalogDbObjectClassList
        self._validate_input()

        # the objid of each class is what gets prepended to its column names
        self._nameList = [dbo_class.objid for dbo_class in self._dbObjectClassList]

        self._make_columns()
        self._make_dbTypeMap()
        self._make_dbDefaultValues()

        # The first class has to be instantiated because sometimes
        # idColKey is not defined until instantiation
        # (see GalaxyTileObj in sims_catUtils/../baseCatalogModels/GalaxyModels.py)
        first_dbo = self._dbObjectClassList[0](connection=connection)

        self.tableid = first_dbo.tableid
        self.idColKey = first_dbo.idColKey
        self.raColName = first_dbo.raColName
        self.decColName = first_dbo.decColName

        super(CompoundCatalogDBObject, self).__init__(connection=first_dbo.connection)

    def _make_dbTypeMap(self):
        """
        Build self.dbTypeMap by merging the dbTypeMaps of the input
        CatalogDBObject classes.  When a column appears in more than one
        of them, the entry from the earliest class in the list is kept.
        """
        self.dbTypeMap = merged = {}
        for dbo_class in self._dbObjectClassList:
            for col_name, col_type in dbo_class.dbTypeMap.items():
                merged.setdefault(col_name, col_type)

    def _make_dbDefaultValues(self):
        """
        Build self.dbDefaultValues by merging the dbDefaultValues of the
        input CatalogDBObject classes, keying each entry on the column name
        with the owning class's name prepended ('name_column').
        """
        self.dbDefaultValues = defaults = {}
        for dbo_class, obj_name in zip(self._dbObjectClassList, self._nameList):
            for col_name, default_value in dbo_class.dbDefaultValues.items():
                defaults['%s_%s' % (obj_name, col_name)] = default_value

    def _validate_input(self):
        """
        Check that the CatalogDBObject classes passed to the constructor
        all query the same table of the same database, and that no two of
        them share an objid.

        Also check that this class is designed to query that table (for
        cases where a custom query_columns has been implemented and
        _table_restriction has been set).

        Raises a RuntimeError if any of these checks fail.
        """
        # connection attributes to compare, in the order in which
        # any disagreement is reported
        tracked_attrs = ('host', 'database', 'port', 'driver', 'tableid')
        distinct_values = [[] for _ in tracked_attrs]
        seen_objids = []

        for dbo_class in self._dbObjectClassList:
            # gather the distinct values of each attribute, in order of
            # first appearance; classes lacking an attribute are ignored
            for attr_name, values in zip(tracked_attrs, distinct_values):
                if hasattr(dbo_class, attr_name):
                    value = getattr(dbo_class, attr_name)
                    if value not in values:
                        values.append(value)

            if hasattr(dbo_class, 'objid'):
                if dbo_class.objid in seen_objids:
                    raise RuntimeError('The objid %s ' % dbo_class.objid +
                                       'is duplicated in your list of '
                                       'CatalogDBObjects\n'
                                       'CompoundCatalogDBObject requires each'
                                       ' CatalogDBObject have a unique objid\n')
                seen_objids.append(dbo_class.objid)

        hosts, databases, ports, drivers, tables = distinct_values

        # host, port and driver may be left unspecified, but must not
        # disagree; exactly one database and exactly one table are required
        checks = ((' hosts: ', hosts, len(hosts) > 1),
                  (' databases: ', databases, len(databases) != 1),
                  (' ports: ', ports, len(ports) > 1),
                  (' drivers: ', drivers, len(drivers) > 1),
                  (' tables: ', tables, len(tables) != 1))

        problems = [label + str(values) + '\n'
                    for label, values, is_bad in checks if is_bad]

        if problems:
            raise RuntimeError('The CatalogDBObjects fed to '
                               'CompoundCatalogDBObject do not all '
                               'query the same table:\n' +
                               ''.join(problems))

        if (self._table_restriction is not None and tables and
                tables[0] not in self._table_restriction):
            raise RuntimeError("This CompoundCatalogDBObject does not support " +
                               "the table '%s' " % tables[0])