Coverage for python/lsst/sims/catalogs/db/CompoundCatalogDBObject.py : 6%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from builtins import zip
2from builtins import str
3from builtins import range
4from lsst.sims.catalogs.db import CatalogDBObject
6__all__ = ["CompoundCatalogDBObject",
7 "_CompoundCatalogDBObject_mixin"]
10class _CompoundCatalogDBObject_mixin(object):
11 """
12 This mixin exists to separate out utility methods that we will
13 need for the DESC DC2 CompoundCatalogDBObject
14 """
15 def _make_columns(self):
16 """
17 Construct the self.columns member by concatenating the self.columns
18 from the input CatalogDBObjects and modifying the names of the returned
19 columns to identify them with their specific CatalogDBObjects.
20 """
21 raw_column_names = [] # the names as they appear on the database
22 processed_column_names = [] # the names as they appear in the CatalogDBObject
23 prefix_column_names = [] # the names with the objid added
24 raw_column_transform = [] # the full transform applied by the CatalogDBObject
25 all_rows = [] # store the raw rows for the last block of code in this method
27 for dbo, dbName in zip(self._dbObjectClassList, self._nameList):
28 db_inst = dbo()
29 for row in db_inst.columns:
30 all_rows.append(row)
31 raw_column_names.append(row[1])
32 processed_column_names.append(row[0])
33 prefix_column_names.append('%s_%s' % (dbName, row[0]))
34 raw_column_transform.append(row[1:])
37 self._compound_dbo_name_map = {}
38 self.columns = []
39 processed_columns_requiring_prefix = set()
40 column_diagnostic = {}
42 # Now we need to figure out which of the CatalogDBObject-mapped columns
43 # actually need to be kept independent (i.e sedFilename for galaxy bulges will
44 # not actually be referencing the same column as sedFilename for galaxy disks)
45 # and which can be lumped together (i.e. redshift will be the same database
46 # column for galaxy bulges and disks)
47 for i_r1 in range(len(raw_column_names)):
49 # if a processed column has already been determined to be degenerate,
50 # just acknowledget that
51 use_prefix = processed_column_names[i_r1] in processed_columns_requiring_prefix
53 if not use_prefix:
54 # If two CatalogDBObjects map different columns in the raw database to
55 # the same transformed column name, then we need to keep the two
56 # distinct in this CompoundCatalogDBObject; we do that by using the
57 # prefix_name, which prepends the objid of the CatalogDBobject to the
58 # transformed column
59 for i_r2 in range(i_r1+1, len(raw_column_names)):
60 if (processed_column_names[i_r1] == processed_column_names[i_r2] and
61 raw_column_transform[i_r1] != raw_column_transform[i_r2]):
63 use_prefix = True
64 break
66 if use_prefix:
67 processed_columns_requiring_prefix.add(processed_column_names[i_r1])
69 # under what name will the column actually be queried
70 if use_prefix:
71 query_name = prefix_column_names[i_r1]
72 else:
73 query_name = processed_column_names[i_r1]
76 if prefix_column_names[i_r1] in self._compound_dbo_name_map:
77 raise RuntimeError("Trying to put %s in compound_db_name_map twice" %
78 prefix_column_names[i_r1])
80 # build the dict that maps the prefixed column names, which CompoundInstanceCatalog
81 # will reference, to the names that are actually going to be queried from
82 # this CompoundCatalogDBObject
83 self._compound_dbo_name_map[prefix_column_names[i_r1]] = query_name
85 # build the self.columns member variable of this CompoundCatalogDBObject
86 column_row = [query_name]
87 column_row += [ww for ww in raw_column_transform[i_r1]]
89 # if no transformation was applied, we need to map the column
90 # back to the database column of the same name
91 if column_row[1] is None:
92 column_row[1] = processed_column_names[i_r1]
94 column_row = tuple(column_row)
96 if column_row[0] in column_diagnostic:
97 if column_row != column_diagnostic[column_row[0]]:
98 row1 = column_row
99 row2 = column_diagnostic[column_row[0]]
100 raise RuntimeError("Trying to change definition of columns "
101 + "\n%s\n%s\n" % (row1, row2))
103 if column_row not in self.columns:
104 self.columns.append(column_row)
105 column_diagnostic[column_row[0]] = column_row
107 # 8 November 2018 (originally 25 August 2015)
108 # This is a modification that needs to be made in order for this
109 # class to work with GalaxyTileObj. The column galaxytileid in
110 # GalaxyTileObj is removed from the query by query_columns, but
111 # somehow injected back in by the query procedure on fatboy. This
112 # leads to confusion if you try to query something like
113 # galaxyAgn_galaxytileid. We deal with that by removing all column
114 # names like 'galaxytileid' in query_columns, but leaving 'galaxytileid'
115 # un-mangled in self.columns so that self.typeMap knows how to deal
116 # with it when it comes back
118 for column_row in all_rows:
119 if (column_row[0] not in self._compound_dbo_name_map and
120 (column_row[1] is None or column_row[1] == column_row[0])):
122 # again: deal with cases where no transformation is applied
123 if column_row[1] is None:
124 new_row = [ww for ww in column_row]
125 new_row[1] = new_row[0]
126 column_row = tuple(new_row)
128 if column_row[0] in column_diagnostic:
129 if column_row != column_diagnostic[column_row[0]]:
130 row1 = column_row
131 row2 = column_diagnostic[column_row[0]]
132 raise RuntimeError("Trying to change definition of columns "
133 + "\n%s\n%s\n" % (row1, row2))
135 if column_row not in self.columns:
136 self.columns.append(column_row)
137 column_diagnostic[column_row[0]] = column_row
139 if column_row[0] in self._compound_dbo_name_map:
140 if column_row[0] != self._column_dbo_name_map[column_row[0]]:
141 raise RuntimeError("Column name map conflict")
143 self._compound_dbo_name_map[column_row[0]] = column_row[0]
145 def name_map(self, name):
146 """
147 Map a column name with the CatalogDBObject's objid prepended to the
148 name of the column that will actually be queried from the database
149 """
150 if not hasattr(self, '_compound_dbo_name_map'):
151 raise RuntimeError("This CompoundCatalogDBObject does not have a name_map")
152 return self._compound_dbo_name_map[name]
class CompoundCatalogDBObject(_CompoundCatalogDBObject_mixin, CatalogDBObject):
    """
    Combine several CatalogDBObject daughter classes that query the same
    table of the same database for the same rows (but possibly different
    columns, or differently transformed columns) into a single query.

    The constructor takes a list of CatalogDBObject daughter classes,
    checks that they all point at one table of one database, and then builds
    its own self.columns member (this class is itself a daughter class of
    CatalogDBObject) holding all of the requested data.

    query_columns returns a recarray as for any CatalogDBObject, but with
    modified column names: a column 'col1' requested by a CatalogDBObject
    whose objid is 'catName' is mapped to 'catName_col1', 'col2' to
    'catName_col2', and so on.  Where the CatalogDBObject does not change the
    name of the column, the column is also returned by its original,
    un-mangled name.

    If a custom query_columns method is needed, sub-class this class and add
    the method there.  In that case set the _table_restriction member
    variable to a list of the table names the sub-class was designed for; an
    exception is raised if the sub-class is used on any other table.
    _table_restriction defaults to None, meaning any table is allowed.
    """

    # Optional list of the tables supported by a specific
    # CompoundCatalogDBObject sub-class; None means any table is supported
    _table_restriction = None

    def __init__(self, catalogDbObjectClassList, connection=None):
        """
        @param [in] catalogDbObjectClassList is a list of CatalogDBObject
        daughter classes (the classes themselves, not instantiations of
        them) that all query the same database table

        Classes are passed rather than instances because, per the original
        notes, a CatalogDBObject daughter class opens its database connection
        on instantiation.  Only the first class in the list is instantiated
        with the supplied connection, so all connection parameters must be
        specified in the class definitions themselves.

        @param [in] connection is an optional instantiation of DBConnection
        representing an active connection to the database required by
        this CompoundCatalogDBObject (prevents the CompoundCatalogDBObject
        from opening a redundant connection)
        """
        self._dbObjectClassList = catalogDbObjectClassList
        self._validate_input()

        self._nameList = [dbo_class.objid for dbo_class in self._dbObjectClassList]

        self._make_columns()
        self._make_dbTypeMap()
        self._make_dbDefaultValues()

        # The first class has to be instantiated because sometimes
        # idColKey is not defined until instantiation
        # (see GalaxyTileObj in sims_catUtils/../baseCatalogModels/GalaxyModels.py)
        first_dbo = self._dbObjectClassList[0](connection=connection)

        self.tableid = first_dbo.tableid
        self.idColKey = first_dbo.idColKey
        self.raColName = first_dbo.raColName
        self.decColName = first_dbo.decColName

        super(CompoundCatalogDBObject, self).__init__(connection=first_dbo.connection)

    def _make_dbTypeMap(self):
        """
        Build self.dbTypeMap by merging the dbTypeMap members of the input
        CatalogDBObjects; when a key appears more than once, the entry from
        the earliest class in the list wins.
        """
        self.dbTypeMap = {}
        merged = self.dbTypeMap
        for dbo_class in self._dbObjectClassList:
            for key in dbo_class.dbTypeMap:
                merged.setdefault(key, dbo_class.dbTypeMap[key])

    def _make_dbDefaultValues(self):
        """
        Build self.dbDefaultValues from the dbDefaultValues members of the
        input CatalogDBObjects, storing each entry under the key
        '<objid>_<column>'.
        """
        self.dbDefaultValues = {}
        for dbo_class, prefix in zip(self._dbObjectClassList, self._nameList):
            defaults = dbo_class.dbDefaultValues
            for key in defaults:
                self.dbDefaultValues['%s_%s' % (prefix, key)] = defaults[key]

    def _validate_input(self):
        """
        Check that the CatalogDBObjects passed to the constructor all query
        the same table of the same database, that their objids are unique,
        and that the table is one this class supports (relevant when a
        sub-class sets _table_restriction).

        Raises RuntimeError when any of those checks fails.
        """
        hosts = []
        databases = []
        ports = []
        drivers = []
        tables = []
        objids = []

        # (class attribute name, list of distinct values seen so far)
        tracked = (('host', hosts),
                   ('database', databases),
                   ('port', ports),
                   ('driver', drivers),
                   ('tableid', tables))

        for dbo_class in self._dbObjectClassList:
            for attr_name, seen in tracked:
                if hasattr(dbo_class, attr_name):
                    value = getattr(dbo_class, attr_name)
                    if value not in seen:
                        seen.append(value)

            if hasattr(dbo_class, 'objid'):
                if dbo_class.objid in objids:
                    raise RuntimeError('The objid %s ' % dbo_class.objid +
                                       'is duplicated in your list of '
                                       'CatalogDBObjects\n'
                                       'CompoundCatalogDBObject requires each'
                                       ' CatalogDBObject have a unique objid\n')
                objids.append(dbo_class.objid)

        # host, port and driver may be absent altogether; database and table
        # must resolve to exactly one value
        problems = []
        if len(hosts) > 1:
            problems.append(' hosts: ' + str(hosts) + '\n')
        if len(databases) != 1:
            problems.append(' databases: ' + str(databases) + '\n')
        if len(ports) > 1:
            problems.append(' ports: ' + str(ports) + '\n')
        if len(drivers) > 1:
            problems.append(' drivers: ' + str(drivers) + '\n')
        if len(tables) != 1:
            problems.append(' tables: ' + str(tables) + '\n')

        if problems:
            raise RuntimeError('The CatalogDBObjects fed to '
                               'CompoundCatalogDBObject do not all '
                               'query the same table:\n' +
                               ''.join(problems))

        restriction = self._table_restriction
        if restriction is not None and len(tables) > 0:
            if tables[0] not in restriction:
                raise RuntimeError("This CompoundCatalogDBObject does not support " +
                                   "the table '%s' " % tables[0])