Coverage for python/lsst/sims/catalogs/definitions/CompoundInstanceCatalog.py : 5%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import with_statement
2from builtins import zip
3from builtins import range
4from builtins import object
5import numpy as np
6import numpy.lib.recfunctions as recfunctions
7from lsst.sims.catalogs.db import CompoundCatalogDBObject
class CompoundInstanceCatalog(object):
    """
    This is essentially an InstanceCatalog class meant to wrap together
    several disparate InstanceCatalog instantiations that will ultimately
    be written to the same output catalog.

    You pass the constructor a list of InstanceCatalog classes, a list of
    CatalogDBObject classes, and ObservationMetaData, and an optional SQL constraint.

    The write_catalog method then writes all of the InstanceCatalogs to one
    ASCII file using the same API as InstanceCatalog.write_catalog.

    Note: any member variables of the CompoundInstanceCatalog whose names
    do not begin with '_' will be assigned to the InstanceCatalogs iterated
    over by the CompoundInstanceCatalog.  This allows you to, for example,
    format the outputs of every InstanceCatalog in the CompoundInstanceCatalog
    by setting override_formats in just the CompoundInstanceCatalog, e.g.

    class myCompoundInstanceCatalog(CompoundInstanceCatalog):
        transformations = {'raJ2000': np.degrees, 'decJ2000': np.degrees}

    comCat = myCompoundInstanceCatalog([catClass1, catClass2],
                                       [dbClass1, dbClass2])

    will write raJ2000 and decJ2000 in degrees without having to define
    transformations in catClass1 and catClass2.
    """

    def __init__(self, instanceCatalogClassList, catalogDBObjectClassList,
                 obs_metadata=None, constraint=None, compoundDBclass=None):
        """
        @param [in] instanceCatalogClassList is a list of the InstanceCatalog
        classes to be combined into one output catalog.

        @param [in] catalogDBObjectClassList is a list of the CatalogDBObject
        classes to be associated with the InstanceCatalog classes in
        instanceCatalogClassList.  There should be one CatalogDBObject class
        for each InstanceCatalogClass.

        @param [in] obs_metadata is the ObservationMetaData describing
        the telescope pointing

        @param [in] constraint is an optional SQL constraint to be applied
        to the database query

        @param [in] compoundDBclass is an optional argument specifying what
        CompoundCatalogDBobject class(es) to use to combine InstanceCatalogs
        that query the same table.  This can be either a single
        CompoundCatalogDBObject class, or a list of classes.  The
        CompoundInstanceCatalog will figure out which InstanceCatalog(s) go with
        which CompoundCatalogDBObject class.  If no CompoundCatalogDBObject class
        corresponds to a given group of InstanceCatalogs, then the base
        CompoundCatalogDBObject class will be used.

        Note: compoundDBclass should be a CompoundCatalogDBObject class.
        Not an instantiation of a CompoundCatalogDBObject class.
        """
        self._compoundDBclass = compoundDBclass
        self._obs_metadata = obs_metadata
        self._dbo_list = catalogDBObjectClassList
        self._ic_list = instanceCatalogClassList
        self._constraint = constraint

        # Group the indices of CatalogDBObject classes that query the same
        # table of the same database.  Each group becomes one row of
        # self._dbObjectGroupList; groups with more than one member will
        # later be combined into a single CompoundCatalogDBObject query.
        assigned = [False]*len(self._dbo_list)
        self._dbObjectGroupList = []

        for ix in range(len(self._dbo_list)):
            # try to join an existing group (membership is decided by
            # comparing against the group's first member)
            for row in self._dbObjectGroupList:
                if self.areDBObjectsTheSame(self._dbo_list[ix], self._dbo_list[row[0]]):
                    row.append(ix)
                    assigned[ix] = True
                    break

            if not assigned[ix]:
                # start a new group, sweeping up any earlier, still-unassigned
                # CatalogDBObjects that match this one
                new_row = [ix]
                for iy in range(ix):
                    if not assigned[iy]:
                        if self.areDBObjectsTheSame(self._dbo_list[ix], self._dbo_list[iy]):
                            new_row.append(iy)

                self._dbObjectGroupList.append(new_row)

    def areDBObjectsTheSame(self, db1, db2):
        """
        @param [in] db1 is a CatalogDBObject instantiation

        @param [in] db2 is a CatalogDBObject instantiation

        @param [out] a boolean stating whether or not db1 and db2
        query the same table of the same database
        """
        # 'host', 'port', and 'driver' are optional attributes; a missing
        # attribute is treated as None so two objects that both omit it
        # still compare equal on that field.
        host1 = getattr(db1, 'host', None)
        host2 = getattr(db2, 'host', None)
        port1 = getattr(db1, 'port', None)
        port2 = getattr(db2, 'port', None)
        driver1 = getattr(db1, 'driver', None)
        driver2 = getattr(db2, 'driver', None)

        if db1.tableid != db2.tableid:
            return False
        if host1 != host2:
            return False
        if db1.database != db2.database:
            return False
        if port1 != port2:
            return False
        if driver1 != driver2:
            return False
        return True

    def write_catalog(self, filename, chunk_size=None, write_header=True, write_mode='w'):
        """
        Write the stored list of InstanceCatalogs to a single ASCII output catalog.

        @param [in] filename is the name of the file to be written

        @param [in] chunk_size is an optional parameter telling the CompoundInstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header a boolean specifying whether or not to add a header
        to the output catalog (Note: only one header will be written; there will not be
        a header for each InstanceCatalog in the CompoundInstanceCatalog; default True)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'w')
        """
        instantiated_ic_list = [None]*len(self._ic_list)

        # first, loop over all of the InstanceCatalog and CatalogDBObject classes,
        # pre-processing them (i.e. verifying that they have access to all of the
        # columns they need)
        for ix, (icClass, dboClass) in enumerate(zip(self._ic_list, self._dbo_list)):
            dbo = dboClass()
            ic = icClass(dbo, obs_metadata=self._obs_metadata)

            # assign all non-private, non-callable member variables of the
            # CompoundInstanceCatalog (both instance- and class-level) to the
            # instantiated InstanceCatalogs
            for kk in self.__dict__:
                if kk[0] != '_' and not hasattr(self.__dict__[kk], '__call__'):
                    setattr(ic, kk, self.__dict__[kk])

            for kk in self.__class__.__dict__:
                if kk[0] != '_' and not hasattr(self.__class__.__dict__[kk], '__call__'):
                    setattr(ic, kk, self.__class__.__dict__[kk])

            ic._write_pre_process()
            instantiated_ic_list[ix] = ic

        # write out every InstanceCatalog that does not share a database
        # table with any other InstanceCatalog
        for row in self._dbObjectGroupList:
            if len(row) == 1:
                ic = instantiated_ic_list[row[0]]
                ic._query_and_write(filename, chunk_size=chunk_size,
                                    write_header=write_header, write_mode=write_mode,
                                    obs_metadata=self._obs_metadata,
                                    constraint=self._constraint)
                # every write after the first must append, without a header
                write_mode = 'a'
                write_header = False

        # determine the CompoundCatalogDBObject class to fall back on for
        # groups that no user-specified class covers
        default_compound_dbo = None
        if self._compoundDBclass is not None:
            if not hasattr(self._compoundDBclass, '__getitem__'):
                # a single class was specified; fall back on the base class
                default_compound_dbo = CompoundCatalogDBObject
            else:
                # prefer the first user-specified class with no table restriction
                for dbo in self._compoundDBclass:
                    if dbo._table_restriction is None:
                        default_compound_dbo = dbo
                        break

                if default_compound_dbo is None:
                    # BUGFIX: this line used to read
                    # 'default_compound_dbo is CompoundCatalogDBObject' --
                    # a no-op identity test that left the default as None,
                    # causing a TypeError when the default was later called
                    default_compound_dbo = CompoundCatalogDBObject

        # write out the groups of InstanceCatalogs that share a table,
        # combining each group into one database query
        for row in self._dbObjectGroupList:
            if len(row) > 1:
                dbObjClassList = [self._dbo_list[ix] for ix in row]
                catList = [instantiated_ic_list[ix] for ix in row]
                for cat in catList:
                    cat._pre_screen = True

                if self._compoundDBclass is None:
                    compound_dbo = CompoundCatalogDBObject(dbObjClassList)
                elif not hasattr(self._compoundDBclass, '__getitem__'):
                    # self._compoundDBclass is not a list
                    try:
                        compound_dbo = self._compoundDBclass(dbObjClassList)
                    except Exception:
                        # the specified class cannot combine these
                        # CatalogDBObjects; fall back on the default
                        compound_dbo = default_compound_dbo(dbObjClassList)
                else:
                    compound_dbo = None
                    for candidate in self._compoundDBclass:
                        # a candidate is usable only if its table restriction
                        # covers every CatalogDBObject in this group
                        use_it = all(candidate._table_restriction is not None and
                                     dbo.tableid in candidate._table_restriction
                                     for dbo in dbObjClassList)

                        if use_it:
                            compound_dbo = candidate(dbObjClassList)
                            break

                    if compound_dbo is None:
                        compound_dbo = default_compound_dbo(dbObjClassList)

                self._write_compound(catList, compound_dbo, filename,
                                     chunk_size=chunk_size, write_header=write_header,
                                     write_mode=write_mode)
                write_mode = 'a'
                write_header = False

    def _write_compound(self, catList, compound_dbo, filename,
                        chunk_size=None, write_header=False, write_mode='a'):
        """
        Write out a set of InstanceCatalog instantiations that have been
        determined to query the same database table.

        @param [in] catList is the list of InstanceCatalog instantiations

        @param [in] compound_dbo is the CompoundCatalogDBObject instantiation
        associated with catList

        @param [in] filename is the name of the file to be written

        @param [in] chunk_size is an optional parameter telling the CompoundInstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header a boolean specifying whether or not to add a header
        to the output catalog (Note: only one header will be written; there will not be
        a header for each InstanceCatalog in the CompoundInstanceCatalog; default True)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'w')
        """
        colnames = []
        master_colnames = []
        name_map = []
        dbObjNameList = [db.objid for db in compound_dbo._dbObjectClassList]
        for name, cat in zip(dbObjNameList, catList):
            localNames = []
            local_map = {}
            for colName in cat._active_columns:
                # columns are prefixed with the objid of their CatalogDBObject
                # so the compound query can tell same-named columns apart
                prefixed_name = '%s_%s' % (name, colName)
                query_name = compound_dbo.name_map(prefixed_name)
                if query_name not in colnames:
                    colnames.append(query_name)
                localNames.append(query_name)
                local_map[query_name] = colName
            master_colnames.append(localNames)
            name_map.append(local_map)

        master_results = compound_dbo.query_columns(colnames=colnames,
                                                    obs_metadata=self._obs_metadata,
                                                    constraint=self._constraint,
                                                    chunk_size=chunk_size)

        with open(filename, write_mode) as file_handle:
            if write_header:
                catList[0].write_header(file_handle)

            new_dtype_name_list = [None]*len(catList)

            first_chunk = True
            for chunk in master_results:
                for ix, (catName, cat) in enumerate(zip(dbObjNameList, catList)):

                    if first_chunk:
                        # any column the query did not return under its prefixed
                        # name is mapped back to its un-prefixed name
                        for iy, name in enumerate(master_colnames[ix]):
                            if name not in chunk.dtype.fields:
                                master_colnames[ix][iy] = name_map[ix][name]

                    local_recarray = recfunctions.repack_fields(
                        chunk[master_colnames[ix]].view(np.recarray))

                    local_recarray.flags['WRITEABLE'] = False  # so numpy does not raise a warning
                                                               # because it thinks we may accidentally
                                                               # write to this array

                    if new_dtype_name_list[ix] is None:
                        # strip the '<objid>_' prefix so each catalog sees its
                        # own column names
                        new_dtype_name_list[ix] = list([dd.replace(catName+'_', '')
                                                        for dd in master_colnames[ix]])

                    local_recarray.dtype.names = new_dtype_name_list[ix]
                    cat._write_recarray(local_recarray, file_handle)
                    cat._delete_current_chunk()

                first_chunk = False