Coverage for python/lsst/sims/catUtils/utils/avroAlertGenerator.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This script will provide classes to process the sqlite files produced
2# by the AlertDataGenerator and write them as avro files
4try:
5 import avro.schema
6 from avro.io import DatumWriter
7 from avro.datafile import DataFileWriter
8except ImportError:
9 pass
11from lsst.sims.catalogs.db import DBObject
12import os
13import numpy as np
14import json
15import warnings
16import time
18__all__ = ["AvroAlertGenerator"]
21################
22# The methods
23# combine_schemas and load_single_avsc
24# are copied from Maria Patterson's
25# validateAvroNestedSchema.py script in
26# https://github.com/lsst-dm/sample-avro-alert
27# I am copying them here since sample-avro-alert
28# is not a proper eups package designed to be
29# distributed and installed with the LSST stack
30################
def combine_schemas(schema_files):
    """Combine multiple nested schemas into a single schema.

    Parameters
    ----------
    schema_files is an ordered iterable of paths to avsc files.  The
    files are loaded in the order given, and the same avro.schema.Names
    registry is handed to every load.

    Returns
    -------
    The schema object produced by loading the last file in schema_files.

    Raises
    ------
    ValueError if schema_files is empty (there would be nothing to return).
    """
    # Materialize first so that generators are supported and so that the
    # emptiness check happens before any avro machinery is touched.
    schema_files = list(schema_files)
    if not schema_files:
        raise ValueError("combine_schemas requires at least one schema file")

    known_schemas = avro.schema.Names()

    schema = None
    for file_path in schema_files:
        schema = load_single_avsc(file_path, known_schemas)
    return schema
def load_single_avsc(file_path, names):
    """Read one avsc file and turn it into an avro schema object.

    Parameters
    ----------
    file_path is the path to the avsc (JSON) file to read

    names is the avro.schema.Names instance passed through to
    avro.schema.SchemaFromJSONData

    Returns
    -------
    The object returned by avro.schema.SchemaFromJSONData for the
    JSON content of file_path.
    """
    with open(file_path) as avsc_handle:
        parsed_json = json.load(avsc_handle)
    return avro.schema.SchemaFromJSONData(parsed_json, names)
class AvroAlertGenerator(object):
    """
    This class reads in the sqlite files created by the AlertDataGenerator
    and converts them into avro files separated by obsHistID (the unique
    integer identifying each pointing in an OpSim run).

    load_schema() must be called before write_alerts().
    """

    def __init__(self):
        self._diasource_schema = None
        # Populated by load_schema(); write_alerts() refuses to run while
        # this is still None.
        self._alert_schema = None
        # Maps uniqueId -> counter used to build the next diaSourceId
        # for that object.
        self._diasource_ct = {}
        # Fixed seed so that the randomly generated filler fields are
        # reproducible from run to run.
        self._rng = np.random.RandomState(7123)
        # Number of low bits of diaSourceId reserved for the per-object
        # counter stored in self._diasource_ct.
        self._n_bit_shift = 10

    def load_schema(self, schema_dir):
        """
        Load the schema for the avro files.  Currently, these are in
        https://github.com/lsst-dm/sample-avro-alert/tree/master/schema

        Parameters
        ----------
        schema_dir is the directory containing diasource.avsc,
        diaobject.avsc, ssobject.avsc, cutout.avsc and alert.avsc
        """
        # Loaded in this order; alert.avsc comes last and is the schema
        # that combine_schemas() returns.
        schema_names = ('diasource.avsc', 'diaobject.avsc', 'ssobject.avsc',
                        'cutout.avsc', 'alert.avsc')
        file_names = [os.path.join(schema_dir, name) for name in schema_names]

        self._alert_schema = combine_schemas(file_names)

    def _create_sources(self, obshistid, diasource_data):
        """
        Create a list of diaSources that adhere to the corresponding
        avro schema.

        Parameters
        ----------
        obshistid is an integer corresponding to the OpSim pointing
        being simulated

        diasource_data is numpy recarray containing all of the data
        for the diaSources being formatted

        Returns
        -------
        A list of dicts, each of which is ready to be written as
        an avro-formatted diaSource.
        """

        bp_name_dict = {0: 'u', 1: 'g', 2: 'r', 3: 'i', 4: 'z', 5: 'y'}

        # Vectorized noise model: the difference-image noise is the
        # quadrature sum of the noise on the total flux and the noise
        # on the quiescent flux.
        tot_flux = diasource_data['dflux'] + diasource_data['quiescent_flux']
        full_noise = tot_flux/diasource_data['tot_snr']
        quiescent_noise = diasource_data['quiescent_flux']/diasource_data['quiescent_snr']
        diff_noise = np.sqrt(full_noise**2 + quiescent_noise**2)
        diff_snr = np.abs(diasource_data['dflux']/diff_noise)

        avro_diasource_list = []
        for i_source, diasource in enumerate(diasource_data):
            # Built-in int (rather than the removed np.long alias) gives a
            # plain Python integer on every NumPy version.
            unique_id = int(diasource['uniqueId'])
            source_ct = self._diasource_ct.get(unique_id, 1)
            self._diasource_ct[unique_id] = source_ct + 1

            avro_diasource = {}
            avro_diasource['diaSourceId'] = (unique_id << self._n_bit_shift) + source_ct
            avro_diasource['ccdVisitId'] = int(diasource['chipNum'])*10**7 + int(obshistid)
            avro_diasource['diaObjectId'] = unique_id

            avro_diasource['midPointTai'] = diasource['TAI']
            avro_diasource['filterName'] = bp_name_dict[int(diasource['band'])]
            avro_diasource['ra'] = diasource['ra']
            avro_diasource['decl'] = diasource['dec']
            # NOTE: the order of the self._rng calls below is kept fixed so
            # that seeded output does not change.
            avro_diasource['flags'] = int(self._rng.randint(10, 1000))

            avro_diasource['x'] = diasource['xPix']
            avro_diasource['y'] = diasource['yPix']
            avro_diasource['snr'] = diff_snr[i_source]
            avro_diasource['psFlux'] = diasource['dflux']

            # The covariance entries are filled with random numbers drawn
            # from self._rng.
            ra_dec_cov = {}
            ra_dec_cov['raSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['declSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['ra_decl_Cov'] = self._rng.random_sample()*0.001

            avro_diasource['ra_decl_Cov'] = ra_dec_cov

            x_y_cov = {}
            x_y_cov['xSigma'] = self._rng.random_sample()*0.001*3600.0/0.2
            x_y_cov['ySigma'] = self._rng.random_sample()*0.001*3600.0/0.2
            x_y_cov['x_y_Cov'] = self._rng.random_sample()*0.001

            avro_diasource['x_y_Cov'] = x_y_cov

            avro_diasource['totFlux'] = diasource['quiescent_flux'] + diasource['dflux']
            avro_diasource['totFluxErr'] = full_noise[i_source]
            avro_diasource['diffFlux'] = diasource['dflux']
            avro_diasource['diffFluxErr'] = diff_noise[i_source]

            avro_diasource_list.append(avro_diasource)

        return avro_diasource_list

    def _create_objects(self, diaobject_data):
        """
        Create a dict of diaObjects formatted according to the
        appropriate avro schema

        Parameters
        ----------
        diaobject_data is a numpy recarray containing all of the
        data needed for the diaObject

        Returns
        -------
        A dict keyed on uniqueId (the CatSim unique identifier for each
        astrophysical source).  Each value is a properly formatted
        diaObject corresponding to its key.
        """
        pm_parallax_fields = ('pmRaSigma', 'pmDeclSigma', 'parallaxSigma',
                              'pmRa_pmDecl_Cov', 'pmRa_parallax_Cov',
                              'pmDecl_parallax_Cov')

        diaobject_dict = {}
        for diaobject in diaobject_data:
            # Built-in int replaces the removed np.long alias.
            unique_id = int(diaobject['uniqueId'])

            avro_diaobject = {}
            avro_diaobject['flags'] = int(self._rng.randint(10, 1000))
            avro_diaobject['diaObjectId'] = unique_id
            avro_diaobject['ra'] = diaobject['ra']
            avro_diaobject['decl'] = diaobject['dec']

            # Random filler values; call order on self._rng is kept fixed.
            ra_dec_cov = {}
            ra_dec_cov['raSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['declSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['ra_decl_Cov'] = self._rng.random_sample()*0.001

            avro_diaobject['ra_decl_Cov'] = ra_dec_cov
            avro_diaobject['radecTai'] = diaobject['TAI']

            avro_diaobject['pmRa'] = diaobject['pmRA']
            avro_diaobject['pmDecl'] = diaobject['pmDec']
            avro_diaobject['parallax'] = diaobject['parallax']

            # All proper motion/parallax covariance terms are set to zero.
            avro_diaobject['pm_parallax_Cov'] = {field: 0.0 for field in pm_parallax_fields}

            avro_diaobject['pmParallaxLnL'] = self._rng.random_sample()
            avro_diaobject['pmParallaxChi2'] = self._rng.random_sample()
            avro_diaobject['pmParallaxNdata'] = 0

            diaobject_dict[unique_id] = avro_diaobject
        return diaobject_dict

    def write_alerts(self, obshistid, data_dir, prefix_list,
                     htmid_list, out_dir, out_prefix,
                     dmag_cutoff, lock=None, log_file_name=None):
        """
        Write the alerts for an obsHistId to a properly formatted avro file.

        Parameters
        ----------
        obshistid is the integer uniquely identifying the OpSim pointing
        being simulated

        data_dir is the directory containing the sqlite files created by
        the AlertDataGenerator

        prefix_list is a list of prefixes for those sqlite files.

        htmid_list is the list of htmids identifying the trixels that overlap
        this obshistid's field of view.  For each htmid in htmid_list and each
        prefix in prefix_list, this method will process the files
        data_dir/prefix_htmid_sqlite.db
        searching for alerts that correspond to this obshistid

        out_dir is the directory to which the avro files should be written

        out_prefix is the prefix of the avro file names

        dmag_cutoff is the minimum delta magnitude needed to trigger an alert

        lock is an optional multiprocessing.Lock() for use when running many
        instances of this method.  It prevents multiple processes from writing to
        the logfile or stdout at once.

        log_file_name is the name of an optional text file to which progress is
        written.

        Raises
        ------
        RuntimeError if load_schema() has not been called yet.
        """

        # Fail before touching the filesystem: without a schema nothing can
        # be written, and an existing output file should not be deleted.
        if getattr(self, '_alert_schema', None) is None:
            raise RuntimeError("No alert schema loaded; call load_schema() "
                               "before write_alerts()")

        out_name = os.path.join(out_dir, '%s_%d.avro' % (out_prefix, obshistid))
        if os.path.exists(out_name):
            os.unlink(out_name)

        diasource_query = 'SELECT alert.uniqueId, alert.xPix, alert.yPix, '
        diasource_query += 'alert.chipNum, alert.dflux, alert.snr, alert.ra, alert.dec, '
        diasource_query += 'meta.band, meta.TAI, quiescent.flux, quiescent.snr '
        diasource_query += 'FROM alert_data as alert '
        diasource_query += 'INNER JOIN metadata AS meta ON alert.obshistId=meta.obshistId '
        diasource_query += 'INNER JOIN quiescent_flux AS quiescent ON quiescent.uniqueId=alert.uniqueID '
        diasource_query += 'AND quiescent.band=meta.band '
        # %d forces obshistid to be formatted as an integer
        diasource_query += 'WHERE alert.obshistId=%d ' % obshistid
        diasource_query += 'ORDER BY alert.uniqueId'

        diasource_dtype = np.dtype([('uniqueId', int), ('xPix', float), ('yPix', float),
                                    ('chipNum', int), ('dflux', float), ('tot_snr', float),
                                    ('ra', float), ('dec', float), ('band', int), ('TAI', float),
                                    ('quiescent_flux', float), ('quiescent_snr', float)])

        diaobject_query = 'SELECT uniqueId, ra, dec, TAI, pmRA, pmDec, parallax '
        diaobject_query += 'FROM baseline_astrometry'

        diaobject_dtype = np.dtype([('uniqueId', int), ('ra', float), ('dec', float),
                                    ('TAI', float), ('pmRA', float), ('pmDec', float),
                                    ('parallax', float)])

        with DataFileWriter(open(out_name, "wb"),
                            DatumWriter(), self._alert_schema) as data_writer:

            t_start = time.time()
            alert_ct = 0
            for htmid in htmid_list:
                for prefix in prefix_list:
                    db_name = os.path.join(data_dir, '%s_%d_sqlite.db' % (prefix, htmid))
                    if not os.path.exists(db_name):
                        warnings.warn('%s does not exist' % db_name)
                        continue

                    db_obj = DBObject(db_name, driver='sqlite')

                    diaobject_data = db_obj.execute_arbitrary(diaobject_query,
                                                              dtype=diaobject_dtype)

                    diaobject_dict = self._create_objects(diaobject_data)

                    diasource_data = db_obj.execute_arbitrary(diasource_query,
                                                              dtype=diasource_dtype)

                    # Keep only the sources whose change in magnitude
                    # relative to the quiescent flux reaches dmag_cutoff.
                    dmag = 2.5*np.log10(1.0+diasource_data['dflux']/diasource_data['quiescent_flux'])
                    valid_alerts = np.where(np.abs(dmag) >= dmag_cutoff)
                    diasource_data = diasource_data[valid_alerts]
                    avro_diasource_list = self._create_sources(obshistid, diasource_data)

                    # avro_diasource_list is parallel to diasource_data
                    for source_row, diasource in zip(diasource_data, avro_diasource_list):
                        alert_ct += 1
                        unq = int(source_row['uniqueId'])

                        avro_alert = {}
                        avro_alert['alertId'] = (int(obshistid) << 20) + alert_ct
                        avro_alert['l1dbId'] = unq
                        avro_alert['diaSource'] = diasource
                        avro_alert['diaObject'] = diaobject_dict[unq]

                        data_writer.append(avro_alert)

        elapsed = (time.time()-t_start)/3600.0
        msg = 'finished obshistid %d; %d alerts in %.2e hrs' % (obshistid, alert_ct, elapsed)

        if lock is not None:
            lock.acquire()
        try:
            print(msg)

            if log_file_name is not None:
                with open(log_file_name, 'a') as out_file:
                    out_file.write(msg)
                    out_file.write('\n')
        finally:
            # Always hand the lock back, even if printing or writing the
            # log file fails, so other processes are not blocked forever.
            if lock is not None:
                lock.release()