Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This script will provide classes to process the sqlite files produced 

2# by the AlertDataGenerator and write them as avro files 

3 

4try: 

5 import avro.schema 

6 from avro.io import DatumWriter 

7 from avro.datafile import DataFileWriter 

8except ImportError: 

9 pass 

10 

11from lsst.sims.catalogs.db import DBObject 

12import os 

13import numpy as np 

14import json 

15import warnings 

16import time 

17 

18__all__ = ["AvroAlertGenerator"] 

19 

20 

21################ 

22# The methods 

23# combine_schemas and load_single_avsc 

24# are copied from Maria Patterson's 

25# validateAvroNestedSchema.py script in 

26# https://github.com/lsst-dm/sample-avro-alert 

27# I am copying them here since sample-avro-alert 

28# is not a proper eups package designed to be 

29# distributed and installed with the LSST stack 

30################ 

31 

def combine_schemas(schema_files):
    """Combine multiple nested schemas into a single schema.

    Parameters
    ----------
    schema_files is an iterable of paths to .avsc files.  The files are
    loaded in the order given, all against one shared avro.schema.Names
    instance (presumably so that later files can refer to types declared
    in earlier ones -- confirm against the avro documentation).

    Returns
    -------
    The schema object produced from the last file in schema_files.

    Raises
    ------
    ValueError if schema_files is empty (there would be no schema to return).
    """
    # Materialize first so that generators can be checked for emptiness.
    schema_files = list(schema_files)
    if not schema_files:
        raise ValueError("combine_schemas needs at least one schema file; "
                         "got an empty list")

    known_schemas = avro.schema.Names()

    schema = None
    for file_name in schema_files:
        schema = load_single_avsc(file_name, known_schemas)
    return schema

40 

41 

def load_single_avsc(file_path, names):
    """Read one .avsc file and turn it into an avro schema object.

    Parameters
    ----------
    file_path is the path to the .avsc (JSON) file to read.

    names is the avro.schema.Names instance handed to
    avro.schema.SchemaFromJSONData along with the parsed JSON.

    Returns
    -------
    Whatever avro.schema.SchemaFromJSONData returns for this file.
    """
    with open(file_path) as avsc_handle:
        parsed_json = json.load(avsc_handle)
        return avro.schema.SchemaFromJSONData(parsed_json, names)

49 

50 

class AvroAlertGenerator(object):
    """
    This class reads in the sqlite files created by the AlertDataGenerator
    and converts them into avro files separated by obsHistID (the unique
    integer identifying each pointing in an OpSim run).

    Call load_schema() once before calling write_alerts().
    """

    def __init__(self):
        self._diasource_schema = None
        # Set by load_schema(); write_alerts() refuses to run while it is None.
        self._alert_schema = None
        # Maps uniqueId -> counter value to use for that object's next diaSource.
        self._diasource_ct = {}
        # Fixed seed so that the randomly generated placeholder values
        # are reproducible from run to run.
        self._rng = np.random.RandomState(7123)
        # diaSourceId = (uniqueId << _n_bit_shift) + per-object counter
        self._n_bit_shift = 10

    def load_schema(self, schema_dir):
        """
        Load the schema for the avro files.  Currently, these are in

        https://github.com/lsst-dm/sample-avro-alert/tree/master/schema

        Parameters
        ----------
        schema_dir is the directory containing diasource.avsc,
        diaobject.avsc, ssobject.avsc, cutout.avsc and alert.avsc

        The combined schema is stored on this instance for use by
        write_alerts().
        """
        # alert.avsc is loaded last; combine_schemas returns the schema
        # built from the final file in the list.
        file_names = [os.path.join(schema_dir, file_name)
                      for file_name in ('diasource.avsc',
                                        'diaobject.avsc',
                                        'ssobject.avsc',
                                        'cutout.avsc',
                                        'alert.avsc')]

        self._alert_schema = combine_schemas(file_names)

    def _create_sources(self, obshistid, diasource_data):
        """
        Create a list of diaSources that adhere to the corresponding
        avro schema.

        Parameters
        ----------
        obshistid is an integer corresponding to the OpSim pointing
        being simulated

        diasource_data is numpy recarray containing all of the data
        for the diaSources being formatted

        Returns
        -------
        A list of dicts, each of which is ready to be written as
        an avro-formatted diaSource.

        Side effects
        ------------
        Advances the per-uniqueId counters in self._diasource_ct and
        draws from self._rng (positional covariances and flags are
        random placeholders).
        """

        bp_name_dict = {0: 'u', 1: 'g', 2: 'r', 3: 'i', 4: 'z', 5: 'y'}

        # Noise terms are computed for the whole array at once.
        tot_flux = diasource_data['dflux'] + diasource_data['quiescent_flux']
        full_noise = tot_flux/diasource_data['tot_snr']
        quiescent_noise = diasource_data['quiescent_flux']/diasource_data['quiescent_snr']
        # Noise on the difference: the two noise terms added in quadrature.
        diff_noise = np.sqrt(full_noise**2 + quiescent_noise**2)
        diff_snr = np.abs(diasource_data['dflux']/diff_noise)

        avro_diasource_list = []

        for i_source, diasource in enumerate(diasource_data):
            # Plain Python ints: np.long no longer exists in current NumPy,
            # and Python ints cannot silently wrap around when shifted.
            unique_id = int(diasource['uniqueId'])
            source_ct = self._diasource_ct.get(unique_id, 1)
            self._diasource_ct[unique_id] = source_ct + 1

            avro_diasource = {}
            avro_diasource['diaSourceId'] = (unique_id << self._n_bit_shift) + source_ct
            avro_diasource['ccdVisitId'] = int((diasource['chipNum']*10**7) + obshistid)
            avro_diasource['diaObjectId'] = unique_id

            avro_diasource['midPointTai'] = diasource['TAI']
            avro_diasource['filterName'] = bp_name_dict[diasource['band']]
            avro_diasource['ra'] = diasource['ra']
            avro_diasource['decl'] = diasource['dec']
            avro_diasource['flags'] = int(self._rng.randint(10, 1000))

            avro_diasource['x'] = diasource['xPix']
            avro_diasource['y'] = diasource['yPix']
            avro_diasource['snr'] = diff_snr[i_source]
            avro_diasource['psFlux'] = diasource['dflux']

            # NOTE: the order of the random draws below is part of the
            # reproducible output; do not reorder them.
            ra_dec_cov = {}
            ra_dec_cov['raSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['declSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['ra_decl_Cov'] = self._rng.random_sample()*0.001

            avro_diasource['ra_decl_Cov'] = ra_dec_cov

            x_y_cov = {}
            x_y_cov['xSigma'] = self._rng.random_sample()*0.001*3600.0/0.2
            x_y_cov['ySigma'] = self._rng.random_sample()*0.001*3600.0/0.2
            x_y_cov['x_y_Cov'] = self._rng.random_sample()*0.001

            avro_diasource['x_y_Cov'] = x_y_cov

            avro_diasource['totFlux'] = diasource['quiescent_flux'] + diasource['dflux']
            avro_diasource['totFluxErr'] = full_noise[i_source]
            avro_diasource['diffFlux'] = diasource['dflux']
            avro_diasource['diffFluxErr'] = diff_noise[i_source]

            avro_diasource_list.append(avro_diasource)

        return avro_diasource_list

    def _create_objects(self, diaobject_data):
        """
        Create a dict of diaObjects formatted according to the
        appropriate avro schema

        Parameters
        ----------
        diaobject_data is a numpy recarray containing all of the
        data needed for the diaObject

        Returns
        -------
        A dict keyed on uniqueId (the CatSim unique identifier for each
        astrophysical source).  Each value is a properly formatted
        diaObject corresponding to its key.

        Side effects
        ------------
        Draws from self._rng (flags, positional covariances and the
        pmParallax fit statistics are random placeholders).
        """
        pm_parallax_fields = ('pmRaSigma', 'pmDeclSigma', 'parallaxSigma',
                              'pmRa_pmDecl_Cov', 'pmRa_parallax_Cov',
                              'pmDecl_parallax_Cov')

        diaobject_dict = {}
        for diaobject in diaobject_data:

            avro_diaobject = {}
            # NOTE: the order of the random draws in this loop is part of
            # the reproducible output; do not reorder them.
            avro_diaobject['flags'] = int(self._rng.randint(10, 1000))
            avro_diaobject['diaObjectId'] = int(diaobject['uniqueId'])
            avro_diaobject['ra'] = diaobject['ra']
            avro_diaobject['decl'] = diaobject['dec']

            ra_dec_cov = {}
            ra_dec_cov['raSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['declSigma'] = self._rng.random_sample()*0.001
            ra_dec_cov['ra_decl_Cov'] = self._rng.random_sample()*0.001

            avro_diaobject['ra_decl_Cov'] = ra_dec_cov
            avro_diaobject['radecTai'] = diaobject['TAI']

            avro_diaobject['pmRa'] = diaobject['pmRA']
            avro_diaobject['pmDecl'] = diaobject['pmDec']
            avro_diaobject['parallax'] = diaobject['parallax']

            # All proper motion / parallax uncertainties are written as zero.
            avro_diaobject['pm_parallax_Cov'] = dict.fromkeys(pm_parallax_fields, 0.0)

            avro_diaobject['pmParallaxLnL'] = self._rng.random_sample()
            avro_diaobject['pmParallaxChi2'] = self._rng.random_sample()
            avro_diaobject['pmParallaxNdata'] = 0

            diaobject_dict[diaobject['uniqueId']] = avro_diaobject
        return diaobject_dict

    def write_alerts(self, obshistid, data_dir, prefix_list,
                     htmid_list, out_dir, out_prefix,
                     dmag_cutoff, lock=None, log_file_name=None):
        """
        Write the alerts for an obsHistId to a properly formatted avro file.

        Parameters
        ----------
        obshistid is the integer uniquely identifying the OpSim pointing
        being simulated

        data_dir is the directory containing the sqlite files created by
        the AlertDataGenerator

        prefix_list is a list of prefixes for those sqlite files.

        htmid_list is the list of htmids identifying the trixels that overlap
        this obshistid's field of view.  For each htmid in htmid_list and each
        prefix in prefix_list, this method will process the files
            data_dir/prefix_htmid_sqlite.db
        searching for alerts that correspond to this obshistid

        out_dir is the directory to which the avro files should be written

        out_prefix is the prefix of the avro file names

        dmag_cutoff is the minimum delta magnitude needed to trigger an alert

        lock is an optional multiprocessing.Lock() for use when running many
        instances of this method.  It prevents multiple processes from writing to
        the logfile or stdout at once.

        log_file_name is the name of an optional text file to which progress is
        written.

        Raises
        ------
        RuntimeError if load_schema() has not been called yet.

        Notes
        -----
        The output file is out_dir/out_prefix_obshistid.avro; any existing
        file of that name is removed first.  sqlite files that do not exist
        are skipped with a warning.
        """

        # Fail before touching the filesystem if there is no schema to write with.
        if getattr(self, '_alert_schema', None) is None:
            raise RuntimeError("No alert schema has been loaded; "
                               "call load_schema() before write_alerts()")

        out_name = os.path.join(out_dir, '%s_%d.avro' % (out_prefix, obshistid))
        if os.path.exists(out_name):
            os.unlink(out_name)

        diasource_query = ('SELECT alert.uniqueId, alert.xPix, alert.yPix, '
                           'alert.chipNum, alert.dflux, alert.snr, alert.ra, alert.dec, '
                           'meta.band, meta.TAI, quiescent.flux, quiescent.snr '
                           'FROM alert_data as alert '
                           'INNER JOIN metadata AS meta ON alert.obshistId=meta.obshistId '
                           'INNER JOIN quiescent_flux AS quiescent ON quiescent.uniqueId=alert.uniqueID '
                           'AND quiescent.band=meta.band '
                           'WHERE alert.obshistId=%d '
                           'ORDER BY alert.uniqueId') % obshistid

        # Field order must match the column order of diasource_query.
        diasource_dtype = np.dtype([('uniqueId', int), ('xPix', float), ('yPix', float),
                                    ('chipNum', int), ('dflux', float), ('tot_snr', float),
                                    ('ra', float), ('dec', float), ('band', int), ('TAI', float),
                                    ('quiescent_flux', float), ('quiescent_snr', float)])

        diaobject_query = ('SELECT uniqueId, ra, dec, TAI, pmRA, pmDec, parallax '
                           'FROM baseline_astrometry')

        # Field order must match the column order of diaobject_query.
        diaobject_dtype = np.dtype([('uniqueId', int), ('ra', float), ('dec', float),
                                    ('TAI', float), ('pmRA', float), ('pmDec', float),
                                    ('parallax', float)])

        with DataFileWriter(open(out_name, "wb"),
                            DatumWriter(), self._alert_schema) as data_writer:

            t_start = time.time()
            alert_ct = 0
            for htmid in htmid_list:
                for prefix in prefix_list:
                    db_name = os.path.join(data_dir, '%s_%d_sqlite.db' % (prefix, htmid))
                    if not os.path.exists(db_name):
                        warnings.warn('%s does not exist' % db_name)
                        continue

                    db_obj = DBObject(db_name, driver='sqlite')

                    diaobject_data = db_obj.execute_arbitrary(diaobject_query,
                                                              dtype=diaobject_dtype)

                    diaobject_dict = self._create_objects(diaobject_data)

                    diasource_data = db_obj.execute_arbitrary(diasource_query,
                                                              dtype=diasource_dtype)

                    # Keep only sources whose magnitude change is at least dmag_cutoff.
                    dmag = 2.5*np.log10(1.0+diasource_data['dflux']/diasource_data['quiescent_flux'])
                    valid_alerts = np.where(np.abs(dmag) >= dmag_cutoff)
                    diasource_data = diasource_data[valid_alerts]
                    avro_diasource_list = self._create_sources(obshistid, diasource_data)

                    # avro_diasource_list is parallel to diasource_data
                    for source_row, diasource in zip(diasource_data, avro_diasource_list):
                        alert_ct += 1
                        unq = source_row['uniqueId']
                        diaobject = diaobject_dict[unq]

                        avro_alert = {}
                        avro_alert['alertId'] = int((obshistid << 20) + alert_ct)
                        avro_alert['l1dbId'] = int(unq)
                        avro_alert['diaSource'] = diasource
                        avro_alert['diaObject'] = diaobject

                        data_writer.append(avro_alert)

        if lock is not None:
            lock.acquire()

        # try/finally guarantees the lock is released even if printing or
        # writing the log fails; otherwise sibling processes would hang.
        try:
            elapsed = (time.time()-t_start)/3600.0

            msg = 'finished obshistid %d; %d alerts in %.2e hrs' % (obshistid, alert_ct, elapsed)

            print(msg)

            if log_file_name is not None:
                with open(log_file_name, 'a') as out_file:
                    out_file.write(msg)
                    out_file.write('\n')
        finally:
            if lock is not None:
                lock.release()