Coverage for python/lsst/sims/catUtils/utils/alertDataGenerator.py : 81%

"AlertStellarVariabilityCatalog", "AlertAgnVariabilityCatalog", "_baseAlertCatalog", "StellarAlertDBObj", "AgnAlertDBObj", "StellarAlertDBObjMixin"]
""" Mimics StarObj class, except it allows you to directly query all objects in a trixel specified by an htmid. """ constraint=None, limit=None, htmid=None): """Execute a query from the primary catsim database
Execute a query, taking advantage of the spherical geometry library and htmid indexes on all catalog tables in the UW catsim database
**Parameters**
* colnames : list or None a list of valid column names, corresponding to entries in the `columns` class attribute. If not specified, all columns are queried. * chunk_size : int (optional) if specified, then return an iterator object to query the database, each time returning the next `chunk_size` elements. If not specified, all matching results will be returned. * constraint : str (optional) a string which is interpreted as SQL and used as a predicate on the query * limit : int (optional) limits the number of rows returned by the query * htmid is the htmid to be queried
**Returns**
* result : list or iterator If chunk_size is not specified, then result is a list of all items which match the specified query. If chunk_size is specified, then result is an iterator over lists of the given size. """
# find the minimum and maximum htmid
# (level=21 since that is what is implemented
# on fatboy) that we are asking for
#
# Note that sqlalchemy does not like np.int64
# as a data type

# add spatial constraints to query

# Hint sql engine to seek on htmid

# SQL is not case sensitive but python is:
if ...:
    htmid_name = 'htmID'
else:
    htmid_name = 'htmId'

# Range join on htmid ranges

if constraint is not None:
    query = query.filter(text(constraint))

if limit is not None:
    query = query.limit(limit)
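A minimal sketch (not the module's own code) of the level-21 bounds those
comments describe: each HTM level splits a trixel into four children, so a
level-L htmid maps onto a contiguous block of level-21 ids via a two-bit
shift per level. The variable names and the example htmid below are
illustrative; levelFromHtmid is the sims_utils helper used elsewhere in
this module.

from lsst.sims.utils import levelFromHtmid

htmid = 8966                          # a hypothetical level-6 trixel
level = levelFromHtmid(htmid)
# first and one-past-the-last level-21 descendants of this trixel;
# cast to int because sqlalchemy does not like np.int64
htmid_min = int(htmid << 2*(21 - level))
htmid_max = int((htmid + 1) << 2*(21 - level))
# the spatial predicate is then: htmid_min <= htmID AND htmID < htmid_max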
""" Mimics GalaxyAgnObj class, except it allows you to directly query all objects in a trixel specified by an htmid. """
columns = [('galtileid', None, np.int64),
           ('galid', None, str, 30),
           ('componentra', 'agnra*PI()/180.'),
           ('componentdec', 'agndec*PI()/180.'),
           #: This is actually a problem with the stored procedure.
           #: We need to be able to map columns other than
           #: just ra/dec to raJ2000/decJ2000.  This gets
           #: important when we start perturbing the three galaxy components
           ('raJ2000', 'ra'),
           ('decJ2000', 'dec'),
           ('magNorm', 'magnorm_agn'),
           ('magNormAgn', 'magnorm_agn'),
           ('sedFilename', 'sedname_agn', str, 40),
           ('sedFilenameAgn', 'sedname_agn', str, 40),
           ('variabilityParameters', 'varParamStr', str, 256),
           ('lsst_u', 'u_ab'),
           ('lsst_g', 'g_ab'),
           ('lsst_r', 'r_ab'),
           ('lsst_i', 'i_ab'),
           ('lsst_z', 'z_ab'),
           ('lsst_y', 'y_ab')]
def query_columns_htmid(self, colnames=None, chunk_size=None,
                        constraint=None, limit=None, htmid=None):
    """Execute a query from the primary catsim database

    Execute a query, taking advantage of the spherical geometry library
    and htmid indexes on all catalog tables in the UW catsim database.

    **Parameters**

    * colnames : list or None
      a list of valid column names, corresponding to entries in the
      `columns` class attribute.  If not specified, all columns are
      queried.

    * chunk_size : int (optional)
      if specified, then return an iterator object to query the
      database, each time returning the next `chunk_size` elements.
      If not specified, all matching results will be returned.

    * constraint : str (optional)
      a string which is interpreted as SQL and used as a predicate on
      the query

    * limit : int (optional)
      limits the number of rows returned by the query

    * htmid : int
      the htmid of the trixel to be queried

    **Returns**

    * result : list or iterator
      If chunk_size is not specified, then result is a list of all
      items which match the specified query.  If chunk_size is
      specified, then result is an iterator over lists of the given
      size.
    """
trixel = trixelFromHtmid(htmid)
ra_0, dec_0 = trixel.get_center()
new_obs = ObservationMetaData(pointingRA=ra_0, pointingDec=dec_0,
                              boundType='circle',
                              boundLength=trixel.get_radius() + 0.1)

self._queried_trixel = trixel
self._queried_htmid_level = levelFromHtmid(htmid)

return self.query_columns(colnames=colnames, chunk_size=chunk_size,
                          obs_metadata=new_obs, constraint=constraint,
                          limit=limit)
"""Modify the results of raJ2000 and decJ2000 to be in radians.
Also filter the results so that any objects outside of the trixel specified in query_columns_htmid are returned with htmid=0.
**Parameters**
* results : Structured array of results from query
**Returns**
* results : Modified structured array
"""
if hasattr(self, '_queried_trixel'):
    htmid = self._queried_trixel.htmid
    htmid_21 = htmid << 2*(21 - self._queried_htmid_level)
    assert levelFromHtmid(htmid_21) == 21
    contains_arr = self._queried_trixel.contains(results['raJ2000'],
                                                 results['decJ2000'])
    results['htmid'] = np.where(contains_arr, htmid_21, 0)

results['raJ2000'] = np.radians(results['raJ2000'])
results['decJ2000'] = np.radians(results['decJ2000'])
return results
'flux', 'SNR', 'dflux', 'chipNum', 'xPix', 'yPix']
('properMotionDec', 0.0, float), ('parallax', 0.0, float)]
""" Returns an iterator over chunks of the catalog.
Parameters ---------- chunk_size : int, optional, defaults to None the number of rows to return from the database at a time. If None, returns the entire database query in one chunk.
query_cache : iterator over database rows, optional, defaults to None the result of calling db_obj.query_columns(). If query_cache is not None, this method will iterate over the rows in query_cache and produce an appropriate InstanceCatalog. DO NOT set to non-None values unless you know what you are doing. It is an optional input for those who want to repeatedly examine the same patch of sky without actually querying the database over and over again. If it is set to None (default), this method will handle the database query.
column_cache : a dict that will be copied over into the catalogs self._column_cache. Should be left as None, unless you know what you are doing. """
if query_cache is None:
    # Call the original version of iter_catalog defined in the
    # InstanceCatalog class.  This version of iter_catalog includes
    # the call to self.db_obj.query_columns, which the user would have
    # used to generate query_cache.
    for line in InstanceCatalog.iter_catalog_chunks(self, chunk_size=chunk_size):
        yield line
else:
    # Otherwise iterate over the query cache
    ...
           if col in transform_keys
           else self.column_by_name(col)
           for col in self.iter_column_names()]
    ...
           for i, col in enumerate(self.iter_column_names())])
def get_chipName(self):
    raise RuntimeError("Should not get this far in get_chipName")

def get_pupilFromSky(self):
    raise RuntimeError("Should not get this far in get_pupilFromSky")
def get_chipNum(self):
    """
    Concatenate the digits in 'R:i,j S:m,n' to make the chip number ijmn
    """
    ...
                     for name in chip_name])
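A toy illustration of the digit concatenation that docstring describes
(a sketch of the naming convention, not the module's own implementation;
only the standard library is assumed):

# 'R:2,2 S:1,1' names raft (2,2) and sensor (1,1); fusing the digits
# gives chip number 2211
name = 'R:2,2 S:1,1'
chip_num = int(''.join(ch for ch in name if ch.isdigit()))
assert chip_num == 2211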
def get_pixelCoordinates(self):
    ...
              band=self.obs_metadata.bandpass,
              includeDistortion=True)
@compound('delta_umag', 'delta_gmag', 'delta_rmag',
          'delta_imag', 'delta_zmag', 'delta_ymag')
def get_deltaMagAvro(self):
    raise RuntimeError("Should not have gotten this far in delta mag getter")
def get_lsst_magnitudes(self):
    """
    getter for LSST stellar magnitudes
    """
    ... np.array([self.column_by_name('quiescent_lsst_u'),
                  self.column_by_name('quiescent_lsst_g'),
                  self.column_by_name('quiescent_lsst_r'),
                  self.column_by_name('quiescent_lsst_i'),
                  self.column_by_name('quiescent_lsst_z'),
                  self.column_by_name('quiescent_lsst_y')])

    ... np.array([self.column_by_name('delta_umag'),
                  self.column_by_name('delta_gmag'),
                  self.column_by_name('delta_rmag'),
                  self.column_by_name('delta_imag'),
                  self.column_by_name('delta_zmag'),
                  self.column_by_name('delta_ymag')])
def get_alertPhotometry(self):
def get_alertFlux(self):
self.obs_metadata.m5[self.obs_metadata.bandpass], self.phot_params, gamma=self._gamma)
class AlertStellarVariabilityCatalog(_baseAlertCatalog,
                                     VariabilityStars, AstrometryStars):

    @compound('quiescent_lsst_u', 'quiescent_lsst_g', 'quiescent_lsst_r',
              'quiescent_lsst_i', 'quiescent_lsst_z', 'quiescent_lsst_y')
    def get_quiescent_lsst_magnitudes(self):
        return np.array([self.column_by_name('umag'),
                         self.column_by_name('gmag'),
                         self.column_by_name('rmag'),
                         self.column_by_name('imag'),
                         self.column_by_name('zmag'),
                         self.column_by_name('ymag')])
class AlertAgnVariabilityCatalog(_baseAlertCatalog,
                                 VariabilityGalaxies, AstrometryGalaxies):

    @compound('quiescent_lsst_u', 'quiescent_lsst_g', 'quiescent_lsst_r',
              'quiescent_lsst_i', 'quiescent_lsst_z', 'quiescent_lsst_y')
    def get_quiescent_lsst_magnitudes(self):
        return np.array([self.column_by_name('u_ab'),
                         self.column_by_name('g_ab'),
                         self.column_by_name('r_ab'),
                         self.column_by_name('i_ab'),
                         self.column_by_name('z_ab'),
                         self.column_by_name('y_ab')])
""" This class will read in astrophysical sources and variability models from CatSim, observe them with a simulated OpSim cadence, and write a series of sqlite files containing all of the simulated observations that could trigger an alert.
In order to make this calculation as efficient as possible, the class works by partitioning the sky according to the Hierarchical Triangular Mesh (HTM) of
Kunszt P., Szalay A., Thakar A. (2006) in "Mining The Sky", Banday A, Zaroubi S, Bartelmann M. eds. ESO Astrophysics Symposia https://www.researchgate.net/publication/226072008_The_Hierarchical_Triangular_Mesh
Szalay A. et al. (2007) "Indexing the Sphere with the Hierarchical Triangular Mesh" arXiv:cs/0701164
and simulating all of the observations in a given trixel (the elementary unit of the HTM) at once. Accordingly, the outputs produced by this class are files named like
prefix_NNNN_sqlite.db
where prefix is specified by the user and NNNN is the htmid (the unique identifying integer) corresponding to each simulated trixel.
The proper way to run this class is to instantiate it, run subdivide_obs on a list of ObservationMetaData corresponding to the OpSim pointings to be simulated, and then run alert_data_from_htmid on each htmid in the class property htmid_list. This last step can easily be parallelized using python's multiprocessing module, with each process handling a different htmid, as in the sketch below.
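A minimal sketch of that workflow (obs_list and db_obj are placeholders
you must construct for your own database; spawning one process per htmid
is purely illustrative, and a real run would batch the htmids):

import multiprocessing

gen = AlertDataGenerator()
gen.subdivide_obs(obs_list, htmid_level=6)

lock = multiprocessing.Lock()
process_list = []
for htmid in gen.htmid_list:
    p = multiprocessing.Process(
            target=gen.alert_data_from_htmid,
            args=(htmid, db_obj),
            kwargs={'output_prefix': 'alert',
                    'log_file_name': 'alert_log.txt',
                    'photometry_class': AlertStellarVariabilityCatalog,
                    'lock': lock})
    p.start()
    process_list.append(p)

for p in process_list:
    p.join()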
The sqlite files produced by alert_data_from_htmid will each contain four tables. They are as follows. Columns are listed below the tables.
alert_data
----------
uniqueId -- int -- a unique identifier for each astrophysical object
obshistId -- int -- a unique identifier for each OpSim pointing
xPix -- float -- the x pixel coordinate of the source on the focal plane
yPix -- float -- the y pixel coordinate of the source on the focal plane
dflux -- float -- the difference in flux between the source's current flux and its quiescent flux (the source's quiescent flux can be found in the quiescent_flux table). This is in units of Janskys.
snr -- float -- the signal to noise of the current detection of the source (not the signal to noise of the source's detection in a simulated difference image).
ra -- float -- the current RA of the source in degrees
dec -- float -- the current Declination of the source in degrees.
The alert_data table has a multi-column index on uniqueId and obshistId.
metadata
--------
obshistId -- int -- a unique identifier for each OpSim pointing
TAI -- float -- the International Atomic Time of the observation as an MJD (in days)
band -- int -- denotes the filter used for the observation (0=u, 1=g, 2=r, etc.)
The metadata table is indexed on obshistId.
quiescent_flux
--------------
uniqueId -- int -- a unique identifier for each astrophysical source
band -- int -- an integer denoting each LSST filter (0=u, 1=g, 2=r, etc.)
flux -- float -- the flux of the source through the filter specified by band (in units of Janskys)
snr -- float -- the signal to noise ratio of the source in the given band with m5 taken from Table 2 of the overview paper (arXiv:0805.2366)
The quiescent_flux table has a multi-column index on uniqueId and band.
baseline_astrometry
-------------------
uniqueId -- int -- a unique identifier for each astrophysical source
TAI -- float -- the International Atomic Time of the baseline astrometric measurements below as an MJD (in days)
ra -- float -- the RA of the source at TAI in degrees
dec -- float -- the Declination of the source at TAI in degrees
pmRA -- float -- the RA proper motion of the source in milliarcseconds/year
pmDec -- float -- the Declination proper motion of the source in milliarcseconds/year
parallax -- float -- the parallax of the source in milliarcseconds
The baseline_astrometry table is indexed on uniqueId.
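As a sketch of how these outputs might be consumed (the file name and
htmid below are hypothetical; only the standard library is assumed):

import sqlite3

# join each alert against the TAI and band of its pointing
with sqlite3.connect('alert_8966_sqlite.db') as conn:
    cursor = conn.cursor()
    cursor.execute('''SELECT a.uniqueId, a.dflux, a.snr, m.TAI, m.band
                      FROM alert_data AS a
                      JOIN metadata AS m ON a.obshistId = m.obshistId
                      ORDER BY m.TAI''')
    for unique_id, dflux, snr, tai, band in cursor.fetchall():
        print(unique_id, dflux, snr, tai, band)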
"""
testing=False): """ Parameters ---------- testing as a boolean that should only be True when running the unit tests. If True, it prevents the AlertDataGenerator from pre-caching variability models, which aids performance, but uses more memory than we want to use in a unit test. """
plm = ParametrizedLightCurveMixin()
plm.load_parametrized_light_curves(variability_cache=self._variability_cache)
# This is a file that lists the maximum amplitude of variability
# for each of the Kepler-derived light curve models.  It will be
# used by the stellar variability model to figure out which
# stars can be skipped because they will never vary above
# the alert-triggering threshold.
self._dmag_lookup_file = os.path.join(...,
                                      'catUtilsData', 'kplr_dmag_171204.txt')
script_name = os.path.join(getPackageDir('sims_catUtils'),
                           'support_scripts', 'get_kepler_dmag.sh')
raise RuntimeError('\n%s does not exist; run the script\n\n%s\n\n'
                   % (self._dmag_lookup_file, script_name))
else:
""" If running with multiprocessing, acquire the lock. """ self._stdout_lock.acquire()
""" If running with multiprocessing, release the lock. """ self._stdout_lock.release()
""" Take a list of ObservationMetaData and subdivide them according to which trixels (see htmModule.py in sims_utils) they intersect.
Parameters ---------- obs_list is a list of ObservationMetaData
htmid_level is an int denoting the level of the HTM mesh you want to use to tile the sky (higher htmid_level corresponds to a finer tiling). Default = 6
Returns ------- Nothing.
After running this method, this AlertGenerator will contain the following data.
- a list of the htmid of every trixel intersected by the fields of view specified in obs_list. This list is accessible from the property AlertGenerator.htmid_list
- a dict mapping each htmid to the ObservationMetaData from obs_list that intersect it. The method AlertGenerator.obs_from_htmid(htmid) will return a list of all of the ObservationMetaData that intersect the trixel specified by htmid. """
obs.pointingDec, obs.boundLength)
len(self._htmid_list))
def htmid_list(self): """ A list of the unique htmids corresponding to the trixels that need to be queried to generate the alert data """
""" Return the number of observations that intersect the trixel specified by htmid.
Must run subdivide_obs in order for this method to work. """ return len(self._htmid_dict[htmid])
""" Return a numpy array containing all of the ObservationMetaData that intersect the trixel specified by htmid.
Must run subdivide_obs in order for this method to work. """ return self._obs_list[self._htmid_dict[htmid]]
""" Write a cache of alert data to the sqlite file currently open.
Parameters ---------- conn is the connection to the sqlite file (already open)
data_cache is a dict containing all of the data to be written. It will keyed on a string like 'i_j' where i is the obshistID of an OpSim pointing and j is an arbitrary integer. That key will lead to another dict keyed on the columns being output to the sqlite file. The values of this second layer of dict are numpy arrays.
Returns ------- The number of rows written to the sqlite file """
          obshistid,
          data_cache[cache_tag]['xPix'][i_obj],
          data_cache[cache_tag]['yPix'][i_obj],
          int(data_cache[cache_tag]['chipNum'][i_obj]),
          data_cache[cache_tag]['dflux'][i_obj],
          data_cache[cache_tag]['SNR'][i_obj],
          np.degrees(data_cache[cache_tag]['raICRS'][i_obj]),
          np.degrees(data_cache[cache_tag]['decICRS'][i_obj]))
         for i_obj in range(n_obj))
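A hedged sketch of how a generator like this typically feeds the bulk
insert (the cursor name and statement are illustrative, not the module's
own; values stands for the generator above, yielding one 9-tuple per row
of the alert_data table):

cursor = conn.cursor()
cursor.executemany('''INSERT INTO alert_data
                      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''', values)
conn.commit()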
def _filter_on_photometry_then_chip_name(self, chunk, column_query,
                                         obs_valid_dex, expmjd_list,
                                         photometry_catalog, dmag_cutoff):
    """
    Determine which simulated observations are actually worth storing
    by first figuring out which observations of which objects are
    photometrically detectable and alert-worthy, then determining
    which of those actually fall on an LSST detector.
Parameters
----------
chunk is the output yielded from a CatSim ChunkIterator.  It is a
numpy recarray representing one chunk_size query from the underlying
simulations database

column_query is a list of the columns that were queried from the
database

obs_valid_dex is a list of integers corresponding to indexes in
self._obs_list of the ObservationMetaData that are actually valid for
the trixel currently being simulated

expmjd_list is a numpy array of the TAI dates of the
ObservationMetaData represented by obs_valid_dex

photometry_catalog is an instantiation of the InstanceCatalog class
being used to calculate magnitudes for these variable sources.
Outputs
-------
chip_name_dict is a dict keyed on i_obs (which is the index of an
ObservationMetaData's position in obs_valid_dex, NOT its position in
self._obs_list).  The values of chip_name_dict are tuples containing:

- a list of the names of the detectors that objects from chunk landed
  on (including Nones for those objects that did not land on any
  detector)

- a list of the xPupil coords for every object in chunk

- a list of the yPupil coords for every object in chunk

- a list of the indexes in chunk of those objects which actually
  landed on a detector
dmag_arr is a numpy array of the delta_magnitudes of every object in chunk. dmag_arr[11][3][4] is the delta_magnitude of chunk[4] in the 3rd band (i.e. the i band) at TAI = expmjd[11].
dmag_arr_transpose is dmag_arr with the time and object columns transposed so that dmag_arr_transpose[4][3][11] == dmag_arr[11][3][4].
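That index relation can be checked directly with a small, self-contained
numpy example (the shapes here are made up for illustration):

import numpy as np

n_time, n_band, n_obj = 12, 6, 5
dmag_arr = np.random.random_sample((n_time, n_band, n_obj))
dmag_arr_transpose = dmag_arr.transpose(2, 1, 0)
assert dmag_arr_transpose[4][3][11] == dmag_arr[11][3][4]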
time_arr is an array of integers with
shape == (len(chunk), len(obs_valid_dex)).  A -1 in time_arr means
that the combination of object and observation did not yield a valid
observation.  A +1 means that the object and observation combination
is valid.
"""
######################################################
# Calculate the delta_magnitude for all of the sources
#
        variability_cache=self._variability_cache,
        expmjd=expmjd_list).transpose((2, 0, 1))
else:
    pmra = None
    pmdec = None
    px = None
    vrad = None
###################################################################
# Figure out which sources actually land on an LSST detector during
# the observations in question
#
# time_arr will keep track of which objects appear in which
# observations; 1 means the object appears; -1 means it does not
            dtype=int)
              chunk['decJ2000'][photometrically_valid],
              pm_ra=pmra, pm_dec=pmdec,
              parallax=px, v_rad=vrad, obs_metadata=obs)
ypup_list_val)
xpup_list, ypup_list, valid_obj)
def alert_data_from_htmid(self, htmid, dbobj,
                          dmag_cutoff=0.005,
                          chunk_size=1000, write_every=10000,
                          output_dir='.', output_prefix='',
                          log_file_name=None,
                          photometry_class=None,
                          chunk_cutoff=-1, lock=None):
    """
    Generate an sqlite file with all of the alert data for a given
    trixel.
Parameters
----------
htmid is an integer denoting the trixel from self.htmid_list that
should be simulated
dbobj is a CatalogDBObject connecting to the data underlying the simulation
dmag_cutoff indicates the minimum change in magnitude needed to trigger a simulated alert
chunk_size denotes the number of objects to query from the database and process at one time
write_every indicates how often to write to the sqlite file (i.e. the code will pause the simulation process and write to the sqlite file when it has accumulated this many valid observations)
output_dir is the directory in which to create the sqlite file
output_prefix is the prefix of the sqlite file's name
log_file_name is the name of a text file where progress will be written
photometry_class is an InstanceCatalog class (not an instantiation) that contains the methods for calculating the photometry associated with the simulated alerts (see AlertStellarVariabilityCatalog and AlertAgnVariabilityCatalog in this module)
chunk_cutoff is an optional int; stop the simulation after this many chunks have been processed. This is for testing purposes. If chunk_cutoff == -1, the code will process all of the astrophysical objects in the trixel.
lock is a multiprocessing.Lock() for use if running multiple instances
of alert_data_from_htmid.  This will prevent multiple processes from
writing to the log file or stdout simultaneously.
"""
if log_file_name is None:
    raise RuntimeError('must specify log_file_name')
self._dmag_lookup_file_exists):
self._variability_cache['_PARAMETRIZED_LC_DMAG_CUTOFF'] = dmag_cutoff
self._variability_cache['_PARAMETRIZED_LC_DMAG_LOOKUP'] = {}
with open(self._dmag_lookup_file, 'r') as in_file:
    for line in in_file:
        if line[0] == '#':
            continue
        params = line.split()
        self._variability_cache['_PARAMETRIZED_LC_DMAG_LOOKUP'][int(params[0])] = float(params[1])
# simulation will take
elif 'u_ab' in dbobj.columnMap:
    desired_columns.append('u_ab')
    desired_columns.append('g_ab')
    desired_columns.append('r_ab')
    desired_columns.append('i_ab')
    desired_columns.append('z_ab')
    desired_columns.append('y_ab')
else:
    raise RuntimeError('Not sure what quiescent '
                       'LSST magnitudes are called '
                       'in this CatalogDBObject')
if photometry_class is None:
    raise RuntimeError('Must specify photometry_class')
if os.path.exists(output_dir):
    if not os.path.isdir(output_dir):
        raise RuntimeError('%s is not a dir' % output_dir)
else:
    os.mkdir(output_dir)
# a dummy call to make sure that the initialization
# is done before we attempt to parallelize calls
# to chipNameFromRaDecLSST
# from Table 2 of the overview paper
mag_name_to_int = {'u': 0, 'g': 1, 'r': 2,
                   'i': 3, 'z': 4, 'y': 5}
htmid=htmid, chunk_size=chunk_size)
column_outputs=['lsst_u', 'lsst_g', 'lsst_r', 'lsst_i', 'lsst_z', 'lsst_y'])
# "iterating over astrophysical objects" part # of the simulation will take
'''CREATE TABLE alert_data
(uniqueId int, obshistId int, xPix float, yPix float,
 chipNum int, dflux float, snr float, ra float, dec float)'''

'''CREATE TABLE metadata
(obshistId int, TAI float, band int)'''

'''INSERT INTO metadata
VALUES(%d, %.5f, %d)''' % (obs.OpsimMetaData['obsHistID'],
                           obs.mjd.TAI,
                           mag_name_to_int[obs.bandpass])

'''CREATE TABLE quiescent_flux
(uniqueId int, band int, flux float, snr float)'''

'''CREATE TABLE baseline_astrometry
(uniqueId int, ra real, dec real, pmRA real,
 pmDec real, parallax real, TAI real)'''
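The indexes promised in the class docstring would be created with
statements along these lines (the index names are illustrative, not
taken from the module):

'''CREATE INDEX alert_dex ON alert_data (uniqueId, obshistId)'''
'''CREATE INDEX meta_dex ON metadata (obshistId)'''
'''CREATE INDEX flux_dex ON quiescent_flux (uniqueId, band)'''
'''CREATE INDEX ast_dex ON baseline_astrometry (uniqueId)'''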
break
# filter the chunk so that we are only considering sources that are
# in the trixel currently being simulated
continue
(chip_name_dict,
 dmag_arr, dmag_arr_transpose,
 time_arr) = self._filter_on_photometry_then_chip_name(chunk, column_query,
                                                       obs_valid_dex,
                                                       expmjd_list,
                                                       photometry_catalog,
                                                       dmag_cutoff)
self.bp_dict[mag_names[i_filter]], obs_mag_cutoff[i_filter], phot_params, gamma=gamma_template[i_filter])
except AssertionError:
    print('dmag_arr_transpose_shape %s' % str(dmag_arr_transpose.shape))
    print('should be (%d, %d, %d)' % (len(chunk), len(mag_names),
                                      len(expmjd_list)))
    raise
# only include those sources for which np.abs(delta_mag) >= dmag_cutoff
# at some point in their history (note that delta_mag is defined with
# respect to the quiescent magnitude)
#
# also demand that the magnitude at some point is less than obs_mag_cutoff
#
# This is different from the dmag > dmag_cutoff check done in
# self._filter_on_photometry_then_chip_name()
#
# Now we have information about which observations actually detected
# each object (in self._filter_on_photometry_then_chip_name(),
# we assumed that every object was detected at every time step).
############################
# Process and output sources
#
# only include those sources which fall on a detector for this pointing
except:
    print('failed')
    print(actually_valid_obj)
    print(completely_valid)
    raise
dmag_arr[i_obs][i_mag][actually_valid_obj]) for i_mag in range(len(mag_names))])
('y_pupil', valid_ypup[actually_valid_obj])])
column_cache=local_column_cache):
'chipNum', 'xPix', 'yPix'):
          i_filter,
          q_f_dict[i_filter][completely_valid][i_q],
          q_snr_dict[i_filter][completely_valid][i_q])
         for i_q in range(len(completely_valid[0])))

          q_ra[completely_valid][i_q],
          q_dec[completely_valid][i_q],
          q_pmra[completely_valid][i_q],
          q_pmdec[completely_valid][i_q],
          q_parallax[completely_valid][i_q],
          q_tai)
         for i_q in range(len(completely_valid[0])))
self.acquire_lock()
with open(log_file_name, 'a') as out_file:
    out_file.write('%d is writing \n' % os.getpid())
print('%d is writing' % os.getpid())
self.release_lock()
n_rows += self._output_alert_data(conn, output_data_cache)
output_data_cache = {}
n_rows_cached = 0
if n_rows > 0:
    self.acquire_lock()
    with open(log_file_name, 'a') as out_file:
        elapsed = (time.time() - t_before_obj)/3600.0
        elapsed_per = elapsed/n_rows
        rows_per_chunk = float(n_rows)/float(i_chunk)
        total_projection = 1000.0*rows_per_chunk*elapsed_per
        out_file.write('\n    %d n_obj %d %d trimmed %d\n' %
                       (this_pid, n_obj, n_actual_obj, n_htmid_trim))
        out_file.write('    elapsed %.2e hrs per row %.2e total %.2e\n' %
                       (elapsed, elapsed_per, total_projection))
        out_file.write('    n_time_last %d; rows %d\n' %
                       (n_time_last, n_rows))
out_file.write('%d is done writing\n' % os.getpid())
print('\n    %d n_obj %d %d trimmed %d' %
      (this_pid, n_obj, n_actual_obj, n_htmid_trim))
print('    elapsed %.2e hrs per row %.2e total %.2e' %
      (elapsed, elapsed_per, total_projection))
print('    n_time_last %d; rows %d\n' % (n_time_last, n_rows))
print('%d is done writing' % os.getpid())
self.release_lock()
(htmid, (time.time()-t_start)/3600.0, n_obj, n_rows))
(htmid, (time.time()-t_start)/3600.0, n_obj))