22__all__ = [
"ConvertRefcatManager",
"ConvertGaiaManager",
"ConvertGaiaXpManager"]
24from ctypes
import c_int
31import astropy.units
as u
42COUNTER = multiprocessing.Value(c_int, 0)
44FILE_PROGRESS = multiprocessing.Value(c_int, 0)
48 """Placeholder for ConfigurableField validation; refcat convert is
49 configured by the parent convert Task.
56 Convert a reference catalog from external files into the LSST HTM sharded
57 format, using a multiprocessing Pool to speed up the work.
61 filenames : `dict` [`int`, `str`]
62 The HTM pixel id
and filenames to convert the catalog into.
64 The Task configuration holding the field names.
65 file_reader : `lsst.pipe.base.Task`
66 The file reader to use to load the files.
68 The
class used to compute the HTM pixel per coordinate.
70 The schema of the output catalog.
72 The mapping
from output field names to keys
in the Schema.
73 htmRange : `tuple` [`int`]
74 The start
and end HTM pixel ids.
75 addRefCatMetadata : callable
76 A function called to add extra metadata to each output Catalog.
78 The log to send messages to.
80 _flags = ['photometric',
'resolved',
'variable']
81 _DefaultName =
'convertRefcatManager'
82 ConfigClass = ConvertRefcatManagerConfig
84 def __init__(self, filenames, config, file_reader, indexer,
85 schema, key_map, htmRange, addRefCatMetadata, log):
96 if self.
config.coord_err_unit
is not None:
100 def run(self, inputFiles):
101 """Index a set of input files from a reference catalog, and write the
102 output to the appropriate filenames, in parallel.
107 A list of file paths to read data
from.
111 output : `dict` [`int`, `str`]
112 The htm ids
and the filenames that were written to.
114 global COUNTER, FILE_PROGRESS
117 with multiprocessing.Manager()
as manager:
119 FILE_PROGRESS.value = 0
120 fileLocks = manager.dict()
123 fileLocks[i] = manager.Lock()
124 self.
log.info(
"File locks created.")
126 start_time = time.perf_counter()
127 with multiprocessing.Pool(self.
config.n_processes)
as pool:
128 result = pool.starmap(self.
_convertOneFile, zip(inputFiles, itertools.repeat(fileLocks)))
129 end_time = time.perf_counter()
130 self.
log.info(
"Finished writing files. Elapsed time: %.2f seconds", end_time-start_time)
132 return {id: self.
filenames[id]
for item
in result
for id
in item}
134 def _convertOneFile(self, filename, fileLocks):
135 """Read and process one file, and write its records to the correct
136 indexed files, while handling exceptions
in a useful way so that they
137 don
't get swallowed by the multiprocess pool.
143 fileLocks : `dict` [`int`, `multiprocessing.Lock`]
144 A Lock for each HTM pixel; each pixel gets one file written,
and
145 we need to block when one process
is accessing that file.
149 pixels, files : `list` [`int`]
150 The pixel ids that were written to.
156 matchedPixels = self.
indexer.indexPoints(inputData[self.
config.ra_name],
157 inputData[self.
config.dec_name])
158 pixel_ids = set(matchedPixels)
159 for pixelId
in pixel_ids:
160 with fileLocks[pixelId]:
161 self.
_doOnePixel(inputData, matchedPixels, pixelId, fluxes, coordErr)
162 with FILE_PROGRESS.get_lock():
163 oldPercent = 100 * FILE_PROGRESS.value / self.
nInputFiles
164 FILE_PROGRESS.value += 1
165 percent = 100 * FILE_PROGRESS.value / self.
nInputFiles
167 if np.floor(percent) - np.floor(oldPercent) >= 1:
168 self.
log.info(
"Completed %d / %d files: %d %% complete ",
174 def _doOnePixel(self, inputData, matchedPixels, pixelId, fluxes, coordErr):
175 """Process one HTM pixel, appending to an existing catalog or creating
176 a new catalog, as needed.
180 inputData : `numpy.ndarray`
181 The data
from one input file.
182 matchedPixels : `numpy.ndarray`
183 The row-matched pixel indexes corresponding to ``inputData``.
185 The pixel index we are currently processing.
186 fluxes : `dict` [`str`, `numpy.ndarray`]
187 The values that will go into the flux
and fluxErr fields
in the
189 coordErr : `dict` [`str`, `numpy.ndarray`]
190 The values that will go into the coord_raErr, coord_decErr,
and
191 coord_ra_dec_Cov fields
in the output catalog (
in radians).
193 idx = np.where(matchedPixels == pixelId)[0]
195 for outputRow, inputRow
in zip(catalog[-len(idx):], inputData[idx]):
199 with COUNTER.get_lock():
200 self.
_setIds(inputData[idx], catalog)
203 for name, array
in fluxes.items():
204 catalog[self.
key_map[name]][-len(idx):] = array[idx]
207 for name, array
in coordErr.items():
208 catalog[name][-len(idx):] = array[idx]
210 catalog.writeFits(self.
filenames[pixelId])
212 def _setIds(self, inputData, catalog):
213 """Fill the `id` field of catalog with a running index, filling the
214 last values up to the length of ``inputData``.
216 Fill with `self.
config.id_name`
if specified, otherwise use the
217 global running counter value.
221 inputData : `numpy.ndarray`
222 The input data that
is being processed.
224 The output catalog to fill the ids.
227 size = len(inputData)
229 catalog[
'id'][-size:] = inputData[self.
config.id_name]
231 idEnd = COUNTER.value + size
232 catalog[
'id'][-size:] = np.arange(COUNTER.value, idEnd)
233 COUNTER.value = idEnd
236 """Get a catalog from disk or create it if it doesn't exist.
241 Identifier for catalog to retrieve
243 Schema to use in catalog creation if it does not exist.
245 The number of new elements that will be added to the catalog,
246 so space can be preallocated.
251 The new
or read-
and-resized catalog specified by `dataId`.
254 if os.path.isfile(self.
filenames[pixelId]):
255 catalog = afwTable.SimpleCatalog.readFits(self.
filenames[pixelId])
256 catalog.resize(len(catalog) + nNewElements)
257 return catalog.copy(deep=
True)
258 catalog = afwTable.SimpleCatalog(schema)
259 catalog.resize(nNewElements)
265 """Create an ICRS coord. from a row of a catalog being converted.
269 row : `numpy.ndarray`
270 Row from catalog being converted.
272 Name of RA key
in catalog being converted.
274 Name of Dec key
in catalog being converted.
283 def _getCoordErr(self, inputData, ):
284 """Compute the ra/dec error fields that will go into the output catalog.
288 inputData : `numpy.ndarray`
289 The input data to compute coordinate errors for.
293 coordErr : `dict` [`str`, `numpy.ndarray`]
294 The values that will go into the coord_raErr and coord_decErr fields
295 in the output catalog (in radians).
299 This does
not handle the ra/dec covariance field,
300 ``coord_ra_coord_dec_Cov``. That field
is handled
in
301 `_setCoordinateCovariance`.
304 if hasattr(self,
"coord_err_unit"):
305 result[
'coord_raErr'] = u.Quantity(inputData[self.
config.ra_err_name],
307 result[
'coord_decErr'] = u.Quantity(inputData[self.
config.dec_err_name],
311 def _setFlags(self, record, row):
312 """Set flags in an output record.
317 Row from indexed catalog to modify.
318 row : `numpy.ndarray`
319 Row
from catalog being converted.
321 names = record.schema.getNames()
324 attr_name =
'is_{}_name'.format(flag)
325 record.set(self.
key_map[flag], bool(row[getattr(self.
config, attr_name)]))
327 def _getFluxes(self, inputData):
328 """Compute the flux fields that will go into the output catalog.
332 inputData : `numpy.ndarray`
333 The input data to compute fluxes for.
337 fluxes : `dict` [`str`, `numpy.ndarray`]
338 The values that will go into the flux
and fluxErr fields
in the
342 for item
in self.
config.mag_column_list:
343 result[item+
'_flux'] = (inputData[item]*u.ABmag).to_value(u.nJy)
344 if len(self.
config.mag_err_column_map) > 0:
345 for err_key
in self.
config.mag_err_column_map.keys():
346 error_col_name = self.
config.mag_err_column_map[err_key]
349 fluxErr = fluxErrFromABMagErr(inputData[error_col_name].copy(),
350 inputData[err_key].copy())*1e9
351 result[err_key+
'_fluxErr'] = fluxErr
354 def _setProperMotion(self, record, row):
355 """Set proper motion fields in a record of an indexed catalog.
357 The proper motions are read from the specified columns,
358 scaled appropriately,
and installed
in the appropriate
359 columns of the output.
364 Row
from indexed catalog to modify.
365 row : structured `numpy.array`
366 Row
from catalog being converted.
368 if self.
config.pm_ra_name
is None:
370 radPerOriginal = np.radians(self.
config.pm_scale)/(3600*1000)
371 record.set(self.
key_map[
"pm_ra"], row[self.
config.pm_ra_name]*radPerOriginal*lsst.geom.radians)
372 record.set(self.
key_map[
"pm_dec"], row[self.
config.pm_dec_name]*radPerOriginal*lsst.geom.radians)
374 if self.
config.pm_ra_err_name
is not None:
375 record.set(self.
key_map[
"pm_raErr"], row[self.
config.pm_ra_err_name]*radPerOriginal)
376 record.set(self.
key_map[
"pm_decErr"], row[self.
config.pm_dec_err_name]*radPerOriginal)
378 def _setParallax(self, record, row):
379 """Set the parallax fields in a record of a refcat.
381 if self.
config.parallax_name
is None:
383 scale = self.
config.parallax_scale*lsst.geom.milliarcseconds
384 record.set(self.
key_map[
'parallax'], row[self.
config.parallax_name]*scale)
385 record.set(self.
key_map[
'parallaxErr'], row[self.
config.parallax_err_name]*scale)
def _epochToMjdTai(self, nativeEpoch):
    """Return ``nativeEpoch`` expressed as a TAI MJD (a float).

    Parameters
    ----------
    nativeEpoch
        Epoch value in the input catalog's own representation; it is
        interpreted by `astropy.time.Time` using
        ``self.config.epoch_format`` and ``self.config.epoch_scale``.

    Returns
    -------
    mjd
        The ``.tai.mjd`` value of the resulting `astropy.time.Time`.
    """
    cfg = self.config
    epoch = astropy.time.Time(
        nativeEpoch,
        format=cfg.epoch_format,
        scale=cfg.epoch_scale,
    )
    return epoch.tai.mjd
393 def _setCoordinateCovariance(self, record, row):
394 """Set the off-diagonal position covariance in a record of an indexed
397 There is no generic way to determine covariance. Override this method
398 in a subclass specialized
for your dataset.
403 Row
from indexed catalog to modify.
404 row : structured `numpy.array`
405 Row
from catalog being converted.
407 raise NotImplementedError(
"There is no default method for setting the covariance. Override this "
408 "method in a subclass specialized for your dataset.")
410 def _setExtra(self, record, row):
411 """Set extra data fields in a record of an indexed catalog.
416 Row from indexed catalog to modify.
417 row : structured `numpy.array`
418 Row
from catalog being converted.
420 for extra_col
in self.
config.extra_col_names:
421 value = row[extra_col]
429 if isinstance(value, np.str_):
431 record.set(self.
key_map[extra_col], value)
433 def _fillRecord(self, record, row):
434 """Fill a record in an indexed catalog to be persisted.
439 Row from indexed catalog to modify.
440 row : structured `numpy.array`
441 Row
from catalog being converted.
446 if self.
config.full_position_information:
454 """Special-case convert manager to deal with Gaia fluxes.
462 def _getFluxes(self, input):
465 def gaiaFluxToFlux(flux, zeroPoint):
466 """Equations 5.19 and 5.30 from the Gaia calibration document define the
467 conversion from Gaia electron/second fluxes to AB magnitudes.
468 https://gea.esac.esa.int/archive/documentation/GDR2/Data_processing/chap_cu5pho/sec_cu5pho_calibr/ssec_cu5pho_calibr_extern.html
470 result = ((zeroPoint + -2.5 * np.log10(flux))*u.ABmag).to_value(u.nJy)
472 result[flux == 0] = 0
477 with np.errstate(invalid=
'ignore', divide=
'ignore'):
480 result[
'phot_g_mean_flux'] = gaiaFluxToFlux(input[
'phot_g_mean_flux'], 25.7934)
481 result[
'phot_bp_mean_flux'] = gaiaFluxToFlux(input[
'phot_bp_mean_flux'], 25.3806)
482 result[
'phot_rp_mean_flux'] = gaiaFluxToFlux(input[
'phot_rp_mean_flux'], 25.1161)
484 result[
'phot_g_mean_fluxErr'] = result[
'phot_g_mean_flux'] / input[
'phot_g_mean_flux_over_error']
485 result[
'phot_bp_mean_fluxErr'] = result[
'phot_bp_mean_flux'] / input[
'phot_bp_mean_flux_over_error']
486 result[
'phot_rp_mean_fluxErr'] = result[
'phot_rp_mean_flux'] / input[
'phot_rp_mean_flux_over_error']
490 def _setCoordinateCovariance(self, record, row):
491 """Set the off-diagonal position covariance in a record of an indexed
494 Convert the Gaia coordinate correlations into covariances.
499 Row from indexed catalog to modify.
500 row : structured `numpy.array`
501 Row
from catalog being converted.
503 inputParams = ['ra',
'dec',
'parallax',
'pmra',
'pmdec']
504 outputParams = [
'coord_ra',
'coord_dec',
'parallax',
'pm_ra',
'pm_dec']
510 reorder = [0, 1, 4, 2, 3]
517 j_error = row[f
'{inputParams[j]}_error'] * inputUnits[j]
518 i_error = row[f
'{inputParams[i]}_error'] * inputUnits[i]
519 ij_corr = row[f
'{inputParams[j]}_{inputParams[i]}_corr']
520 cov = (i_error * j_error * ij_corr).to_value(self.
outputUnit)
524 a = (i
if (reorder[i] < reorder[j])
else j)
525 b = (j
if (reorder[i] < reorder[j])
else i)
527 record.set(self.
key_map[f
'{outputParams[a]}_{outputParams[b]}_Cov'], cov)
531 """Special-case convert manager for Gaia XP spectrophotometry catalogs,
532 that have fluxes/flux errors, instead of magnitudes/mag errors. The input
533 flux and error values are
in units of W/Hz/(m^2) (Gaia Collaboration, Montegriffo et al. 2022).
534 The flux and fluxErr fields in the output catalog have units of nJy.
537 def _getFluxes(self, inputData):
539 for item
in self.
config.mag_column_list:
541 error_col_name = item.replace(
"_flux_",
"_flux_error_")
543 result[item +
"_flux"] = (
544 inputData[item] * u.Watt / u.Hz / u.meter / u.meter
546 result[item +
"_fluxErr"] = (
547 inputData[error_col_name] * u.Watt / u.Hz / u.meter / u.meter
def __init__(self, *args, **kwargs)
def _setFlags(self, record, row)
def _epochToMjdTai(self, nativeEpoch)
def _setExtra(self, record, row)
def run(self, inputFiles)
def _setIds(self, inputData, catalog)
def _fillRecord(self, record, row)
def __init__(self, filenames, config, file_reader, indexer, schema, key_map, htmRange, addRefCatMetadata, log)
def computeCoord(row, ra_name, dec_name)
def _getFluxes(self, inputData)
def _setProperMotion(self, record, row)
def _getCoordErr(self, inputData)
def _doOnePixel(self, inputData, matchedPixels, pixelId, fluxes, coordErr)
def _setParallax(self, record, row)
def _setCoordinateCovariance(self, record, row)
def getCatalog(self, pixelId, schema, nNewElements)
def _convertOneFile(self, filename, fileLocks)