Coverage for python/lsst/meas/algorithms/ingestIndexManager.py: 18%

# This file is part of meas_algorithms.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Imports inferred from the names used below.
import itertools
import multiprocessing
import os.path

import astropy.time
import astropy.units as u
import numpy as np

import lsst.geom
import lsst.afw.table as afwTable
from lsst.afw.image import fluxErrFromABMagErr

# global shared counter to keep track of source ids
# (multiprocess sharing is most easily done with a global)
COUNTER = multiprocessing.Value('i', 0)
# global shared counter to keep track of number of files processed.
FILE_PROGRESS = multiprocessing.Value('i', 0)
""" Ingest a reference catalog from external files into a butler repository, using a multiprocessing Pool to speed up the work.
Parameters ---------- filenames : `dict` [`int`, `str`] The HTM pixel id and filenames to ingest the catalog into. config : `lsst.meas.algorithms.IngestIndexedReferenceConfig` The Task configuration holding the field names. file_reader : `lsst.pipe.base.Task` The file reader to use to load the files. indexer : `lsst.meas.algorithms.HtmIndexer` The class used to compute the HTM pixel per coordinate. schema : `lsst.afw.table.Schema` The schema of the output catalog. key_map : `dict` [`str`, `lsst.afw.table.Key`] The mapping from output field names to keys in the Schema. htmRange : `tuple` [`int`] The start and end HTM pixel ids. addRefCatMetadata : callable A function called to add extra metadata to each output Catalog. log : `lsst.log.Log` The log to send messages to. """
schema, key_map, htmRange, addRefCatMetadata, log): self.filenames = filenames self.config = config self.file_reader = file_reader self.indexer = indexer self.schema = schema self.key_map = key_map self.htmRange = htmRange self.addRefCatMetadata = addRefCatMetadata self.log = log
"""Index a set of input files from a reference catalog, and write the output to the appropriate filenames, in parallel.
Parameters ---------- inputFiles : `list` A list of file paths to read data from. """ global COUNTER, FILE_PROGRESS self.nInputFiles = len(inputFiles)
with multiprocessing.Manager() as manager: COUNTER = multiprocessing.Value('i', 0) FILE_PROGRESS = multiprocessing.Value('i', 0) fileLocks = manager.dict() self.log.info("Creating %s file locks.", self.htmRange[1] - self.htmRange[0]) for i in range(self.htmRange[0], self.htmRange[1]): fileLocks[i] = manager.Lock() self.log.info("File locks created.") with multiprocessing.Pool(self.config.n_processes) as pool: pool.starmap(self._ingestOneFile, zip(inputFiles, itertools.repeat(fileLocks)))
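    # The per-key locking pattern used by ``run`` above, reduced to a
    # standalone sketch (standard library only; the worker, key count, and
    # process count are hypothetical, not part of this module):
    #
    #     import itertools
    #     import multiprocessing
    #
    #     def work(item, locks):
    #         with locks[item % 4]:    # one lock per output key
    #             pass                 # exclusive access to that key's file
    #
    #     if __name__ == "__main__":
    #         with multiprocessing.Manager() as manager:
    #             locks = manager.dict({i: manager.Lock() for i in range(4)})
    #             with multiprocessing.Pool(2) as pool:
    #                 pool.starmap(work, zip(range(8), itertools.repeat(locks)))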
"""Read and process one file, and write its records to the correct indexed files, while handling exceptions in a useful way so that they don't get swallowed by the multiprocess pool.
Parameters ---------- filename : `str` The file to process. fileLocks : `dict` [`int`, `multiprocessing.Lock`] A Lock for each HTM pixel; each pixel gets one file written, and we need to block when one process is accessing that file. """ global FILE_PROGRESS inputData = self.file_reader.run(filename) fluxes = self._getFluxes(inputData) matchedPixels = self.indexer.indexPoints(inputData[self.config.ra_name], inputData[self.config.dec_name]) pixel_ids = set(matchedPixels) for pixelId in pixel_ids: with fileLocks[pixelId]: self._doOnePixel(inputData, matchedPixels, pixelId, fluxes) with FILE_PROGRESS.get_lock(): oldPercent = 100 * FILE_PROGRESS.value / self.nInputFiles FILE_PROGRESS.value += 1 percent = 100 * FILE_PROGRESS.value / self.nInputFiles # only log each "new percent" if np.floor(percent) - np.floor(oldPercent) >= 1: self.log.info("Completed %d / %d files: %d %% complete ", FILE_PROGRESS.value, self.nInputFiles, percent)
"""Process one HTM pixel, appending to an existing catalog or creating a new catalog, as needed.
Parameters ---------- inputData : `numpy.ndarray` The data from one input file. matchedPixels : `numpy.ndarray` The row-matched pixel indexes corresponding to ``inputData``. pixelId : `int` The pixel index we are currently processing. fluxes : `dict` [`str`, `numpy.ndarray`] The values that will go into the flux and fluxErr fields in the output catalog. """ idx = np.where(matchedPixels == pixelId)[0] catalog = self.getCatalog(pixelId, self.schema, len(idx)) for outputRow, inputRow in zip(catalog[-len(idx):], inputData[idx]): self._fillRecord(outputRow, inputRow)
global COUNTER with COUNTER.get_lock(): self._setIds(inputData[idx], catalog)
for name, array in fluxes.items(): catalog[self.key_map[name]][-len(idx):] = array[idx]
catalog.writeFits(self.filenames[pixelId])
"""Fill the `id` field of catalog with a running index, filling the last values up to the length of ``inputData``.
Fill with `self.config.id_name` if specified, otherwise use the global running counter value.
Parameters ---------- inputData : `numpy.ndarray` The input data that is being processed. catalog : `lsst.afw.table.SimpleCatalog` The output catalog to fill the ids. """ global COUNTER size = len(inputData) if self.config.id_name: catalog['id'][-size:] = inputData[self.config.id_name] else: idEnd = COUNTER.value + size catalog['id'][-size:] = np.arange(COUNTER.value, idEnd) COUNTER.value = idEnd
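    # The shared-counter id allocation above, in isolation (minimal sketch,
    # standard library plus numpy; the batch size is hypothetical):
    #
    #     import multiprocessing
    #     import numpy as np
    #
    #     counter = multiprocessing.Value('i', 0)
    #     size = 5
    #     with counter.get_lock():              # serialize id allocation
    #         start = counter.value
    #         counter.value += size
    #     ids = np.arange(start, start + size)  # unique across processes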
"""Get a catalog from disk or create it if it doesn't exist.
Parameters ---------- pixelId : `dict` Identifier for catalog to retrieve schema : `lsst.afw.table.Schema` Schema to use in catalog creation it does not exist. nNewElements : `int` The number of new elements that will be added to the catalog, so space can be preallocated.
Returns ------- catalog : `lsst.afw.table.SimpleCatalog` The new or read-and-resized catalog specified by `dataId`. """ # This is safe, because we lock on this file before getCatalog is called. if os.path.isfile(self.filenames[pixelId]): catalog = afwTable.SimpleCatalog.readFits(self.filenames[pixelId]) catalog.resize(len(catalog) + nNewElements) return catalog.copy(deep=True) # ensure contiguity, so that column-assignment works catalog = afwTable.SimpleCatalog(schema) catalog.resize(nNewElements) self.addRefCatMetadata(catalog) return catalog
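    # Whole-column assignment such as ``catalog['id'][-size:] = ...`` needs
    # contiguous records, which ``copy(deep=True)`` guarantees after the
    # resize. Minimal sketch (assuming an afw ``schema`` object exists):
    #
    #     catalog = afwTable.SimpleCatalog(schema)
    #     catalog.resize(10)
    #     catalog = catalog.copy(deep=True)
    #     assert catalog.isContiguous()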
    @staticmethod
    def computeCoord(row, ra_name, dec_name):
        """Create an ICRS coord. from a row of a catalog being ingested.

        Parameters
        ----------
        row : `numpy.ndarray`
            Row from catalog being ingested.
        ra_name : `str`
            Name of RA key in catalog being ingested.
        dec_name : `str`
            Name of Dec key in catalog being ingested.

        Returns
        -------
        coord : `lsst.geom.SpherePoint`
            ICRS coordinate.
        """
        return lsst.geom.SpherePoint(row[ra_name], row[dec_name], lsst.geom.degrees)
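    # For example (coordinate values are arbitrary):
    #
    #     >>> import lsst.geom
    #     >>> coord = lsst.geom.SpherePoint(30.0, -15.0, lsst.geom.degrees)
    #     >>> coord.getRa().asDegrees()
    #     30.0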
"""Set coordinate error in a record of an indexed catalog.
The errors are read from the specified columns, and installed in the appropriate columns of the output.
Parameters ---------- record : `lsst.afw.table.SimpleRecord` Row from indexed catalog to modify. row : `numpy.ndarray` Row from catalog being ingested. """ if self.config.ra_err_name: # IngestIndexedReferenceConfig.validate ensures all or none record.set(self.key_map["coord_raErr"], np.radians(row[self.config.ra_err_name])) record.set(self.key_map["coord_decErr"], np.radians(row[self.config.dec_err_name]))
"""Set flags in an output record.
Parameters ---------- record : `lsst.afw.table.SimpleRecord` Row from indexed catalog to modify. row : `numpy.ndarray` Row from catalog being ingested. """ names = record.schema.getNames() for flag in self._flags: if flag in names: attr_name = 'is_{}_name'.format(flag) record.set(self.key_map[flag], bool(row[getattr(self.config, attr_name)]))
"""Compute the flux fields that will go into the output catalog.
Parameters ---------- inputData : `numpy.ndarray` The input data to compute fluxes for.
Returns ------- fluxes : `dict` [`str`, `numpy.ndarray`] The values that will go into the flux and fluxErr fields in the output catalog. """ result = {} for item in self.config.mag_column_list: result[item+'_flux'] = (inputData[item]*u.ABmag).to_value(u.nJy) if len(self.config.mag_err_column_map) > 0: for err_key in self.config.mag_err_column_map.keys(): error_col_name = self.config.mag_err_column_map[err_key] # TODO: multiply by 1e9 here until we have a replacement (see DM-16903) # NOTE: copy the arrays because the numpy strides may not be useable by C++. fluxErr = fluxErrFromABMagErr(inputData[error_col_name].copy(), inputData[err_key].copy())*1e9 result[err_key+'_fluxErr'] = fluxErr return result
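    # The magnitude-to-flux conversion above, worked for one value
    # (astropy only):
    #
    #     >>> import astropy.units as u
    #     >>> (20.0 * u.ABmag).to_value(u.nJy)  # AB mag 20 in nanojansky
    #     36307.805477...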
"""Set proper motion fields in a record of an indexed catalog.
The proper motions are read from the specified columns, scaled appropriately, and installed in the appropriate columns of the output.
Parameters ---------- record : `lsst.afw.table.SimpleRecord` Row from indexed catalog to modify. row : structured `numpy.array` Row from catalog being ingested. """ if self.config.pm_ra_name is None: # IngestIndexedReferenceConfig.validate ensures all or none return radPerOriginal = np.radians(self.config.pm_scale)/(3600*1000) record.set(self.key_map["pm_ra"], row[self.config.pm_ra_name]*radPerOriginal*lsst.geom.radians) record.set(self.key_map["pm_dec"], row[self.config.pm_dec_name]*radPerOriginal*lsst.geom.radians) record.set(self.key_map["epoch"], self._epochToMjdTai(row[self.config.epoch_name])) if self.config.pm_ra_err_name is not None: # pm_dec_err_name also, by validation record.set(self.key_map["pm_raErr"], row[self.config.pm_ra_err_name]*radPerOriginal) record.set(self.key_map["pm_decErr"], row[self.config.pm_dec_err_name]*radPerOriginal)
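    # The scale factor converts the input columns to radians. For
    # pm_scale = 1.0 (input in milliarcseconds/yr):
    #
    #     import numpy as np
    #     np.radians(1.0)/(3600*1000)  # ~4.8481e-12 rad per milliarcsecond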
"""Set the parallax fields in a record of a refcat. """ if self.config.parallax_name is None: return scale = self.config.parallax_scale*lsst.geom.milliarcseconds record.set(self.key_map['parallax'], row[self.config.parallax_name]*scale) record.set(self.key_map['parallaxErr'], row[self.config.parallax_err_name]*scale)
"""Convert an epoch in native format to TAI MJD (a float). """ return astropy.time.Time(nativeEpoch, format=self.config.epoch_format, scale=self.config.epoch_scale).tai.mjd
"""Set extra data fields in a record of an indexed catalog.
Parameters ---------- record : `lsst.afw.table.SimpleRecord` Row from indexed catalog to modify. row : structured `numpy.array` Row from catalog being ingested. """ for extra_col in self.config.extra_col_names: value = row[extra_col] # If data read from a text file contains string like entires, # numpy stores this as its own internal type, a numpy.str_ # object. This seems to be a consequence of how numpy stores # string like objects in fixed column arrays. This checks # if any of the values to be added to the catalog are numpy # string types, and if they are, casts them to a python string # which is what the python c++ records expect if isinstance(value, np.str_): value = str(value) record.set(self.key_map[extra_col], value)
"""Fill a record in an indexed catalog to be persisted.
Parameters ---------- record : `lsst.afw.table.SimpleRecord` Row from indexed catalog to modify. row : structured `numpy.array` Row from catalog being ingested. """ record.setCoord(self.computeCoord(row, self.config.ra_name, self.config.dec_name))
self._setCoordErr(record, row) self._setFlags(record, row) self._setProperMotion(record, row) self._setParallax(record, row) self._setExtra(record, row) |
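# Driving the manager end to end (illustrative sketch; every name below,
# including the config, reader, indexer, schema, key map, and pixel range,
# is a hypothetical stand-in built elsewhere, not defined in this module):
#
#     manager = IngestIndexManager(
#         filenames={pixelId: "{}.fits".format(pixelId)
#                    for pixelId in range(htmStart, htmStop)},
#         config=config,                  # IngestIndexedReferenceConfig
#         file_reader=fileReader,         # Task whose run() reads one file
#         indexer=indexer,                # HtmIndexer
#         schema=schema,                  # output afw Schema
#         key_map=keyMap,                 # output field name -> afw Key
#         htmRange=(htmStart, htmStop),
#         addRefCatMetadata=addRefCatMetadata,
#         log=log,
#     )
#     manager.run(inputFiles)             # list of input file paths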