from __future__ import absolute_import, division, print_function

__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask", "DatasetConfig"]

import numpy as np

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.afw.table as afwTable
import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr

from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask
40 """!Task runner for the reference catalog ingester
42 Data IDs are ignored so the runner should just run the task on the parsed command.
45 def run(self, parsedCmd):
47 Several arguments need to be collected to send on to the task methods.
49 @param[in] parsedCmd Parsed command including command line arguments.
50 @param[out] Struct containing the result of the indexing.
52 files = parsedCmd.files
53 butler = parsedCmd.butler
54 task = self.TaskClass(config=self.config, log=self.log, butler=butler)
55 task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
57 result = task.create_indexed_catalog(files)
58 if self.doReturnResults:
59 return pipeBase.Struct(


class DatasetConfig(pexConfig.Config):
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        default='cal_ref_cat',
        doc='String to pass to the butler to retrieve persisted files.',
    )
    indexer = IndexerRegistry.makeField(
        default='HTM',
        doc='Name of indexer algorithm to use.  Default is HTM.',
    )


class IngestIndexedReferenceConfig(pexConfig.Config):
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files.  Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column",
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information.  At least one entry is required."
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError("ra_name and dec_name and at least one entry in mag_column_list must be"
                             " supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError("If magnitude errors are provided, all magnitudes must have an error column")
138 """!Class for both producing indexed reference catalogs and for loading them.
140 This implements an indexing scheme based on hierarchical triangular mesh (HTM).
141 The term index really means breaking the catalog into localized chunks called
142 shards. In this case each shard contains the entries from the catalog in a single
145 canMultiprocess =
False
146 ConfigClass = IngestIndexedReferenceConfig
147 RunnerClass = IngestReferenceRunner
148 _DefaultName =
'IngestIndexedReferenceTask'
150 _flags = [
'photometric',
'resolved',
'variable']

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser.

        This overrides the base class implementation because we need the file arguments.
        """
        parser = pipeBase.InputOnlyArgumentParser(name=cls._DefaultName)
        parser.add_argument("files", nargs="+", help="Names of files to index")
        return parser
163 """!Constructor for the HTM indexing engine
165 @param[in] butler dafPersistence.Butler object for reading and writing catalogs
168 pipeBase.Task.__init__(self, *args, **kwargs)
169 self.
indexer = IndexerRegistry[self.config.dataset_config.indexer.name](self.config.dataset_config.indexer.active)
170 self.makeSubtask(
'file_reader')
173 """!Index a set of files comprising a reference catalog. Outputs are persisted in the
176 @param[in] files A list of file names to read.
180 for filename
in files:
181 arr = self.file_reader.run(filename)
182 index_list = self.indexer.index_points(arr[self.config.ra_name], arr[self.config.dec_name])
186 dataId = self.indexer.make_data_id(
'master_schema', self.config.dataset_config.ref_dataset_name)
187 self.butler.put(self.
get_catalog(dataId, schema),
'ref_cat',
190 pixel_ids = set(index_list)
191 for pixel_id
in pixel_ids:
192 dataId = self.indexer.make_data_id(pixel_id, self.config.dataset_config.ref_dataset_name)
194 els = np.where(index_list == pixel_id)
196 record = catalog.addNew()
197 rec_num = self.
_fill_record(record, row, rec_num, key_map)
198 self.butler.put(catalog,
'ref_cat', dataId=dataId)
199 dataId = self.indexer.make_data_id(
None, self.config.dataset_config.ref_dataset_name)
200 self.butler.put(self.config.dataset_config,
'ref_cat_config', dataId=dataId)
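
    # Illustrative sketch of the persisted layout (the actual file names and
    # paths depend on the butler mapper); one 'ref_cat' shard per HTM pixel id
    # touched by the input, e.g.:
    #
    #     ref_cats/cal_ref_cat/123456.fits          <- shard for pixel_id 123456
    #     ref_cats/cal_ref_cat/master_schema.fits   <- empty catalog, schema only
    #
    # plus a 'ref_cat_config' dataset recording the DatasetConfig used.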
204 """!Create a afwCoord object from a np.array row
205 @param[in] row dict like object with ra/dec info in degrees
206 @param[in] ra_name name of RA key
207 @param[in] dec_name name of Dec key
208 @param[out] IcrsCoord object constructed from the RA/Dec values
210 return afwCoord.IcrsCoord(row[ra_name]*afwGeom.degrees,
211 row[dec_name]*afwGeom.degrees)
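
    # For example (illustrative values):
    #     compute_coord({'ra': 10.0, 'dec': -5.0}, 'ra', 'dec')
    # returns an IcrsCoord at RA = 10 deg, Dec = -5 deg.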

    def _set_flags(self, record, row, key_map):
        """!Set the flags for a record.  Relies on the _flags class attribute.

        @param[in,out] record  SourceCatalog record to modify
        @param[in] row  dict-like object containing flag info
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        names = record.schema.getNames()
        for flag in self._flags:
            if flag in names:
                attr_name = 'is_{}_name'.format(flag)
                record.set(key_map[flag], bool(row[getattr(self.config, attr_name)]))
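
    # For example, with config.is_photometric_name = 'phot_ok' (a hypothetical
    # input column), the 'photometric' flag is set from bool(row['phot_ok']).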

    def _set_mags(self, record, row, key_map):
        """!Set the flux records from the input magnitudes.

        @param[in,out] record  SourceCatalog record to modify
        @param[in] row  dict-like object containing magnitude values
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        for item in self.config.mag_column_list:
            record.set(key_map[item+'_flux'], fluxFromABMag(row[item]))
        if len(self.config.mag_err_column_map) > 0:
            for err_key in self.config.mag_err_column_map.keys():
                error_col_name = self.config.mag_err_column_map[err_key]
                record.set(key_map[err_key+'_fluxSigma'],
                           fluxErrFromABMagErr(row[error_col_name], row[err_key]))
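
    # The conversion here is the standard AB relation: a source of magnitude m
    # has flux f = 3631 Jy * 10**(-0.4*m) (so m = 20 is roughly 3.6e-5 Jy), and
    # the error propagates as sigma_f = 0.4*ln(10)*f*sigma_m; that fluxFromABMag
    # reports the flux in Jansky is an assumption of this sketch.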

    def _set_extra(self, record, row, key_map):
        """!Copy the extra column information to the record.

        @param[in,out] record  SourceCatalog record to modify
        @param[in] row  dict-like object containing the column values
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        for extra_col in self.config.extra_col_names:
            value = row[extra_col]
            # numpy returns text columns as np.str_, which afw.table cannot
            # set directly, so cast to a plain Python str first
            if isinstance(value, np.str_):
                value = str(value)
            record.set(key_map[extra_col], value)

    def _fill_record(self, record, row, rec_num, key_map):
        """!Fill a record to put in the persisted indexed catalogs.

        @param[in,out] record  afwTable.SourceRecord in a reference catalog to fill.
        @param[in] row  A row from a numpy array constructed from the input catalogs.
        @param[in] rec_num  Starting integer to increment for the unique id
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        record.setCoord(self.compute_coord(row, self.config.ra_name, self.config.dec_name))
        if self.config.id_name:
            record.setId(row[self.config.id_name])
        else:
            rec_num += 1
            record.setId(rec_num)
        self._set_flags(record, row, key_map)
        self._set_mags(record, row, key_map)
        self._set_extra(record, row, key_map)
        return rec_num
281 """!Get a catalog from the butler or create it if it doesn't exist
283 @param[in] dataId Identifier for catalog to retrieve
284 @param[in] schema Schema to use in catalog creation if the butler can't get it
285 @param[out] afwTable.SourceCatalog for the specified identifier
287 if self.butler.datasetExists(
'ref_cat', dataId=dataId):
288 return self.butler.get(
'ref_cat', dataId=dataId)
289 return afwTable.SourceCatalog(schema)
292 """!Make the schema to use in constructing the persisted catalogs.
294 @param[in] dtype A np.dtype to use in constructing the schema
295 @param[out] The schema for the output source catalog.
296 @param[out] A map of catalog keys to use in filling the record
299 mag_column_list = self.config.mag_column_list
300 mag_err_column_map = self.config.mag_err_column_map
301 if len(mag_err_column_map) > 0
and (
302 not len(mag_column_list) == len(mag_err_column_map)
or
303 not sorted(mag_column_list) == sorted(mag_err_column_map.keys())):
304 raise ValueError(
"Every magnitude column must have a corresponding error column")
306 schema = afwTable.SourceTable.makeMinimalSchema()
309 if dtype[name].kind ==
'U':
312 at_size = dtype[name].itemsize
313 return schema.addField(name, type=at_type, size=at_size)
315 at_type = dtype[name].type
316 return schema.addField(name, at_type)
318 for item
in mag_column_list:
319 key_map[item+
'_flux'] = schema.addField(item+
'_flux', float)
320 if len(mag_err_column_map) > 0:
321 for err_item
in mag_err_column_map.keys():
322 key_map[err_item+
'_fluxSigma'] = schema.addField(err_item+
'_fluxSigma', float)
324 attr_name =
'is_{}_name'.format(flag)
325 if getattr(self.config, attr_name):
326 key_map[flag] = schema.addField(flag,
'Flag')
327 for col
in self.config.extra_col_names:
328 key_map[col] = add_field(col)
329 return schema, key_map
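
    # Illustrative sketch (hypothetical columns): for input rows with dtype
    #     [('id', '<i8'), ('ra', '<f8'), ('dec', '<f8'), ('g', '<f8')]
    # and config.mag_column_list = ['g'], the returned schema is the minimal
    # SourceTable schema plus a 'g_flux' field, and key_map maps 'g_flux' to
    # the corresponding schema key.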