from __future__ import absolute_import, division, print_function

__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask", "DatasetConfig"]

import numpy as np

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.afw.table as afwTable
import lsst.afw.geom as afwGeom
from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr
from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask

39 """!Task runner for the reference catalog ingester 41 Data IDs are ignored so the runner should just run the task on the parsed command. 44 def run(self, parsedCmd):
46 Several arguments need to be collected to send on to the task methods. 48 @param[in] parsedCmd Parsed command including command line arguments. 49 @param[out] Struct containing the result of the indexing. 51 files = parsedCmd.files
52 butler = parsedCmd.butler
53 task = self.TaskClass(config=self.config, log=self.log, butler=butler)
54 task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
56 result = task.create_indexed_catalog(files)
57 if self.doReturnResults:
58 return pipeBase.Struct(
class DatasetConfig(pexConfig.Config):
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        default='cal_ref_cat',
        doc='String to pass to the butler to retrieve persisted files.',
    )
    indexer = IndexerRegistry.makeField(
        default='HTM',
        doc='Name of indexer algorithm to use.  Default is HTM.',
    )

class IngestIndexedReferenceConfig(pexConfig.Config):
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files.  Default is to expect text files.',
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column",
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information.  At least one entry is required.",
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value).",
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if satisfactory for photometric calibration (optional).',
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is resolved (optional).',
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is measured to be variable (optional).',
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).',
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.',
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError(
                "If magnitude errors are provided, all magnitudes must have an error column")

139 """!Class for both producing indexed reference catalogs and for loading them. 141 This implements an indexing scheme based on hierarchical triangular mesh (HTM). 142 The term index really means breaking the catalog into localized chunks called 143 shards. In this case each shard contains the entries from the catalog in a single 146 canMultiprocess =
False 147 ConfigClass = IngestIndexedReferenceConfig
148 RunnerClass = IngestReferenceRunner
149 _DefaultName =
'IngestIndexedReferenceTask' 151 _flags = [
'photometric',
'resolved',
'variable']
    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser.

        This overrides the original because we need the file arguments.
        """
        parser = pipeBase.InputOnlyArgumentParser(name=cls._DefaultName)
        parser.add_argument("files", nargs="+", help="Names of files to index")
        return parser
164 """!Constructor for the HTM indexing engine 166 @param[in] butler dafPersistence.Butler object for reading and writing catalogs 169 pipeBase.Task.__init__(self, *args, **kwargs)
170 self.
indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
171 self.config.dataset_config.indexer.active)
172 self.makeSubtask(
'file_reader')
175 """!Index a set of files comprising a reference catalog. Outputs are persisted in the 178 @param[in] files A list of file names to read. 182 for filename
in files:
183 arr = self.file_reader.run(filename)
184 index_list = self.
indexer.index_points(arr[self.config.ra_name], arr[self.config.dec_name])
188 dataId = self.
indexer.make_data_id(
'master_schema',
189 self.config.dataset_config.ref_dataset_name)
193 pixel_ids = set(index_list)
194 for pixel_id
in pixel_ids:
195 dataId = self.
indexer.make_data_id(pixel_id, self.config.dataset_config.ref_dataset_name)
197 els = np.where(index_list == pixel_id)
199 record = catalog.addNew()
200 rec_num = self.
_fill_record(record, row, rec_num, key_map)
201 self.
butler.put(catalog,
'ref_cat', dataId=dataId)
202 dataId = self.
indexer.make_data_id(
None, self.config.dataset_config.ref_dataset_name)
203 self.
butler.put(self.config.dataset_config,
'ref_cat_config', dataId=dataId)
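
    # Resulting repository layout, schematically (the exact dataId keys come
    # from the configured indexer and are assumed here for illustration): one
    # 'ref_cat' dataset per populated trixel, one 'master_schema' entry holding
    # an empty catalog with the full schema, and a single 'ref_cat_config'
    # recording how the catalog was indexed.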
207 """!Create an ICRS SpherePoint from a np.array row 208 @param[in] row dict like object with ra/dec info in degrees 209 @param[in] ra_name name of RA key 210 @param[in] dec_name name of Dec key 211 @param[out] ICRS SpherePoint constructed from the RA/Dec values 215 def _set_flags(self, record, row, key_map):
216 """!Set the flags for a record. Relies on the _flags class attribute 217 @param[in,out] record SourceCatalog record to modify 218 @param[in] row dict like object containing flag info 219 @param[in] key_map Map of catalog keys to use in filling the record 221 names = record.schema.getNames()
224 attr_name =
'is_{}_name'.format(flag)
225 record.set(key_map[flag], bool(row[getattr(self.config, attr_name)]))
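
    # Naming convention used above: each entry in _flags ('photometric',
    # 'resolved', 'variable') pairs with a config field 'is_<flag>_name', and
    # a flag is only filled when that config field names an input column.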
    def _set_mags(self, record, row, key_map):
        """!Set the flux records from the input magnitudes.

        @param[in,out] record  SourceCatalog record to modify
        @param[in] row  dict like object containing magnitude values
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        for item in self.config.mag_column_list:
            record.set(key_map[item + '_flux'], fluxFromABMag(row[item]))
        if len(self.config.mag_err_column_map) > 0:
            for err_key in self.config.mag_err_column_map.keys():
                error_col_name = self.config.mag_err_column_map[err_key]
                record.set(key_map[err_key + '_fluxSigma'],
                           fluxErrFromABMagErr(row[error_col_name], row[err_key]))
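
    # For reference (standard AB definitions, independent of this task): an AB
    # magnitude m corresponds to a flux f = 3631 Jy * 10**(-0.4 * m), and a
    # magnitude error dm propagates to df = (ln 10 / 2.5) * f * dm; the afw
    # helpers used above apply these conversions in afw's flux units.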
    def _set_extra(self, record, row, key_map):
        """!Copy the extra column information to the record.

        @param[in,out] record  SourceCatalog record to modify
        @param[in] row  dict like object containing the column values
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        for extra_col in self.config.extra_col_names:
            value = row[extra_col]
            # numpy reads string-like columns as np.str_, which afwTable cannot
            # store directly, so cast to a plain str first
            if isinstance(value, np.str_):
                value = str(value)
            record.set(key_map[extra_col], value)
    def _fill_record(self, record, row, rec_num, key_map):
        """!Fill a record to put in the persisted indexed catalogs.

        @param[in,out] record  afwTable.SourceRecord in a reference catalog to fill.
        @param[in] row  A row from a numpy array constructed from the input catalogs.
        @param[in] rec_num  Starting integer to increment for the unique id
        @param[in] key_map  Map of catalog keys to use in filling the record
        """
        record.setCoord(self.compute_coord(row, self.config.ra_name, self.config.dec_name))
        if self.config.id_name:
            record.setId(row[self.config.id_name])
        else:
            rec_num += 1
            record.setId(rec_num)
        self._set_flags(record, row, key_map)
        self._set_mags(record, row, key_map)
        self._set_extra(record, row, key_map)
        return rec_num
283 """!Get a catalog from the butler or create it if it doesn't exist 285 @param[in] dataId Identifier for catalog to retrieve 286 @param[in] schema Schema to use in catalog creation if the butler can't get it 287 @param[out] afwTable.SourceCatalog for the specified identifier 289 if self.
butler.datasetExists(
'ref_cat', dataId=dataId):
290 return self.
butler.get(
'ref_cat', dataId=dataId)
291 return afwTable.SourceCatalog(schema)
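
    # Design note: fetching an existing shard before writing means repeated
    # calls (e.g. one per input file) append to prior contents rather than
    # overwriting them, so a catalog can be ingested from many files.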
294 """!Make the schema to use in constructing the persisted catalogs. 296 @param[in] dtype A np.dtype to use in constructing the schema 297 @param[out] The schema for the output source catalog. 298 @param[out] A map of catalog keys to use in filling the record 301 mag_column_list = self.config.mag_column_list
302 mag_err_column_map = self.config.mag_err_column_map
303 if len(mag_err_column_map) > 0
and (
304 not len(mag_column_list) == len(mag_err_column_map)
or 305 not sorted(mag_column_list) == sorted(mag_err_column_map.keys())):
306 raise ValueError(
"Every magnitude column must have a corresponding error column")
308 schema = afwTable.SourceTable.makeMinimalSchema()
311 if dtype[name].kind ==
'U': 314 at_size = dtype[name].itemsize
315 return schema.addField(name, type=at_type, size=at_size)
317 at_type = dtype[name].type
318 return schema.addField(name, at_type)
320 for item
in mag_column_list:
321 key_map[item+
'_flux'] = schema.addField(item+
'_flux', float)
322 if len(mag_err_column_map) > 0:
323 for err_item
in mag_err_column_map.keys():
324 key_map[err_item+
'_fluxSigma'] = schema.addField(err_item+
'_fluxSigma', float)
326 attr_name =
'is_{}_name'.format(flag)
327 if getattr(self.config, attr_name):
328 key_map[flag] = schema.addField(flag,
'Flag')
329 for col
in self.config.extra_col_names:
330 key_map[col] = add_field(col)
331 return schema, key_map
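
    # Illustrative shape of the return value (field names depend on the
    # config): with mag_column_list=["g"] and mag_err_column_map={"g": "g_err"}
    # key_map holds afwTable keys under names like 'g_flux' and 'g_fluxSigma',
    # which _fill_record uses to populate each output record.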