24 __all__ = [
"IngestIndexedReferenceConfig",
"IngestIndexedReferenceTask",
"DatasetConfig"]
import os

from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask
from .loadReferenceObjects import LoadReferenceObjectsTask
from .ingestIndexManager import IngestIndexManager
# Version number of the on-disk storage format written by this task;
# see DatasetConfig.format_version for the meaning of each version.
LATEST_FORMAT_VERSION = 1


def addRefCatMetadata(catalog):
    """Add metadata to a new (not yet populated) reference catalog.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog to which metadata should be attached.  Will be modified
        in-place.
    """
    md = catalog.getMetadata()
    # NOTE(review): assumes getMetadata() returns a usable metadata object;
    # if it can return None, a fresh PropertyList must be created here — confirm.
    md.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(md)
60 """Task runner for the reference catalog ingester 62 Data IDs are ignored so the runner should just run the task on the parsed command. 65 def run(self, parsedCmd):
68 Several arguments need to be collected to send on to the task methods. 72 parsedCmd : `argparse.Namespace` 77 results : `lsst.pipe.base.Struct` or `None` 78 A empty struct if self.doReturnResults, else None 80 files = parsedCmd.files
81 butler = parsedCmd.butler
82 task = self.TaskClass(config=self.config, log=self.log, butler=butler)
83 task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
85 task.createIndexedCatalog(files)
86 if self.doReturnResults:
87 return pipeBase.Struct()
91 """The description of the on-disk storage format for the persisted 94 format_version = pexConfig.Field(
96 doc=
"Version number of the persisted on-disk storage format." 97 "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)." 98 "\nVersion 1 had nJy as flux units.",
101 ref_dataset_name = pexConfig.Field(
103 default=
'cal_ref_cat',
104 doc=
'String to pass to the butler to retrieve persisted files.',
106 indexer = IndexerRegistry.makeField(
108 doc=
'Name of indexer algoritm to use. Default is HTM',
class IngestIndexedReferenceConfig(pexConfig.Config):
    """Configuration for IngestIndexedReferenceTask: input column names and
    options controlling how an external catalog is ingested.
    """
    # NOTE(review): the `class` line, dtypes, defaults, and `optional=` flags
    # were lost in extraction; reconstructed — confirm against upstream.
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    n_processes = pexConfig.Field(
        dtype=int,
        doc=("Number of python processes to use when ingesting."),
        default=1,
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files. Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column",
    )
    ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA error column",
        optional=True,
    )
    dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec error column",
        optional=True,
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information. At least one entry is required."
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating if the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    pm_ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA column",
        optional=True,
    )
    pm_dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec column",
        optional=True,
    )
    pm_ra_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion RA error column",
        optional=True,
    )
    pm_dec_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of proper motion Dec error column",
        optional=True,
    )
    pm_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
        default=1.0,
    )
    parallax_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax column",
        optional=True,
    )
    parallax_err_name = pexConfig.Field(
        dtype=str,
        doc="Name of parallax error column",
        optional=True,
    )
    parallax_scale = pexConfig.Field(
        dtype=float,
        doc="Scale factor by which to multiply parallax values to obtain units of milliarcsec",
        default=1.0,
    )
    epoch_name = pexConfig.Field(
        dtype=str,
        doc="Name of epoch column",
        optional=True,
    )
    epoch_format = pexConfig.Field(
        dtype=str,
        doc="Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
        optional=True,
    )
    epoch_scale = pexConfig.Field(
        dtype=str,
        doc="Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
        optional=True,
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def validate(self):
        """Check that the column-name configuration is self-consistent.

        Raises
        ------
        ValueError
            If required columns are missing, or if dependent options are
            only partially specified.
        """
        pexConfig.Config.validate(self)

        def assertAllOrNone(*names):
            """Raise ValueError unless all the named fields are set or are
            all none (or blank).
            """
            setNames = [name for name in names if bool(getattr(self, name))]
            if len(setNames) in (len(names), 0):
                return
            prefix = "Both or neither" if len(names) == 2 else "All or none"
            raise ValueError("{} of {} must be set, but only {} are set".format(
                prefix, ", ".join(names), ", ".join(setNames)))

        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
        if self.mag_err_column_map and set(self.mag_column_list) != set(self.mag_err_column_map.keys()):
            raise ValueError(
                "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
                    sorted(self.mag_err_column_map.keys()), sorted(self.mag_column_list)))
        assertAllOrNone("ra_err_name", "dec_err_name")
        assertAllOrNone("epoch_name", "epoch_format", "epoch_scale")
        assertAllOrNone("pm_ra_name", "pm_dec_name")
        assertAllOrNone("pm_ra_err_name", "pm_dec_err_name")
        # NOTE(review): the conditions guarding the two raises below were lost
        # in extraction; reconstructed from the error messages — confirm.
        if self.pm_ra_err_name and not self.pm_ra_name:
            raise ValueError(
                '"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
        if (self.pm_ra_name or self.parallax_name) and not self.epoch_name:
            raise ValueError(
                '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
273 """Class for producing and loading indexed reference catalogs. 275 This implements an indexing scheme based on hierarchical triangular 276 mesh (HTM). The term index really means breaking the catalog into 277 localized chunks called shards. In this case each shard contains 278 the entries from the catalog in a single HTM trixel 280 For producing catalogs this task makes the following assumptions 281 about the input catalogs: 282 - RA, Dec, RA error and Dec error are all in decimal degrees. 283 - Epoch is available in a column, in a format supported by astropy.time.Time. 284 - There are no off-diagonal covariance terms, such as covariance 285 between RA and Dec, or between PM RA and PM Dec. Gaia is a well 286 known example of a catalog that has such terms, and thus should not 287 be ingested with this task. 291 butler : `lsst.daf.persistence.Butler` 292 Data butler for reading and writing catalogs 294 canMultiprocess =
False 295 ConfigClass = IngestIndexedReferenceConfig
296 RunnerClass = IngestReferenceRunner
297 _DefaultName =
'IngestIndexedReferenceTask' 300 def _makeArgumentParser(cls):
301 """Create an argument parser. 303 This returns a standard parser with an extra "files" argument. 305 parser = pipeBase.InputOnlyArgumentParser(name=cls.
_DefaultName)
306 parser.add_argument(
"files", nargs=
"+", help=
"Names of files to index")
def __init__(self, *args, butler=None, **kwargs):
    """Construct the ingestion task.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler for reading and writing catalogs.
    """
    # NOTE(review): the signature and the two lines before the indexer
    # construction were lost in extraction; reconstructed from the signature
    # dump at the end of the chunk and from later uses of self.butler — confirm.
    self.butler = butler
    super().__init__(*args, **kwargs)
    # Build the indexer selected by the dataset configuration.
    self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
        self.config.dataset_config.indexer.active)
    self.makeSubtask('file_reader')
317 """Index a set of files comprising a reference catalog. 319 Outputs are persisted in the butler repository. 324 A list of file paths to read. 339 worker.run(inputFiles)
342 dataId = self.
indexer.makeDataId(
None, self.config.dataset_config.ref_dataset_name)
343 self.
butler.put(self.config.dataset_config,
'ref_cat_config', dataId=dataId)
def _saveMasterSchema(self, filename):
    """Generate and save the master catalog schema.

    Parameters
    ----------
    filename : `str`
        An input file to read to get the input dtype.

    Returns
    -------
    schema, key_map : `tuple`
        The schema for the output catalogs and the map of catalog keys,
        as produced by ``makeSchema``.
    """
    arr = self.file_reader.run(filename)
    # NOTE(review): the makeSchema call and addRefCatMetadata call were lost
    # in extraction; restored from the later uses of schema/key_map — confirm.
    schema, key_map = self.makeSchema(arr.dtype)
    dataId = self.indexer.makeDataId('master_schema',
                                     self.config.dataset_config.ref_dataset_name)
    # Persist an empty catalog carrying the schema and format metadata.
    catalog = afwTable.SimpleCatalog(schema)
    addRefCatMetadata(catalog)
    self.butler.put(catalog, 'ref_cat', dataId=dataId)
    return schema, key_map
363 def _getButlerFilenames(self, htm):
364 """Get filenames from the butler for each output pixel.""" 366 start, end = htm.universe()[0]
368 dataId = self.
indexer.makeDataId(start, self.config.dataset_config.ref_dataset_name)
369 path = self.
butler.get(
'ref_cat_filename', dataId=dataId)[0]
370 base = os.path.join(os.path.dirname(path),
"%d"+os.path.splitext(path)[1])
371 for pixelId
in range(start, end):
372 filenames[pixelId] = base % pixelId
377 """Make the schema to use in constructing the persisted catalogs. 381 dtype : `numpy.dtype` 382 Data type describing each entry in ``config.extra_col_names`` 383 for the catalogs being ingested. 387 schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`) 388 A tuple containing two items: 389 - The schema for the output source catalog. 390 - A map of catalog keys to use in filling the record 393 schema = LoadReferenceObjectsTask.makeMinimalSchema(
394 filterNameList=self.config.mag_column_list,
396 addIsPhotometric=bool(self.config.is_photometric_name),
397 addIsResolved=bool(self.config.is_resolved_name),
398 addIsVariable=bool(self.config.is_variable_name),
399 coordErrDim=2
if bool(self.config.ra_err_name)
else 0,
400 addProperMotion=2
if bool(self.config.pm_ra_name)
else 0,
401 properMotionErrDim=2
if bool(self.config.pm_ra_err_name)
else 0,
402 addParallax=bool(self.config.parallax_name),
403 addParallaxErr=bool(self.config.parallax_err_name),
405 keysToSkip = set((
"id",
"centroid_x",
"centroid_y",
"hasCentroid"))
406 key_map = {fieldName: schema[fieldName].asKey()
for fieldName
in schema.getOrderedNames()
407 if fieldName
not in keysToSkip}
410 if dtype[name].kind ==
'U': 412 at_size = dtype[name].itemsize
413 return schema.addField(name, type=str, size=at_size)
415 at_type = dtype[name].type
416 return schema.addField(name, at_type)
418 for col
in self.config.extra_col_names:
419 key_map[col] = addField(col)
420 return schema, key_map
def __init__(self, args, butler=None, kwargs)
def makeSchema(self, dtype)
def addRefCatMetadata(catalog)
def createIndexedCatalog(self, inputFiles)
def _saveMasterSchema(self, filename)
def _getButlerFilenames(self, htm)