28__all__ = [
"DatasetConfig",
"ConvertReferenceCatalogBase",
"ConvertReferenceCatalogConfig"]
36import lsst.pipe.base
as pipeBase
41from .indexerRegistry
import IndexerRegistry
42from .readTextCatalogTask
import ReadTextCatalogTask
43from .loadReferenceObjects
import ReferenceObjectLoader
44from .
import convertRefcatManager
47LATEST_FORMAT_VERSION = 1
51 """Add metadata to a new (not yet populated) reference catalog.
56 Catalog to which metadata should be attached. Will be modified
59 md = catalog.getMetadata()
62 md.set(
"REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
63 catalog.setMetadata(md)
67 """Task runner for the reference catalog ingester (gen2 version).
69 Data IDs are ignored so the runner should just run the task on the parsed command.
72 def run(self, parsedCmd):
75 Several arguments need to be collected to send on to the task methods.
79 parsedCmd : `argparse.Namespace`
84 results : `lsst.pipe.base.Struct` or `
None`
85 An empty struct
if self.doReturnResults,
else None
87 files = parsedCmd.files
88 butler = parsedCmd.butler
89 task = self.TaskClass(config=self.config, log=self.log, butler=butler)
90 task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
93 if self.doReturnResults:
94 return pipeBase.Struct()
98 """The description of the on-disk storage format for the persisted
101 format_version = pexConfig.Field(
103 doc="Version number of the persisted on-disk storage format."
104 "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
105 "\nVersion 1 had nJy as flux units.",
108 ref_dataset_name = pexConfig.Field(
111 default=
'cal_ref_cat',
112 doc=
"Name of this reference catalog to be used in the butler registry.",
114 indexer = IndexerRegistry.makeField(
116 doc=
'Name of indexer algoritm to use. Default is HTM',
121 dataset_config = pexConfig.ConfigField(
123 doc=
"Configuration for reading the ingested data",
125 n_processes = pexConfig.Field(
127 doc=(
"Number of python processes to use when ingesting."),
130 manager = pexConfig.ConfigurableField(
132 doc=
"Multiprocessing manager to perform the actual conversion of values, file-by-file."
134 file_reader = pexConfig.ConfigurableField(
135 target=ReadTextCatalogTask,
136 doc=
'Task to use to read the files. Default is to expect text files.'
138 ra_name = pexConfig.Field(
140 doc=
"Name of RA column (values in decimal degrees)",
142 dec_name = pexConfig.Field(
144 doc=
"Name of Dec column (values in decimal degrees)",
146 ra_err_name = pexConfig.Field(
148 doc=
"Name of RA error column",
151 dec_err_name = pexConfig.Field(
153 doc=
"Name of Dec error column",
156 coord_err_unit = pexConfig.Field(
158 doc=
"Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
161 mag_column_list = pexConfig.ListField(
163 doc=
"The values in the reference catalog are assumed to be in AB magnitudes. "
164 "List of column names to use for photometric information. At least one entry is required."
166 mag_err_column_map = pexConfig.DictField(
170 doc=
"A map of magnitude column name (key) to magnitude error column (value)."
172 is_photometric_name = pexConfig.Field(
175 doc=
'Name of column stating if satisfactory for photometric calibration (optional).'
177 is_resolved_name = pexConfig.Field(
180 doc=
'Name of column stating if the object is resolved (optional).'
182 is_variable_name = pexConfig.Field(
185 doc=
'Name of column stating if the object is measured to be variable (optional).'
187 id_name = pexConfig.Field(
190 doc=
'Name of column to use as an identifier (optional).'
192 pm_ra_name = pexConfig.Field(
194 doc=
"Name of proper motion RA column",
197 pm_dec_name = pexConfig.Field(
199 doc=
"Name of proper motion Dec column",
202 pm_ra_err_name = pexConfig.Field(
204 doc=
"Name of proper motion RA error column",
207 pm_dec_err_name = pexConfig.Field(
209 doc=
"Name of proper motion Dec error column",
212 pm_scale = pexConfig.Field(
214 doc=
"Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
217 parallax_name = pexConfig.Field(
219 doc=
"Name of parallax column",
222 parallax_err_name = pexConfig.Field(
224 doc=
"Name of parallax error column",
227 parallax_scale = pexConfig.Field(
229 doc=
"Scale factor by which to multiply parallax values to obtain units of milliarcsec",
232 epoch_name = pexConfig.Field(
234 doc=
"Name of epoch column",
237 epoch_format = pexConfig.Field(
239 doc=
"Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
242 epoch_scale = pexConfig.Field(
244 doc=
"Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
247 extra_col_names = pexConfig.ListField(
250 doc=
'Extra columns to add to the reference catalog.'
260 pexConfig.Config.validate(self)
262 def assertAllOrNone(*names):
263 """Raise ValueError unless all the named fields are set or are
266 setNames = [name for name
in names
if bool(getattr(self, name))]
267 if len(setNames)
in (len(names), 0):
269 prefix =
"Both or neither" if len(names) == 2
else "All or none"
270 raise ValueError(
"{} of {} must be set, but only {} are set".format(
271 prefix,
", ".join(names),
", ".join(setNames)))
275 "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
278 "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
280 assertAllOrNone(
"ra_err_name",
"dec_err_name",
"coord_err_unit")
282 result = astropy.units.Unit(self.
coord_err_unit, parse_strict=
'silent')
283 if isinstance(result, astropy.units.UnrecognizedUnit):
284 msg = f
"{self.coord_err_unit} is not a valid astropy unit string."
285 raise pexConfig.FieldValidationError(ConvertReferenceCatalogConfig.coord_err_unit, self, msg)
287 assertAllOrNone(
"epoch_name",
"epoch_format",
"epoch_scale")
288 assertAllOrNone(
"pm_ra_name",
"pm_dec_name")
289 assertAllOrNone(
"pm_ra_err_name",
"pm_dec_err_name")
290 assertAllOrNone(
"parallax_name",
"parallax_err_name")
292 raise ValueError(
'"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
295 '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
299 """Base class for producing and loading indexed reference catalogs,
300 shared between gen2 and gen3.
302 This implements an indexing scheme based on hierarchical triangular
303 mesh (HTM). The term index really means breaking the catalog into
304 localized chunks called shards. In this case each shard contains
305 the entries
from the catalog
in a single HTM trixel
307 For producing catalogs this task makes the following assumptions
308 about the input catalogs:
309 - RA, Dec are
in decimal degrees.
310 - Epoch
is available
in a column,
in a format supported by astropy.time.Time.
311 - There are no off-diagonal covariance terms, such
as covariance
312 between RA
and Dec,
or between PM RA
and PM Dec. Support
for such
313 covariance would have to be added to the config, including consideration
314 of the units
in the input catalog.
316 canMultiprocess = False
317 ConfigClass = ConvertReferenceCatalogConfig
321 self.
indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
322 self.config.dataset_config.indexer.active)
323 self.makeSubtask(
'file_reader')
325 def run(self, inputFiles):
326 """Index a set of files comprising a reference catalog.
328 Outputs are persisted in the butler repository.
333 A list of file paths to read.
340 worker = self.config.manager.target(filenames,
349 result = worker.run(inputFiles)
355 """Any setup that has to be performed at the start of ``run``, but that
356 cannot be performed during ``__init__`` (e.g. making directories).
360 def _postRun(self, result):
361 """Any tasks that have to happen at the end of ``run``.
366 The result returned from ``worker.run()``.
370 def _getButlerFilenames(self, htm):
371 """Get filenames from the butler for each output htm pixel.
376 The HTM pixelization scheme to be used to build filenames.
380 filenames : `list [str]`
381 List of filenames to write each HTM pixel to.
384 start, end = htm.universe()[0]
387 base = os.path.join(os.path.dirname(path),
"%d"+os.path.splitext(path)[1])
388 for pixelId
in range(start, end):
389 filenames[pixelId] = base % pixelId
394 """Make the schema to use in constructing the persisted catalogs.
398 dtype : `numpy.dtype`
399 Data type describing each entry in ``config.extra_col_names``
400 for the catalogs being ingested.
405 A tuple containing two items:
406 - The schema
for the output source catalog.
407 - A map of catalog keys to use
in filling the record
410 schema = ReferenceObjectLoader.makeMinimalSchema(
411 filterNameList=self.config.mag_column_list,
413 addIsPhotometric=bool(self.config.is_photometric_name),
414 addIsResolved=bool(self.config.is_resolved_name),
415 addIsVariable=bool(self.config.is_variable_name),
416 coordErrDim=2
if bool(self.config.ra_err_name)
else 0,
417 addProperMotion=2
if bool(self.config.pm_ra_name)
else 0,
418 properMotionErrDim=2
if bool(self.config.pm_ra_err_name)
else 0,
419 addParallax=bool(self.config.parallax_name),
421 keysToSkip = set((
"id",
"centroid_x",
"centroid_y",
"hasCentroid"))
422 key_map = {fieldName: schema[fieldName].asKey()
for fieldName
in schema.getOrderedNames()
423 if fieldName
not in keysToSkip}
426 if dtype[name].kind ==
'U':
428 at_size = dtype[name].itemsize
429 return schema.addField(name, type=str, size=at_size)
431 at_type = dtype[name].type
432 return schema.addField(name, at_type)
434 for col
in self.config.extra_col_names:
435 key_map[col] = addField(col)
436 return schema, key_map
438 def _saveMasterSchema(self, filename):
439 """Generate and save the master catalog schema.
444 An input file to read to get the input dtype.
446 arr = self.file_reader.run(filename)
449 catalog = afwTable.SimpleCatalog(schema)
452 return schema, key_map
455 def _getOnePixelFilename(self, start):
456 """Return one example filename to help construct the rest of the
457 per-htm pixel filenames.
462 The first HTM index in this HTM pixelization.
467 Path to a single file that would be written to the output location.
472 def _persistConfig(self):
473 """Write the config that was used to generate the refcat.
478 def _writeMasterSchema(self, catalog):
479 """Butler put the master catalog schema.
484 An empty catalog with a fully-defined schema that matches the
485 schema used
in each of the HTM pixel files.
def run(self, inputFiles)
def _postRun(self, result)
def makeSchema(self, dtype)
def _saveMasterSchema(self, filename)
def _getButlerFilenames(self, htm)
def _getOnePixelFilename(self, start)
def _writeMasterSchema(self, catalog)
def __init__(self, *args, **kwargs)
def addRefCatMetadata(catalog)