# Names exported by ``from <this module> import *``.
__all__ = [
    "DatasetConfig",
    "ConvertReferenceCatalogBase",
    "ConvertReferenceCatalogConfig",
]
36import lsst.pipe.base
as pipeBase
41from .indexerRegistry
import IndexerRegistry
42from .readTextCatalogTask
import ReadTextCatalogTask
43from .loadReferenceObjects
import ReferenceObjectLoader
44from .
import convertRefcatManager
# Format version recorded on newly created reference catalogs: this value is
# written to the "REFCAT_FORMAT_VERSION" metadata key. Per the format_version
# config documentation in this module, version 1 uses nJy as flux units
# (version 0 used Jy).
LATEST_FORMAT_VERSION = 1
51 """Add metadata to a new (not yet populated) reference catalog.
56 Catalog to which metadata should be attached. Will be modified
59 md = catalog.getMetadata()
62 md.set(
"REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
63 catalog.setMetadata(md)
67 """The description of the on-disk storage format for the persisted
70 format_version = pexConfig.Field(
72 doc="Version number of the persisted on-disk storage format."
73 "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
74 "\nVersion 1 had nJy as flux units.",
77 ref_dataset_name = pexConfig.Field(
80 default=
'cal_ref_cat',
81 doc=
"Name of this reference catalog to be used in the butler registry.",
83 indexer = IndexerRegistry.makeField(
85 doc=
'Name of indexer algoritm to use. Default is HTM',
90 dataset_config = pexConfig.ConfigField(
92 doc=
"Configuration for reading the ingested data",
94 n_processes = pexConfig.Field(
96 doc=(
"Number of python processes to use when ingesting."),
99 manager = pexConfig.ConfigurableField(
101 doc=
"Multiprocessing manager to perform the actual conversion of values, file-by-file."
103 file_reader = pexConfig.ConfigurableField(
104 target=ReadTextCatalogTask,
105 doc=
'Task to use to read the files. Default is to expect text files.'
107 ra_name = pexConfig.Field(
109 doc=
"Name of RA column (values in decimal degrees)",
111 dec_name = pexConfig.Field(
113 doc=
"Name of Dec column (values in decimal degrees)",
115 ra_err_name = pexConfig.Field(
117 doc=
"Name of RA error column",
120 dec_err_name = pexConfig.Field(
122 doc=
"Name of Dec error column",
125 coord_err_unit = pexConfig.Field(
127 doc=
"Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
130 mag_column_list = pexConfig.ListField(
132 doc=
"The values in the reference catalog are assumed to be in AB magnitudes. "
133 "List of column names to use for photometric information. At least one entry is required."
135 mag_err_column_map = pexConfig.DictField(
139 doc=
"A map of magnitude column name (key) to magnitude error column (value)."
141 is_photometric_name = pexConfig.Field(
144 doc=
'Name of column stating if satisfactory for photometric calibration (optional).'
146 is_resolved_name = pexConfig.Field(
149 doc=
'Name of column stating if the object is resolved (optional).'
151 is_variable_name = pexConfig.Field(
154 doc=
'Name of column stating if the object is measured to be variable (optional).'
156 id_name = pexConfig.Field(
159 doc=
'Name of column to use as an identifier (optional).'
161 pm_ra_name = pexConfig.Field(
163 doc=
"Name of proper motion RA column",
166 pm_dec_name = pexConfig.Field(
168 doc=
"Name of proper motion Dec column",
171 pm_ra_err_name = pexConfig.Field(
173 doc=
"Name of proper motion RA error column",
176 pm_dec_err_name = pexConfig.Field(
178 doc=
"Name of proper motion Dec error column",
181 pm_scale = pexConfig.Field(
183 doc=
"Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
186 parallax_name = pexConfig.Field(
188 doc=
"Name of parallax column",
191 parallax_err_name = pexConfig.Field(
193 doc=
"Name of parallax error column",
196 parallax_scale = pexConfig.Field(
198 doc=
"Scale factor by which to multiply parallax values to obtain units of milliarcsec",
201 epoch_name = pexConfig.Field(
203 doc=
"Name of epoch column",
206 epoch_format = pexConfig.Field(
208 doc=
"Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
211 epoch_scale = pexConfig.Field(
213 doc=
"Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
216 extra_col_names = pexConfig.ListField(
219 doc=
'Extra columns to add to the reference catalog.'
229 pexConfig.Config.validate(self)
231 def assertAllOrNone(*names):
232 """Raise ValueError unless all the named fields are set or are
235 setNames = [name for name
in names
if bool(getattr(self, name))]
236 if len(setNames)
in (len(names), 0):
238 prefix =
"Both or neither" if len(names) == 2
else "All or none"
239 raise ValueError(
"{} of {} must be set, but only {} are set".format(
240 prefix,
", ".join(names),
", ".join(setNames)))
244 "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")
247 "mag_err_column_map specified, but keys do not match mag_column_list: {} != {}".format(
249 assertAllOrNone(
"ra_err_name",
"dec_err_name",
"coord_err_unit")
251 result = astropy.units.Unit(self.
coord_err_unit, parse_strict=
'silent')
252 if isinstance(result, astropy.units.UnrecognizedUnit):
253 msg = f
"{self.coord_err_unit} is not a valid astropy unit string."
254 raise pexConfig.FieldValidationError(ConvertReferenceCatalogConfig.coord_err_unit, self, msg)
256 assertAllOrNone(
"epoch_name",
"epoch_format",
"epoch_scale")
257 assertAllOrNone(
"pm_ra_name",
"pm_dec_name")
258 assertAllOrNone(
"pm_ra_err_name",
"pm_dec_err_name")
259 assertAllOrNone(
"parallax_name",
"parallax_err_name")
261 raise ValueError(
'"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
264 '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
268 """Base class for producing and loading indexed reference catalogs,
269 shared between gen2 and gen3.
271 This implements an indexing scheme based on hierarchical triangular
272 mesh (HTM). The term index really means breaking the catalog into
273 localized chunks called shards. In this case each shard contains
274 the entries
from the catalog
in a single HTM trixel.
276 For producing catalogs this task makes the following assumptions
277 about the input catalogs:
278 - RA, Dec are
in decimal degrees.
279 - Epoch
is available
in a column,
in a format supported by astropy.time.Time.
280 - There are no off-diagonal covariance terms, such
as covariance
281 between RA
and Dec,
or between PM RA
and PM Dec. Support
for such
282 covariance would have to be added to the config, including consideration
283 of the units
in the input catalog.
285 canMultiprocess = False
286 ConfigClass = ConvertReferenceCatalogConfig
290 self.
indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
291 self.config.dataset_config.indexer.active)
292 self.makeSubtask(
'file_reader')
294 def run(self, inputFiles):
295 """Index a set of files comprising a reference catalog.
297 Outputs are persisted in the butler repository.
302 A list of file paths to read.
309 worker = self.config.manager.target(filenames,
318 result = worker.run(inputFiles)
324 """Any setup that has to be performed at the start of ``run``, but that
325 cannot be performed during ``__init__`` (e.g. making directories).
329 def _postRun(self, result):
330 """Any tasks that have to happen at the end of ``run``.
335 The result returned from ``worker.run()``.
339 def _getButlerFilenames(self, htm):
340 """Get filenames from the butler for each output htm pixel.
345 The HTM pixelization scheme to be used to build filenames.
349 filenames : `list [str]`
350 List of filenames to write each HTM pixel to.
353 start, end = htm.universe()[0]
356 base = os.path.join(os.path.dirname(path),
"%d"+os.path.splitext(path)[1])
357 for pixelId
in range(start, end):
358 filenames[pixelId] = base % pixelId
363 """Make the schema to use in constructing the persisted catalogs.
367 dtype : `numpy.dtype`
368 Data type describing each entry in ``config.extra_col_names``
369 for the catalogs being ingested.
374 A tuple containing two items:
375 - The schema
for the output source catalog.
376 - A map of catalog keys to use
in filling the record
379 schema = ReferenceObjectLoader.makeMinimalSchema(
380 filterNameList=self.config.mag_column_list,
382 addIsPhotometric=bool(self.config.is_photometric_name),
383 addIsResolved=bool(self.config.is_resolved_name),
384 addIsVariable=bool(self.config.is_variable_name),
385 coordErrDim=2
if bool(self.config.ra_err_name)
else 0,
386 addProperMotion=2
if bool(self.config.pm_ra_name)
else 0,
387 properMotionErrDim=2
if bool(self.config.pm_ra_err_name)
else 0,
388 addParallax=bool(self.config.parallax_name),
390 keysToSkip = set((
"id",
"centroid_x",
"centroid_y",
"hasCentroid"))
391 key_map = {fieldName: schema[fieldName].asKey()
for fieldName
in schema.getOrderedNames()
392 if fieldName
not in keysToSkip}
395 if dtype[name].kind ==
'U':
397 at_size = dtype[name].itemsize
398 return schema.addField(name, type=str, size=at_size)
400 at_type = dtype[name].type
401 return schema.addField(name, at_type)
403 for col
in self.config.extra_col_names:
404 key_map[col] = addField(col)
405 return schema, key_map
407 def _saveMasterSchema(self, filename):
408 """Generate and save the master catalog schema.
413 An input file to read to get the input dtype.
415 arr = self.file_reader.run(filename)
418 catalog = afwTable.SimpleCatalog(schema)
421 return schema, key_map
424 def _getOnePixelFilename(self, start):
425 """Return one example filename to help construct the rest of the
426 per-htm pixel filenames.
431 The first HTM index in this HTM pixelization.
436 Path to a single file that would be written to the output location.
441 def _persistConfig(self):
442 """Write the config that was used to generate the refcat.
447 def _writeMasterSchema(self, catalog):
448 """Butler put the master catalog schema.
453 An empty catalog with a fully-defined schema that matches the
454 schema used
in each of the HTM pixel files.
def run(self, inputFiles)
def _postRun(self, result)
def makeSchema(self, dtype)
def _saveMasterSchema(self, filename)
def _getButlerFilenames(self, htm)
def _getOnePixelFilename(self, start)
def _writeMasterSchema(self, catalog)
def __init__(self, *args, **kwargs)
def addRefCatMetadata(catalog)