Coverage for python/lsst/meas/algorithms/ingestIndexReferenceTask.py: 65%
148 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-08-06 01:41 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-08-06 01:41 -0700
1# This file is part of meas_algorithms.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
23# TODO DM-31698: post-gen2 removal notes
24# `DatasetConfig`, `ConvertReferenceCatalogBase`, and `ConvertReferenceCatalogConfig`
25# should all be moved to `convertReferenceCatalog.py` once gen2 butler
26# has been removed.
28__all__ = ["DatasetConfig", "ConvertReferenceCatalogBase", "ConvertReferenceCatalogConfig"]
30import abc
31import os.path
33import astropy.units
35import lsst.pex.config as pexConfig
36import lsst.pipe.base as pipeBase
37import lsst.geom
38import lsst.sphgeom
39import lsst.afw.table as afwTable
40from lsst.daf.base import PropertyList
41from .indexerRegistry import IndexerRegistry
42from .readTextCatalogTask import ReadTextCatalogTask
43from .loadReferenceObjects import ReferenceObjectLoader
44from . import convertRefcatManager
46# The most recent Indexed Reference Catalog on-disk format version.
47LATEST_FORMAT_VERSION = 1
def addRefCatMetadata(catalog):
    """Stamp a reference catalog with the current on-disk format version.

    Parameters
    ----------
    catalog : `lsst.afw.table.SimpleCatalog`
        Catalog that receives the metadata; it is modified in-place.
        If it carries no metadata yet, a fresh
        `lsst.daf.base.PropertyList` is created for it.
    """
    existing = catalog.getMetadata()
    # Reuse whatever metadata is already attached; only build a new
    # container when the catalog has none.
    metadata = PropertyList() if existing is None else existing
    metadata.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
    catalog.setMetadata(metadata)
class IngestReferenceRunner(pipeBase.TaskRunner):
    """Gen2 task runner for the reference catalog ingester.

    No per-data-ID dispatch happens here: the task is constructed once and
    run directly on the file list taken from the parsed command.
    """

    def run(self, parsedCmd):
        """Construct the ingest task, persist its config, and run it.

        Parameters
        ----------
        parsedCmd : `argparse.Namespace`
            Parsed command; its ``files`` and ``butler`` attributes are
            read here.

        Returns
        -------
        results : `lsst.pipe.base.Struct` or `None`
            An empty struct if ``self.doReturnResults`` is set, else `None`.
        """
        # Pull everything needed off the parsed command up front.
        inputFiles = parsedCmd.files
        repo = parsedCmd.butler

        ingester = self.TaskClass(config=self.config, log=self.log, butler=repo)
        ingester.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
        ingester.run(inputFiles)

        if not self.doReturnResults:
            return None
        return pipeBase.Struct()
class DatasetConfig(pexConfig.Config):
    """Describe how a persisted reference catalog is stored on disk:
    its format version, its dataset name, and the indexer used.
    """
    format_version = pexConfig.Field(
        doc=(
            "Version number of the persisted on-disk storage format."
            "\nVersion 0 had Jy as flux units (default 0 for unversioned catalogs)."
            "\nVersion 1 had nJy as flux units."
        ),
        dtype=int,
        # Must stay 0: a catalog persisted without a version is then read
        # back as version 0.
        default=0,
    )
    ref_dataset_name = pexConfig.Field(
        doc="Name of this reference catalog to be used in the butler registry.",
        dtype=str,
        # TODO DM-31817: remove this default value.
        default='cal_ref_cat',
    )
    indexer = IndexerRegistry.makeField(
        doc='Name of indexer algoritm to use. Default is HTM',
        default='HTM',
    )
class ConvertReferenceCatalogConfig(pexConfig.Config):
    """Configuration for converting an external catalog into a reference
    catalog: which input columns to read, how to scale them, and which
    reader/manager to use.
    """
    dataset_config = pexConfig.ConfigField(
        doc="Configuration for reading the ingested data",
        dtype=DatasetConfig,
    )
    n_processes = pexConfig.Field(
        doc=("Number of python processes to use when ingesting."),
        dtype=int,
        default=1,
    )
    manager = pexConfig.ConfigurableField(
        doc="Multiprocessing manager to perform the actual conversion of values, file-by-file.",
        target=convertRefcatManager.ConvertRefcatManager,
    )
    file_reader = pexConfig.ConfigurableField(
        doc='Task to use to read the files. Default is to expect text files.',
        target=ReadTextCatalogTask,
    )

    # --- Sky position columns -------------------------------------------
    ra_name = pexConfig.Field(
        doc="Name of RA column (values in decimal degrees)",
        dtype=str,
    )
    dec_name = pexConfig.Field(
        doc="Name of Dec column (values in decimal degrees)",
        dtype=str,
    )
    ra_err_name = pexConfig.Field(
        doc="Name of RA error column",
        dtype=str,
        optional=True,
    )
    dec_err_name = pexConfig.Field(
        doc="Name of Dec error column",
        dtype=str,
        optional=True,
    )
    coord_err_unit = pexConfig.Field(
        doc="Unit of RA/Dec error fields (astropy.unit.Unit compatible)",
        dtype=str,
        optional=True,
    )

    # --- Photometry columns ---------------------------------------------
    mag_column_list = pexConfig.ListField(
        doc=(
            "The values in the reference catalog are assumed to be in AB magnitudes. "
            "List of column names to use for photometric information. At least one entry is required."
        ),
        dtype=str,
    )
    mag_err_column_map = pexConfig.DictField(
        doc="A map of magnitude column name (key) to magnitude error column (value).",
        keytype=str,
        itemtype=str,
        default={},
    )

    # --- Flag and identifier columns ------------------------------------
    is_photometric_name = pexConfig.Field(
        doc='Name of column stating if satisfactory for photometric calibration (optional).',
        dtype=str,
        optional=True,
    )
    is_resolved_name = pexConfig.Field(
        doc='Name of column stating if the object is resolved (optional).',
        dtype=str,
        optional=True,
    )
    is_variable_name = pexConfig.Field(
        doc='Name of column stating if the object is measured to be variable (optional).',
        dtype=str,
        optional=True,
    )
    id_name = pexConfig.Field(
        doc='Name of column to use as an identifier (optional).',
        dtype=str,
        optional=True,
    )

    # --- Proper motion columns ------------------------------------------
    pm_ra_name = pexConfig.Field(
        doc="Name of proper motion RA column",
        dtype=str,
        optional=True,
    )
    pm_dec_name = pexConfig.Field(
        doc="Name of proper motion Dec column",
        dtype=str,
        optional=True,
    )
    pm_ra_err_name = pexConfig.Field(
        doc="Name of proper motion RA error column",
        dtype=str,
        optional=True,
    )
    pm_dec_err_name = pexConfig.Field(
        doc="Name of proper motion Dec error column",
        dtype=str,
        optional=True,
    )
    pm_scale = pexConfig.Field(
        doc="Scale factor by which to multiply proper motion values to obtain units of milliarcsec/year",
        dtype=float,
        default=1.0,
    )

    # --- Parallax columns -----------------------------------------------
    parallax_name = pexConfig.Field(
        doc="Name of parallax column",
        dtype=str,
        optional=True,
    )
    parallax_err_name = pexConfig.Field(
        doc="Name of parallax error column",
        dtype=str,
        optional=True,
    )
    parallax_scale = pexConfig.Field(
        doc="Scale factor by which to multiply parallax values to obtain units of milliarcsec",
        dtype=float,
        default=1.0,
    )

    # --- Epoch columns --------------------------------------------------
    epoch_name = pexConfig.Field(
        doc="Name of epoch column",
        dtype=str,
        optional=True,
    )
    epoch_format = pexConfig.Field(
        doc="Format of epoch column: any value accepted by astropy.time.Time, e.g. 'iso' or 'unix'",
        dtype=str,
        optional=True,
    )
    epoch_scale = pexConfig.Field(
        doc="Scale of epoch column: any value accepted by astropy.time.Time, e.g. 'utc'",
        dtype=str,
        optional=True,
    )

    extra_col_names = pexConfig.ListField(
        doc='Extra columns to add to the reference catalog.',
        dtype=str,
        default=[],
    )

    def setDefaults(self):
        """Override the nested dataset defaults for newly converted catalogs."""
        # A freshly ingested reference catalog is always written in the
        # latest format version.
        self.dataset_config.format_version = LATEST_FORMAT_VERSION
        # gen3 refcats are all depth=7
        self.dataset_config.indexer['HTM'].depth = 7

    def validate(self):
        """Check that the configured column names form a consistent set.

        Raises
        ------
        ValueError
            Raised if a required name is missing, if the magnitude error
            map keys differ from ``mag_column_list``, or if a group of
            related fields is only partially set.
        lsst.pex.config.FieldValidationError
            Raised if ``coord_err_unit`` is not recognized by astropy.
        """
        pexConfig.Config.validate(self)

        def requireAllOrNone(*names):
            """Raise ValueError when only some of the named fields are set
            (a blank value counts as unset).
            """
            populated = [field for field in names if getattr(self, field)]
            if not populated or len(populated) == len(names):
                return
            if len(names) == 2:
                prefix = "Both or neither"
            else:
                prefix = "All or none"
            raise ValueError(
                f"{prefix} of {', '.join(names)} must be set, but only {', '.join(populated)} are set")

        if not self.ra_name or not self.dec_name or not self.mag_column_list:
            raise ValueError(
                "ra_name and dec_name and at least one entry in mag_column_list must be supplied.")

        if self.mag_err_column_map:
            errKeys = self.mag_err_column_map.keys()
            if set(self.mag_column_list) != set(errKeys):
                raise ValueError(
                    "mag_err_column_map specified, but keys do not match mag_column_list: "
                    f"{sorted(errKeys)} != {sorted(self.mag_column_list)}")

        requireAllOrNone("ra_err_name", "dec_err_name", "coord_err_unit")
        if self.coord_err_unit is not None:
            # 'silent' makes astropy hand back an UnrecognizedUnit instead of
            # raising, so the failure can be reported against the field.
            parsed = astropy.units.Unit(self.coord_err_unit, parse_strict='silent')
            if isinstance(parsed, astropy.units.UnrecognizedUnit):
                msg = f"{self.coord_err_unit} is not a valid astropy unit string."
                raise pexConfig.FieldValidationError(ConvertReferenceCatalogConfig.coord_err_unit, self, msg)

        linkedGroups = (
            ("epoch_name", "epoch_format", "epoch_scale"),
            ("pm_ra_name", "pm_dec_name"),
            ("pm_ra_err_name", "pm_dec_err_name"),
            ("parallax_name", "parallax_err_name"),
        )
        for group in linkedGroups:
            requireAllOrNone(*group)

        if self.pm_ra_err_name and not self.pm_ra_name:
            raise ValueError('"pm_ra/dec_name" must be specified if "pm_ra/dec_err_name" are specified')
        needsEpoch = self.pm_ra_name or self.parallax_name
        if needsEpoch and not self.epoch_name:
            raise ValueError(
                '"epoch_name" must be specified if "pm_ra/dec_name" or "parallax_name" are specified')
class ConvertReferenceCatalogBase(pipeBase.Task, abc.ABC):
    """Base class for producing and loading indexed reference catalogs,
    shared between gen2 and gen3.

    This implements an indexing scheme based on hierarchical triangular
    mesh (HTM). The term index really means breaking the catalog into
    localized chunks called shards. In this case each shard contains
    the entries from the catalog in a single HTM trixel.

    For producing catalogs this task makes the following assumptions
    about the input catalogs:

    - RA, Dec are in decimal degrees.
    - Epoch is available in a column, in a format supported by astropy.time.Time.
    - There are no off-diagonal covariance terms, such as covariance
      between RA and Dec, or between PM RA and PM Dec. Support for such
      covariance would have to be added to the config, including consideration
      of the units in the input catalog.
    """
    canMultiprocess = False
    ConfigClass = ConvertReferenceCatalogConfig

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Instantiate the indexer selected in the dataset config, using
        # that selection's active configuration.
        self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
            self.config.dataset_config.indexer.active)
        self.makeSubtask('file_reader')

    def run(self, inputFiles):
        """Index a set of files comprising a reference catalog.

        Outputs are persisted in the butler repository.

        Parameters
        ----------
        inputFiles : `list`
            A list of file paths to read. The first entry is read to
            derive the output schema.
        """
        self._preRun()
        schema, key_map = self._saveMasterSchema(inputFiles[0])
        # create an HTM we can interrogate about pixel ids
        htm = lsst.sphgeom.HtmPixelization(self.indexer.htm.get_depth())
        filenames = self._getButlerFilenames(htm)
        worker = self.config.manager.target(filenames,
                                            self.config,
                                            self.file_reader,
                                            self.indexer,
                                            schema,
                                            key_map,
                                            htm.universe()[0],
                                            addRefCatMetadata,
                                            self.log)
        result = worker.run(inputFiles)

        self._persistConfig()
        self._postRun(result)

    def _preRun(self):
        """Any setup that has to be performed at the start of ``run``, but that
        cannot be performed during ``__init__`` (e.g. making directories).
        """
        pass

    def _postRun(self, result):
        """Any tasks that have to happen at the end of ``run``.

        Parameters
        ----------
        result
            The result returned from ``worker.run()``.
        """
        pass

    def _getButlerFilenames(self, htm):
        """Get filenames from the butler for each output htm pixel.

        Parameters
        ----------
        htm : `lsst.sphgeom.HtmPixelization`
            The HTM pixelization scheme to be used to build filenames.

        Returns
        -------
        filenames : `dict` [`int`, `str`]
            Map of HTM pixel id to the filename that pixel is written to,
            for every id in ``range(start, end)`` where ``(start, end)`` is
            the first entry of ``htm.universe()``.
        """
        start, end = htm.universe()[0]
        # path manipulation because butler.get() per pixel will take forever
        path = self._getOnePixelFilename(start)
        # Keep the example file's directory and extension; the file stem is
        # replaced by a "%d" slot that is filled with each pixel id.
        base = os.path.join(os.path.dirname(path), "%d"+os.path.splitext(path)[1])
        return {pixelId: base % pixelId for pixelId in range(start, end)}

    def makeSchema(self, dtype):
        """Make the schema to use in constructing the persisted catalogs.

        Parameters
        ----------
        dtype : `numpy.dtype`
            Data type describing each entry in ``config.extra_col_names``
            for the catalogs being ingested.

        Returns
        -------
        schemaAndKeyMap : `tuple` of (`lsst.afw.table.Schema`, `dict`)
            A tuple containing two items:

            - The schema for the output source catalog.
            - A map of catalog keys to use in filling the record
        """
        # make a schema with the standard fields
        schema = ReferenceObjectLoader.makeMinimalSchema(
            filterNameList=self.config.mag_column_list,
            addCentroid=False,
            addIsPhotometric=bool(self.config.is_photometric_name),
            addIsResolved=bool(self.config.is_resolved_name),
            addIsVariable=bool(self.config.is_variable_name),
            coordErrDim=2 if bool(self.config.ra_err_name) else 0,
            # This is an on/off flag like the other ``add*`` arguments, not
            # a dimension, so pass a real bool rather than 2/0.
            addProperMotion=bool(self.config.pm_ra_name),
            properMotionErrDim=2 if bool(self.config.pm_ra_err_name) else 0,
            addParallax=bool(self.config.parallax_name),
        )
        keysToSkip = {"id", "centroid_x", "centroid_y", "hasCentroid"}
        key_map = {fieldName: schema[fieldName].asKey() for fieldName in schema.getOrderedNames()
                   if fieldName not in keysToSkip}

        def addField(name):
            """Append the extra column ``name`` to the schema, return its key."""
            if dtype[name].kind == 'U':
                # dealing with a string like thing.  Need to get type and size.
                # NOTE(review): numpy's ``itemsize`` is a byte count, which
                # for unicode dtypes exceeds the character count -- confirm
                # that this generous size is intended.
                at_size = dtype[name].itemsize
                return schema.addField(name, type=str, size=at_size)
            else:
                at_type = dtype[name].type
                return schema.addField(name, at_type)

        for col in self.config.extra_col_names:
            key_map[col] = addField(col)
        return schema, key_map

    def _saveMasterSchema(self, filename):
        """Generate and save the master catalog schema.

        Parameters
        ----------
        filename : `str`
            An input file to read to get the input dtype.

        Returns
        -------
        schema : `lsst.afw.table.Schema`
            The schema built by `makeSchema` from the file's dtype.
        key_map : `dict`
            The field-name to key map built by `makeSchema`.
        """
        arr = self.file_reader.run(filename)
        schema, key_map = self.makeSchema(arr.dtype)

        # Persist an empty catalog that carries only the schema and the
        # format-version metadata.
        catalog = afwTable.SimpleCatalog(schema)
        addRefCatMetadata(catalog)
        self._writeMasterSchema(catalog)
        return schema, key_map

    @abc.abstractmethod
    def _getOnePixelFilename(self, start):
        """Return one example filename to help construct the rest of the
        per-htm pixel filenames.

        Parameters
        ----------
        start : `int`
            The first HTM index in this HTM pixelization.

        Returns
        -------
        filename : `str`
            Path to a single file that would be written to the output location.
        """
        pass

    @abc.abstractmethod
    def _persistConfig(self):
        """Write the config that was used to generate the refcat.
        """
        pass

    @abc.abstractmethod
    def _writeMasterSchema(self, catalog):
        """Butler put the master catalog schema.

        Parameters
        ----------
        catalog : `lsst.afw.table.SimpleCatalog`
            An empty catalog with a fully-defined schema that matches the
            schema used in each of the HTM pixel files.
        """
        pass