Coverage for python/lsst/fgcmcal/fgcmBuildStarsTable.py: 14%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# See COPYRIGHT file at the top of the source tree.
2#
3# This file is part of fgcmcal.
4#
5# Developed for the LSST Data Management System.
6# This product includes software developed by the LSST Project
7# (https://www.lsst.org).
8# See the COPYRIGHT file at the top-level directory of this distribution
9# for details of code ownership.
10#
11# This program is free software: you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation, either version 3 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program. If not, see <https://www.gnu.org/licenses/>.
23"""Build star observations for input to FGCM using sourceTable_visit.
25This task finds all the visits and sourceTable_visits in a repository (or a
26subset based on command line parameters) and extracts all the potential
27calibration stars for input into fgcm. This task additionally uses fgcm to
28match star observations into unique stars, and performs as much cleaning of the
29input catalog as possible.
30"""
32import time
34import numpy as np
35import collections
37import lsst.daf.persistence as dafPersist
38import lsst.pex.config as pexConfig
39import lsst.pipe.base as pipeBase
40from lsst.pipe.base import connectionTypes
41import lsst.afw.table as afwTable
42from lsst.meas.algorithms import ReferenceObjectLoader
44from .fgcmBuildStarsBase import FgcmBuildStarsConfigBase, FgcmBuildStarsRunner, FgcmBuildStarsBaseTask
45from .utilities import computeApproxPixelAreaFields, computeApertureRadiusFromDataRef
46from .utilities import lookupStaticCalibrations
48__all__ = ['FgcmBuildStarsTableConfig', 'FgcmBuildStarsTableTask']
class FgcmBuildStarsTableConnections(pipeBase.PipelineTaskConnections,
                                     dimensions=("instrument",),
                                     defaultTemplates={}):
    """Connections used by the configuration of FgcmBuildStarsTableTask.

    Prerequisite inputs, regular inputs and outputs are declared as class
    attributes.  The constructor then drops the connections that the
    supplied config marks as unused.
    """

    # ---- Prerequisite inputs ----
    camera = connectionTypes.PrerequisiteInput(
        name="camera",
        doc="Camera instrument",
        storageClass="Camera",
        dimensions=("instrument",),
        lookupFunction=lookupStaticCalibrations,
        isCalibration=True,
    )
    fgcmLookUpTable = connectionTypes.PrerequisiteInput(
        name="fgcmLookUpTable",
        doc=("Atmosphere + instrument look-up-table for FGCM throughput and "
             "chromatic corrections."),
        storageClass="Catalog",
        dimensions=("instrument",),
        deferLoad=True,
    )
    refCat = connectionTypes.PrerequisiteInput(
        name="cal_ref_cat",
        doc="Reference catalog to use for photometric calibration",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    # ---- Init input ----
    sourceSchema = connectionTypes.InitInput(
        name="src_schema",
        doc="Schema for source catalogs",
        storageClass="SourceCatalog",
    )

    # ---- Regular inputs ----
    sourceTable_visit = connectionTypes.Input(
        name="sourceTable_visit",
        doc="Source table in parquet format, per visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )
    visitSummary = connectionTypes.Input(
        name="visitSummary",
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and must be sorted for fast lookups of a "
             "detector."),
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )
    background = connectionTypes.Input(
        name="calexpBackground",
        doc="Calexp background model",
        storageClass="Background",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )

    # ---- Outputs ----
    fgcmVisitCatalog = connectionTypes.Output(
        name="fgcmVisitCatalog",
        doc="Catalog of visit information for fgcm",
        storageClass="Catalog",
        dimensions=("instrument",),
    )
    fgcmStarObservations = connectionTypes.Output(
        name="fgcmStarObservations",
        doc="Catalog of star observations for fgcm",
        storageClass="Catalog",
        dimensions=("instrument",),
    )
    fgcmStarIds = connectionTypes.Output(
        name="fgcmStarIds",
        doc="Catalog of fgcm calibration star IDs",
        storageClass="Catalog",
        dimensions=("instrument",),
    )
    fgcmStarIndices = connectionTypes.Output(
        name="fgcmStarIndices",
        doc="Catalog of fgcm calibration star indices",
        storageClass="Catalog",
        dimensions=("instrument",),
    )
    fgcmReferenceStars = connectionTypes.Output(
        name="fgcmReferenceStars",
        doc="Catalog of fgcm-matched reference stars",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    def __init__(self, *, config=None):
        """Prune the connections that ``config`` does not need.

        Parameters
        ----------
        config : config object, optional
            Must provide ``doReferenceMatches`` and
            ``doModelErrorsWithBackground``; both are read unconditionally,
            so the ``None`` default is not usable in practice.
        """
        super().__init__(config=config)

        if not config.doReferenceMatches:
            # Without reference matching neither the reference catalog,
            # the look-up table, nor the matched-reference output is used.
            for unusedName in ("refCat", "fgcmLookUpTable"):
                self.prerequisiteInputs.remove(unusedName)
            self.outputs.remove("fgcmReferenceStars")

        if not config.doModelErrorsWithBackground:
            self.inputs.remove("background")
class FgcmBuildStarsTableConfig(FgcmBuildStarsConfigBase, pipeBase.PipelineTaskConfig,
                                pipelineConnections=FgcmBuildStarsTableConnections):
    """Config for FgcmBuildStarsTableTask"""

    referenceCCD = pexConfig.Field(
        doc="Reference CCD for checking PSF and background",
        dtype=int,
        default=40,
    )

    def setDefaults(self):
        """Set defaults matching the column names of sourceTable_visit.

        The post-transformed sourceTable_visit catalogs use different
        field and flag names from the raw src catalogs, so the names are
        spelled out here instead of being taken from the base config.
        """
        super().setDefaults()

        self.instFluxField = 'apFlux_12_0_instFlux'
        self.localBackgroundFluxField = 'localBackground_instFlux'
        self.apertureInnerInstFluxField = 'apFlux_12_0_instFlux'
        self.apertureOuterInstFluxField = 'apFlux_17_0_instFlux'
        self.psfCandidateName = 'calib_psf_candidate'

        # A flag column name is the flux column name with its trailing
        # 'instFlux' swapped for 'flag'.
        suffixLength = len('instFlux')
        selector = self.sourceSelector["science"]

        selector.flags.bad = [
            'pixelFlags_edge',
            'pixelFlags_interpolatedCenter',
            'pixelFlags_saturatedCenter',
            'pixelFlags_crCenter',
            'pixelFlags_bad',
            'pixelFlags_interpolated',
            'pixelFlags_saturated',
            'centroid_flag',
            self.instFluxField[:-suffixLength] + 'flag',
        ]

        if self.doSubtractLocalBackground:
            selector.flags.bad.append(self.localBackgroundFluxField[:-suffixLength] + 'flag')

        selector.signalToNoise.fluxField = self.instFluxField
        selector.signalToNoise.errField = self.instFluxField + 'Err'

        selector.isolated.parentName = 'parentSourceId'
        selector.isolated.nChildName = 'deblend_nChild'

        selector.unresolved.name = 'extendedness'
215class FgcmBuildStarsTableTask(FgcmBuildStarsBaseTask):
216 """
217 Build stars for the FGCM global calibration, using sourceTable_visit catalogs.
218 """
219 ConfigClass = FgcmBuildStarsTableConfig
220 RunnerClass = FgcmBuildStarsRunner
221 _DefaultName = "fgcmBuildStarsTable"
223 canMultiprocess = False
def __init__(self, initInputs=None, **kwargs):
    """Construct the task and record the source schema when available.

    Parameters
    ----------
    initInputs : `dict`, optional
        When given, its ``"sourceSchema"`` entry must expose a ``schema``
        attribute, which is stored as ``self.sourceSchema``.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    super().__init__(initInputs=initInputs, **kwargs)
    if initInputs is None:
        # Nothing to cache; ``self.sourceSchema`` is not set in this case.
        return
    self.sourceSchema = initInputs["sourceSchema"].schema
def runQuantum(self, butlerQC, inputRefs, outputRefs):
    """Build the visit and star catalogs for one quantum and store them.

    Parameters
    ----------
    butlerQC
        Quantum-context object used to ``get`` the inputs and ``put`` the
        outputs.
    inputRefs
        Input references; ``inputRefs.refCat`` is read when reference
        matching is configured.
    outputRefs
        Output references, one attribute per output catalog.

    Raises
    ------
    RuntimeError
        Raised if local background subtraction is configured and the
        aperture radius cannot be determined from the flux field name.
    """
    inputs = butlerQC.get(inputRefs)

    sourceTableHandles = inputs['sourceTable_visit']
    self.log.info("Running with %d sourceTable_visit dataRefs",
                  len(sourceTableHandles))

    sourceTableByVisit = {handle.dataId['visit']: handle
                          for handle in sourceTableHandles}

    lutDataRef = None
    if self.config.doReferenceMatches:
        # The look-up table is handed on to the star matching below.
        lutDataRef = inputs['fgcmLookUpTable']

        # Build the reference-catalog loader and attach it to the subtask.
        refDataIds = [ref.datasetRef.dataId for ref in inputRefs.refCat]
        refCats = butlerQC.get(inputRefs.refCat)
        loader = ReferenceObjectLoader(
            dataIds=refDataIds,
            refCats=refCats,
            config=self.config.fgcmLoadReferenceCatalog.refObjLoader,
            log=self.log,
        )
        self.makeSubtask('fgcmLoadReferenceCatalog', refObjLoader=loader)

    # Compute the aperture radius if necessary.  Doing it now, before
    # any heavy lifting has happened, lets a bad configuration fail early.
    calibFluxApertureRadius = None
    if self.config.doSubtractLocalBackground:
        try:
            calibFluxApertureRadius = computeApertureRadiusFromDataRef(
                sourceTableHandles[0], self.config.instFluxField)
        except RuntimeError as e:
            raise RuntimeError("Could not determine aperture radius from %s. "
                               "Cannot use doSubtractLocalBackground." %
                               (self.config.instFluxField)) from e

    visitSummaryByVisit = {handle.dataId['visit']: handle
                           for handle in inputs['visitSummary']}

    camera = inputs['camera']
    groupedDataRefs = self._groupDataRefs(sourceTableByVisit,
                                          visitSummaryByVisit)

    bkgDataRefDict = None
    if self.config.doModelErrorsWithBackground:
        # Key the background handles by (visit, detector).
        bkgDataRefDict = {}
        for bkgHandle in inputs['background']:
            bkgId = bkgHandle.dataId.byName()
            bkgDataRefDict[(bkgId['visit'], bkgId['detector'])] = bkgHandle

    # Gen3 does not currently allow "checkpoint" saving of datasets,
    # so everything is built in one go, with no checkpoint references.
    visitCat = self.fgcmMakeVisitCatalog(
        camera,
        groupedDataRefs,
        bkgDataRefDict=bkgDataRefDict,
        visitCatDataRef=None,
        inVisitCat=None,
    )

    starObsCat = self.fgcmMakeAllStarObservations(
        groupedDataRefs,
        visitCat,
        self.sourceSchema,
        camera,
        calibFluxApertureRadius=calibFluxApertureRadius,
        starObsDataRef=None,
        visitCatDataRef=None,
        inStarObsCat=None,
    )

    butlerQC.put(visitCat, outputRefs.fgcmVisitCatalog)
    butlerQC.put(starObsCat, outputRefs.fgcmStarObservations)

    starIdCat, starIndicesCat, refStarCat = self.fgcmMatchStars(
        visitCat, starObsCat, lutDataRef=lutDataRef)

    butlerQC.put(starIdCat, outputRefs.fgcmStarIds)
    butlerQC.put(starIndicesCat, outputRefs.fgcmStarIndices)
    if refStarCat is not None:
        butlerQC.put(refStarCat, outputRefs.fgcmReferenceStars)
@classmethod
def _makeArgumentParser(cls):
    """Create the argument parser for this task.

    Returns
    -------
    parser
        Parser named after ``cls._DefaultName`` with an ``--id`` argument
        registered for "sourceTable_visit".
    """
    argParser = pipeBase.ArgumentParser(name=cls._DefaultName)
    idHelp = "Data ID, e.g. --id visit=6789"
    argParser.add_id_argument("--id", "sourceTable_visit", help=idHelp)
    return argParser
322 def _groupDataRefs(self, sourceTableDataRefDict, visitSummaryDataRefDict):
323 """Group sourceTable and visitSummary dataRefs (gen3 only).
325 Parameters
326 ----------
327 sourceTableDataRefDict : `dict` [`int`, `str`]
328 Dict of source tables, keyed by visit.
329 visitSummaryDataRefDict : `dict` [int, `str`]
330 Dict of visit summary catalogs, keyed by visit.
332 Returns
333 -------
334 groupedDataRefs : `dict` [`int`, `list`]
335 Dictionary with sorted visit keys, and `list`s with
336 `lsst.daf.butler.DeferredDataSetHandle`. The first
337 item in the list will be the visitSummary ref, and
338 the second will be the source table ref.
339 """
340 groupedDataRefs = collections.defaultdict(list)
341 visits = sorted(sourceTableDataRefDict.keys())
343 for visit in visits:
344 groupedDataRefs[visit] = [visitSummaryDataRefDict[visit],
345 sourceTableDataRefDict[visit]]
347 return groupedDataRefs
349 def _findAndGroupDataRefsGen2(self, butler, camera, dataRefs):
350 self.log.info("Grouping dataRefs by %s", (self.config.visitDataRefName))
352 ccdIds = []
353 for detector in camera:
354 ccdIds.append(detector.getId())
355 # Insert our preferred referenceCCD first:
356 # It is fine that this is listed twice, because we only need
357 # the first calexp that is found.
358 ccdIds.insert(0, self.config.referenceCCD)
360 # The visitTable building code expects a dictionary of groupedDataRefs
361 # keyed by visit, the first element as the "primary" calexp dataRef.
362 # We then append the sourceTable_visit dataRef at the end for the
363 # code which does the data reading (fgcmMakeAllStarObservations).
365 groupedDataRefs = collections.defaultdict(list)
366 for dataRef in dataRefs:
367 visit = dataRef.dataId[self.config.visitDataRefName]
369 # Find an existing calexp (we need for psf and metadata)
370 # and make the relevant dataRef
371 for ccdId in ccdIds:
372 try:
373 calexpRef = butler.dataRef('calexp', dataId={self.config.visitDataRefName: visit,
374 self.config.ccdDataRefName: ccdId})
375 except RuntimeError:
376 # Not found
377 continue
379 # Make sure the dataset exists
380 if not calexpRef.datasetExists():
381 continue
383 # It was found. Add and quit out, since we only
384 # need one calexp per visit.
385 groupedDataRefs[visit].append(calexpRef)
386 break
388 # And append this dataRef
389 groupedDataRefs[visit].append(dataRef)
391 # This should be sorted by visit (the key)
392 return dict(sorted(groupedDataRefs.items()))
def fgcmMakeAllStarObservations(self, groupedDataRefs, visitCat,
                                sourceSchema,
                                camera,
                                calibFluxApertureRadius=None,
                                visitCatDataRef=None,
                                starObsDataRef=None,
                                inStarObsCat=None):
    """Compile the good star observations of every visit into one catalog.

    Parameters
    ----------
    groupedDataRefs : `dict` [`int`, `list`]
        References grouped by visit.  The last element of each list is
        read as the sourceTable_visit of that visit.
    visitCat : catalog of visit records
        One record per visit.  The ``sources_read``, ``exptime``,
        ``visit`` and ``scaling`` fields are read; ``deltaAper`` and
        ``sources_read`` are updated in place.
    sourceSchema
        Schema passed to ``self._makeSourceMapper`` to derive the output
        schema.
    camera
        Iterable of detectors providing ``getId()``.
    calibFluxApertureRadius : `float`, optional
        Aperture radius used to compute the local background area.
        Required when ``doSubtractLocalBackground`` is configured.
    visitCatDataRef : optional
        Reference used to checkpoint ``visitCat`` (via ``put``).
    starObsDataRef : optional
        Reference used to checkpoint the star observations (via ``put``).
        Checkpoints are only written when both references are given.
    inStarObsCat : optional
        Existing star observation catalog to extend.  Its schema must
        match the output schema.

    Returns
    -------
    fullCatalog : `lsst.afw.table.BaseCatalog`
        Star observations of all visits (``inStarObsCat`` itself, extended,
        when that was supplied).

    Raises
    ------
    RuntimeError
        Raised if ``doSubtractLocalBackground`` is set without an aperture
        radius, or if ``inStarObsCat`` has a mismatched schema.
    """
    # Use the standard-library module directly.  ``np.warnings`` was an
    # undocumented alias of it that newer NumPy releases no longer expose.
    import warnings

    startTime = time.time()

    # If both dataRefs are None, the caller does not want checkpoint
    # files.  If both are set, checkpoints are written.  Exactly one being
    # set is potentially unintentional, so warn about it.
    if (visitCatDataRef is None) != (starObsDataRef is None):
        self.log.warning("Only one of visitCatDataRef and starObsDataRef are set, so "
                         "no checkpoint files will be persisted.")

    if self.config.doSubtractLocalBackground and calibFluxApertureRadius is None:
        raise RuntimeError("Must set calibFluxApertureRadius if doSubtractLocalBackground is True.")

    # The mapper is only used to obtain the output schema.
    sourceMapper = self._makeSourceMapper(sourceSchema)
    outputSchema = sourceMapper.getOutputSchema()

    # Mapping from detector id to its index in camera iteration order.
    ccdMapping = {detector.getId(): ccdIndex
                  for ccdIndex, detector in enumerate(camera)}

    approxPixelAreaFields = computeApproxPixelAreaFields(camera)

    if inStarObsCat is not None:
        fullCatalog = inStarObsCat
        comp1 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_KEYS)
        comp2 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_NAMES)
        if not comp1 or not comp2:
            raise RuntimeError("Existing fgcmStarObservations file found with mismatched schema.")
    else:
        fullCatalog = afwTable.BaseCatalog(outputSchema)

    visitKey = outputSchema['visit'].asKey()
    ccdKey = outputSchema['ccd'].asKey()
    instMagKey = outputSchema['instMag'].asKey()
    instMagErrKey = outputSchema['instMagErr'].asKey()

    # Prepare local background if desired
    if self.config.doSubtractLocalBackground:
        localBackgroundArea = np.pi*calibFluxApertureRadius**2.

    # Column names are loop invariant; look them up once.
    instFluxField = self.config.instFluxField
    instFluxErrField = instFluxField + 'Err'
    innerFluxField = self.config.apertureInnerInstFluxField
    outerFluxField = self.config.apertureOuterInstFluxField

    columns = None

    # Converts a fractional flux error into a magnitude error.
    k = 2.5/np.log(10.)

    for counter, visit in enumerate(visitCat):
        # Skip visits already read and stored in a checkpoint file.
        if visit['sources_read']:
            continue

        expTime = visit['exptime']

        # The source table reference is the last one in each group.
        dataRef = groupedDataRefs[visit['visit']][-1]

        if isinstance(dataRef, dafPersist.ButlerDataRef):
            srcTable = dataRef.get()
            if columns is None:
                columns, detColumn = self._get_sourceTable_visit_columns(srcTable.columns)
            df = srcTable.toDataFrame(columns)
        else:
            if columns is None:
                inColumns = dataRef.get(component='columns')
                columns, detColumn = self._get_sourceTable_visit_columns(inColumns)
            df = dataRef.get(parameters={'columns': columns})

        goodSrc = self.sourceSelector.selectSources(df)

        instFlux = df[instFluxField].values

        # With local background subtraction, additionally require that
        # the background-corrected flux stays positive.
        if self.config.doSubtractLocalBackground:
            localBackground = localBackgroundArea*df[self.config.localBackgroundFluxField].values
            use, = np.where((goodSrc.selected)
                            & ((instFlux - localBackground) > 0.0))
        else:
            use, = np.where(goodSrc.selected)

        tempCat = afwTable.BaseCatalog(fullCatalog.schema)
        tempCat.resize(use.size)

        tempCat['ra'][:] = np.deg2rad(df['ra'].values[use])
        tempCat['dec'][:] = np.deg2rad(df['decl'].values[use])
        tempCat['x'][:] = df['x'].values[use]
        tempCat['y'][:] = df['y'].values[use]
        # The "visit" name in the parquet table is hard-coded.
        tempCat[visitKey][:] = df['visit'].values[use]
        tempCat[ccdKey][:] = df[detColumn].values[use]
        tempCat['psf_candidate'] = df[self.config.psfCandidateName].values[use]

        if self.config.doSubtractLocalBackground:
            # At the moment we only adjust the flux and not the flux
            # error by the background because the error on
            # base_LocalBackground_instFlux is the rms error in the
            # background annulus, not the error on the mean in the
            # background estimate (which is much smaller, by sqrt(n)
            # pixels used to estimate the background, which we do not
            # have access to in this task).  In the default settings,
            # the annulus is sufficiently large such that these
            # additional errors are negligibly small (much less
            # than a mmag in quadrature).

            # This is the difference between the mag with local background
            # correction and the mag without local background correction.
            tempCat['deltaMagBkg'] = (-2.5*np.log10(instFlux[use]
                                                    - localBackground[use]) -
                                      -2.5*np.log10(instFlux[use]))
        else:
            tempCat['deltaMagBkg'][:] = 0.0

        # The jacobian and flux scaling are per detector.
        for detector in camera:
            ccdId = detector.getId()
            # Mask of the selected observations on this detector.
            use2 = (tempCat[ccdKey] == ccdId)
            tempCat['jacobian'][use2] = approxPixelAreaFields[ccdId].evaluate(tempCat['x'][use2],
                                                                              tempCat['y'][use2])
            scaledInstFlux = (instFlux[use[use2]]
                              * visit['scaling'][ccdMapping[ccdId]])
            tempCat[instMagKey][use2] = (-2.5*np.log10(scaledInstFlux) + 2.5*np.log10(expTime))

        # Compute instMagErr from instFluxErr/instFlux, any scaling
        # will cancel out.
        tempCat[instMagErrKey][:] = k*(df[instFluxErrField].values[use]
                                       / instFlux[use])

        # Apply the jacobian if configured
        if self.config.doApplyWcsJacobian:
            tempCat[instMagKey][:] -= 2.5*np.log10(tempCat['jacobian'][:])

        fullCatalog.extend(tempCat)

        # Now do the aperture information
        with warnings.catch_warnings():
            # Ignore warnings, we will filter infinites and nans below
            warnings.simplefilter("ignore")

            instMagIn = -2.5*np.log10(df[innerFluxField].values[use])
            instMagErrIn = k*(df[innerFluxField + 'Err'].values[use]
                              / df[innerFluxField].values[use])
            instMagOut = -2.5*np.log10(df[outerFluxField].values[use])
            instMagErrOut = k*(df[outerFluxField + 'Err'].values[use]
                               / df[outerFluxField].values[use])

        ok = (np.isfinite(instMagIn) & np.isfinite(instMagErrIn)
              & np.isfinite(instMagOut) & np.isfinite(instMagErrOut))

        visit['deltaAper'] = np.median(instMagIn[ok] - instMagOut[ok])
        visit['sources_read'] = True

        self.log.info("  Found %d good stars in visit %d (deltaAper = %0.3f)",
                      use.size, visit['visit'], visit['deltaAper'])

        if ((counter % self.config.nVisitsPerCheckpoint) == 0
                and starObsDataRef is not None and visitCatDataRef is not None):
            # Persist both the stars and the visit catalog, which gets
            # additional metadata from each visit.
            starObsDataRef.put(fullCatalog)
            visitCatDataRef.put(visitCat)

    self.log.info("Found all good star observations in %.2f s",
                  time.time() - startTime)

    return fullCatalog
567 def _get_sourceTable_visit_columns(self, inColumns):
568 """
569 Get the sourceTable_visit columns from the config.
571 Parameters
572 ----------
573 inColumns : `list`
574 List of columns available in the sourceTable_visit
576 Returns
577 -------
578 columns : `list`
579 List of columns to read from sourceTable_visit.
580 detectorColumn : `str`
581 Name of the detector column.
582 """
583 if 'detector' in inColumns:
584 # Default name for Gen3.
585 detectorColumn = 'detector'
586 else:
587 # Default name for Gen2 and Gen2 conversions.
588 detectorColumn = 'ccd'
589 # Some names are hard-coded in the parquet table.
590 columns = ['visit', detectorColumn,
591 'ra', 'decl', 'x', 'y', self.config.psfCandidateName,
592 self.config.instFluxField, self.config.instFluxField + 'Err',
593 self.config.apertureInnerInstFluxField, self.config.apertureInnerInstFluxField + 'Err',
594 self.config.apertureOuterInstFluxField, self.config.apertureOuterInstFluxField + 'Err']
595 if self.sourceSelector.config.doFlags:
596 columns.extend(self.sourceSelector.config.flags.bad)
597 if self.sourceSelector.config.doUnresolved:
598 columns.append(self.sourceSelector.config.unresolved.name)
599 if self.sourceSelector.config.doIsolated:
600 columns.append(self.sourceSelector.config.isolated.parentName)
601 columns.append(self.sourceSelector.config.isolated.nChildName)
602 if self.config.doSubtractLocalBackground:
603 columns.append(self.config.localBackgroundFluxField)
605 return columns, detectorColumn