Coverage for python/lsst/fgcmcal/fgcmBuildStarsTable.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# See COPYRIGHT file at the top of the source tree.
2#
3# This file is part of fgcmcal.
4#
5# Developed for the LSST Data Management System.
6# This product includes software developed by the LSST Project
7# (https://www.lsst.org).
8# See the COPYRIGHT file at the top-level directory of this distribution
9# for details of code ownership.
10#
11# This program is free software: you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation, either version 3 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program. If not, see <https://www.gnu.org/licenses/>.
23"""Build star observations for input to FGCM using sourceTable_visit.
25This task finds all the visits and sourceTable_visits in a repository (or a
26subset based on command line parameters) and extracts all the potential
27calibration stars for input into fgcm. This task additionally uses fgcm to
28match star observations into unique stars, and performs as much cleaning of the
29input catalog as possible.
30"""
32import time
34import numpy as np
35import collections
37import lsst.daf.persistence as dafPersist
38import lsst.pex.config as pexConfig
39import lsst.pipe.base as pipeBase
40from lsst.pipe.base import connectionTypes
41import lsst.afw.table as afwTable
42from lsst.meas.algorithms import ReferenceObjectLoader
44from .fgcmBuildStarsBase import FgcmBuildStarsConfigBase, FgcmBuildStarsRunner, FgcmBuildStarsBaseTask
45from .utilities import computeApproxPixelAreaFields, computeApertureRadiusFromDataRef
46from .utilities import lookupStaticCalibrations
48__all__ = ['FgcmBuildStarsTableConfig', 'FgcmBuildStarsTableTask']
class FgcmBuildStarsTableConnections(pipeBase.PipelineTaskConnections,
                                     dimensions=("instrument",),
                                     defaultTemplates={}):
    """Dataset connections for FgcmBuildStarsTableTask.

    All outputs are per-instrument catalogs.  Several connections are
    optional and are dropped in ``__init__`` depending on the config.
    """

    # ------------------------------------------------------------------
    # Prerequisite inputs
    # ------------------------------------------------------------------
    camera = connectionTypes.PrerequisiteInput(
        doc="Camera instrument",
        name="camera",
        storageClass="Camera",
        dimensions=("instrument",),
        lookupFunction=lookupStaticCalibrations,
        isCalibration=True,
    )

    # Only kept when config.doReferenceMatches is set.
    fgcmLookUpTable = connectionTypes.PrerequisiteInput(
        doc=("Atmosphere + instrument look-up-table for FGCM throughput and "
             "chromatic corrections."),
        name="fgcmLookUpTable",
        storageClass="Catalog",
        dimensions=("instrument",),
        deferLoad=True,
    )

    sourceSchema = connectionTypes.PrerequisiteInput(
        doc="Schema for source catalogs",
        name="src_schema",
        storageClass="SourceCatalog",
        deferLoad=True,
    )

    # Only kept when config.doReferenceMatches is set.
    refCat = connectionTypes.PrerequisiteInput(
        doc="Reference catalog to use for photometric calibration",
        name="cal_ref_cat",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    # ------------------------------------------------------------------
    # Regular inputs
    # ------------------------------------------------------------------
    sourceTable_visit = connectionTypes.Input(
        doc="Source table in parquet format, per visit",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    visitSummary = connectionTypes.Input(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and must be sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    # Only kept when config.doModelErrorsWithBackground is set.
    background = connectionTypes.Input(
        doc="Calexp background model",
        name="calexpBackground",
        storageClass="Background",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )

    # ------------------------------------------------------------------
    # Outputs
    # ------------------------------------------------------------------
    fgcmVisitCatalog = connectionTypes.Output(
        doc="Catalog of visit information for fgcm",
        name="fgcmVisitCatalog",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarObservations = connectionTypes.Output(
        doc="Catalog of star observations for fgcm",
        name="fgcmStarObservations",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIds = connectionTypes.Output(
        doc="Catalog of fgcm calibration star IDs",
        name="fgcmStarIds",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIndices = connectionTypes.Output(
        doc="Catalog of fgcm calibration star indices",
        name="fgcmStarIndices",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    # Only kept when config.doReferenceMatches is set.
    fgcmReferenceStars = connectionTypes.Output(
        doc="Catalog of fgcm-matched reference stars",
        name="fgcmReferenceStars",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    def __init__(self, *, config=None):
        """Drop the connections that the supplied config does not need.

        Parameters
        ----------
        config : `FgcmBuildStarsTableConfig`
            Task configuration; ``doReferenceMatches`` and
            ``doModelErrorsWithBackground`` are read.
        """
        super().__init__(config=config)

        # Without reference matching there is nothing to load from the
        # reference catalog / look-up table and no reference-star output.
        if not config.doReferenceMatches:
            for unusedPrerequisite in ("refCat", "fgcmLookUpTable"):
                self.prerequisiteInputs.remove(unusedPrerequisite)
            self.outputs.remove("fgcmReferenceStars")

        # The background models are only read when modeling errors with them.
        if not config.doModelErrorsWithBackground:
            self.inputs.remove("background")
class FgcmBuildStarsTableConfig(FgcmBuildStarsConfigBase, pipeBase.PipelineTaskConfig,
                                pipelineConnections=FgcmBuildStarsTableConnections):
    """Configuration for FgcmBuildStarsTableTask.

    Extends the shared build-stars configuration with defaults that match
    the column names of the sourceTable_visit catalogs.
    """

    referenceCCD = pexConfig.Field(
        doc="Reference CCD for checking PSF and background",
        dtype=int,
        default=40,
    )

    def setDefaults(self):
        """Set field, flag and selector names for sourceTable_visit input."""
        super().setDefaults()

        # sourceTable_visit catalogs are post-transformed, so their column
        # names differ from those of the raw src catalogs; none of the
        # field/flag names can be inherited from the base config.
        self.instFluxField = 'ApFlux_12_0_instFlux'
        self.localBackgroundFluxField = 'LocalBackground_instFlux'
        self.apertureInnerInstFluxField = 'ApFlux_12_0_instFlux'
        self.apertureOuterInstFluxField = 'ApFlux_17_0_instFlux'
        self.psfCandidateName = 'Calib_psf_candidate'

        selector = self.sourceSelector["science"]

        # A flag column shares the flux column's prefix: strip the trailing
        # 'instFlux' and append 'flag'.
        suffixLength = len('instFlux')

        badFlags = [
            'PixelFlags_edge',
            'PixelFlags_interpolatedCenter',
            'PixelFlags_saturatedCenter',
            'PixelFlags_crCenter',
            'PixelFlags_bad',
            'PixelFlags_interpolated',
            'PixelFlags_saturated',
            'Centroid_flag',
            self.instFluxField[:-suffixLength] + 'flag',
        ]
        if self.doSubtractLocalBackground:
            # Also reject sources whose local background measurement failed.
            badFlags.append(self.localBackgroundFluxField[:-suffixLength] + 'flag')
        selector.flags.bad = badFlags

        selector.signalToNoise.fluxField = self.instFluxField
        selector.signalToNoise.errField = self.instFluxField + 'Err'

        selector.isolated.parentName = 'parentSourceId'
        selector.isolated.nChildName = 'Deblend_nChild'

        selector.unresolved.name = 'extendedness'
class FgcmBuildStarsTableTask(FgcmBuildStarsBaseTask):
    """
    Build stars for the FGCM global calibration, using sourceTable_visit catalogs.
    """
    ConfigClass = FgcmBuildStarsTableConfig
    RunnerClass = FgcmBuildStarsRunner
    _DefaultName = "fgcmBuildStarsTable"

    canMultiprocess = False

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Run the full star-building sequence for one quantum (gen3).

        Builds the visit catalog and the star observation catalog, writes
        both, then matches stars and writes the id, index and (if produced)
        reference-star catalogs.

        Parameters
        ----------
        butlerQC
            Quantum-context butler used to ``get`` inputs and ``put`` outputs.
        inputRefs
            Input dataset references for this quantum.
        outputRefs
            Output dataset references for this quantum.

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set and the aperture
            radius cannot be determined from ``instFluxField``.
        """
        inputRefDict = butlerQC.get(inputRefs)

        sourceTableRefs = inputRefDict['sourceTable_visit']

        self.log.info("Running with %d sourceTable_visit dataRefs",
                      len(sourceTableRefs))

        sourceTableDataRefDict = {sourceTableRef.dataId['visit']: sourceTableRef for
                                  sourceTableRef in sourceTableRefs}

        if self.config.doReferenceMatches:
            # Get the LUT dataRef
            lutDataRef = inputRefDict['fgcmLookUpTable']

            # Prepare the refCat loader
            refConfig = self.config.fgcmLoadReferenceCatalog.refObjLoader
            refObjLoader = ReferenceObjectLoader(dataIds=[ref.datasetRef.dataId
                                                          for ref in inputRefs.refCat],
                                                 refCats=butlerQC.get(inputRefs.refCat),
                                                 config=refConfig,
                                                 log=self.log)
            self.makeSubtask('fgcmLoadReferenceCatalog', refObjLoader=refObjLoader)
        else:
            lutDataRef = None

        # Compute aperture radius if necessary.  This is useful to do now before
        # any heavy lifting has happened (fail early).
        calibFluxApertureRadius = None
        if self.config.doSubtractLocalBackground:
            try:
                calibFluxApertureRadius = computeApertureRadiusFromDataRef(sourceTableRefs[0],
                                                                           self.config.instFluxField)
            except RuntimeError as e:
                raise RuntimeError("Could not determine aperture radius from %s. "
                                   "Cannot use doSubtractLocalBackground." %
                                   (self.config.instFluxField)) from e

        visitSummaryRefs = inputRefDict['visitSummary']
        visitSummaryDataRefDict = {visitSummaryRef.dataId['visit']: visitSummaryRef for
                                   visitSummaryRef in visitSummaryRefs}

        camera = inputRefDict['camera']
        groupedDataRefs = self._groupDataRefs(sourceTableDataRefDict,
                                              visitSummaryDataRefDict)

        if self.config.doModelErrorsWithBackground:
            # Key the background handles by (visit, detector).
            bkgRefs = inputRefDict['background']
            bkgDataRefDict = {(bkgRef.dataId.byName()['visit'],
                               bkgRef.dataId.byName()['detector']): bkgRef for
                              bkgRef in bkgRefs}
        else:
            bkgDataRefDict = None

        # Gen3 does not currently allow "checkpoint" saving of datasets,
        # so we need to have this all in one go.
        visitCat = self.fgcmMakeVisitCatalog(camera, groupedDataRefs,
                                             bkgDataRefDict=bkgDataRefDict,
                                             visitCatDataRef=None,
                                             inVisitCat=None)

        rad = calibFluxApertureRadius
        sourceSchemaDataRef = inputRefDict['sourceSchema']
        fgcmStarObservationCat = self.fgcmMakeAllStarObservations(groupedDataRefs,
                                                                  visitCat,
                                                                  sourceSchemaDataRef,
                                                                  camera,
                                                                  calibFluxApertureRadius=rad,
                                                                  starObsDataRef=None,
                                                                  visitCatDataRef=None,
                                                                  inStarObsCat=None)

        butlerQC.put(visitCat, outputRefs.fgcmVisitCatalog)
        butlerQC.put(fgcmStarObservationCat, outputRefs.fgcmStarObservations)

        fgcmStarIdCat, fgcmStarIndicesCat, fgcmRefCat = self.fgcmMatchStars(visitCat,
                                                                            fgcmStarObservationCat,
                                                                            lutDataRef=lutDataRef)

        butlerQC.put(fgcmStarIdCat, outputRefs.fgcmStarIds)
        butlerQC.put(fgcmStarIndicesCat, outputRefs.fgcmStarIndices)
        if fgcmRefCat is not None:
            butlerQC.put(fgcmRefCat, outputRefs.fgcmReferenceStars)

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser"""
        parser = pipeBase.ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", "sourceTable_visit", help="Data ID, e.g. --id visit=6789")

        return parser

    def _groupDataRefs(self, sourceTableDataRefDict, visitSummaryDataRefDict):
        """Group sourceTable and visitSummary dataRefs (gen3 only).

        Parameters
        ----------
        sourceTableDataRefDict : `dict` [`int`, handle]
            Dict of source table handles, keyed by visit.
        visitSummaryDataRefDict : `dict` [`int`, handle]
            Dict of visit summary catalog handles, keyed by visit.  Must
            contain every visit present in ``sourceTableDataRefDict``.

        Returns
        -------
        groupedDataRefs : `dict` [`int`, `list`]
            Dictionary with sorted visit keys, and `list`s with
            `lsst.daf.butler.DeferredDataSetHandle`.  The first
            item in the list will be the visitSummary ref, and
            the second will be the source table ref.

        Raises
        ------
        KeyError
            Raised if a visit has a source table but no visit summary.
        """
        groupedDataRefs = collections.defaultdict(list)
        visits = sorted(sourceTableDataRefDict.keys())

        for visit in visits:
            groupedDataRefs[visit] = [visitSummaryDataRefDict[visit],
                                      sourceTableDataRefDict[visit]]

        return groupedDataRefs

    def _findAndGroupDataRefsGen2(self, butler, camera, dataRefs):
        """Group sourceTable_visit dataRefs with one calexp per visit (gen2).

        Parameters
        ----------
        butler
            Gen2 butler used to build ``calexp`` data references.
        camera
            Iterable of detectors; each must provide ``getId()``.
        dataRefs : iterable
            sourceTable_visit data references; ``dataId`` must contain
            ``config.visitDataRefName``.

        Returns
        -------
        groupedDataRefs : `dict` [`int`, `list`]
            Dictionary sorted by visit.  Each list holds the first existing
            calexp dataRef found for the visit (if any was found), followed
            by the sourceTable_visit dataRef(s) for that visit.
        """
        self.log.info("Grouping dataRefs by %s", (self.config.visitDataRefName))

        ccdIds = [detector.getId() for detector in camera]
        # Insert our preferred referenceCCD first:
        # It is fine that this is listed twice, because we only need
        # the first calexp that is found.
        ccdIds.insert(0, self.config.referenceCCD)

        # The visitTable building code expects a dictionary of groupedDataRefs
        # keyed by visit, the first element as the "primary" calexp dataRef.
        # We then append the sourceTable_visit dataRef at the end for the
        # code which does the data reading (fgcmMakeAllStarObservations).

        groupedDataRefs = collections.defaultdict(list)
        for dataRef in dataRefs:
            visit = dataRef.dataId[self.config.visitDataRefName]

            # Find an existing calexp (we need for psf and metadata)
            # and make the relevant dataRef
            for ccdId in ccdIds:
                try:
                    calexpRef = butler.dataRef('calexp', dataId={self.config.visitDataRefName: visit,
                                                                 self.config.ccdDataRefName: ccdId})
                except RuntimeError:
                    # Not found
                    continue

                # Make sure the dataset exists
                if not calexpRef.datasetExists():
                    continue

                # It was found.  Add and quit out, since we only
                # need one calexp per visit.
                groupedDataRefs[visit].append(calexpRef)
                break

            # And append this dataRef
            groupedDataRefs[visit].append(dataRef)

        # This should be sorted by visit (the key)
        return dict(sorted(groupedDataRefs.items()))

    def fgcmMakeAllStarObservations(self, groupedDataRefs, visitCat,
                                    sourceSchemaDataRef,
                                    camera,
                                    calibFluxApertureRadius=None,
                                    visitCatDataRef=None,
                                    starObsDataRef=None,
                                    inStarObsCat=None):
        """Compile all good star observations from the per-visit source tables.

        Parameters
        ----------
        groupedDataRefs : `dict` [`int`, `list`]
            Data references keyed by visit.  The last element of each list
            must be the sourceTable_visit reference for that visit.
        visitCat : catalog
            Catalog of visits.  The ``visit``, ``exptime``, ``scaling`` and
            ``sources_read`` fields are read; ``deltaAper`` and
            ``sources_read`` are updated in place.
        sourceSchemaDataRef
            Reference whose ``get().schema`` provides the source schema used
            to derive the output schema.
        camera
            Iterable of detectors; each must provide ``getId()``.
        calibFluxApertureRadius : `float`, optional
            Aperture radius used to compute the local background area
            (``pi * r**2``).  Required if ``doSubtractLocalBackground`` is
            set.  Presumably in pixels -- TODO confirm.
        visitCatDataRef : optional
            Reference used to checkpoint ``visitCat``.
        starObsDataRef : optional
            Reference used to checkpoint the star observations.
        inStarObsCat : catalog, optional
            Existing star observation catalog to extend (e.g. from a
            checkpoint).  Its schema must match the output schema.

        Returns
        -------
        fgcmStarObservations : `lsst.afw.table.BaseCatalog`
            Catalog of all selected star observations.

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set and
            ``calibFluxApertureRadius`` is `None`, or if ``inStarObsCat``
            has a mismatched schema.
        """
        # ``np.warnings`` is no longer available in current NumPy releases;
        # use the standard library module directly.
        import warnings

        startTime = time.time()

        # If both dataRefs are None, then we assume the caller does not
        # want to store checkpoint files.  If both are set, we will
        # do checkpoint files.  And if only one is set, this is potentially
        # unintentional and we will warn.
        if (visitCatDataRef is None) != (starObsDataRef is None):
            self.log.warn("Only one of visitCatDataRef and starObsDataRef are set, so "
                          "no checkpoint files will be persisted.")

        if self.config.doSubtractLocalBackground and calibFluxApertureRadius is None:
            raise RuntimeError("Must set calibFluxApertureRadius if doSubtractLocalBackground is True.")

        # To get the correct output schema, we use similar code as fgcmBuildStarsTask
        # We are not actually using this mapper, except to grab the outputSchema
        sourceSchema = sourceSchemaDataRef.get().schema
        sourceMapper = self._makeSourceMapper(sourceSchema)
        outputSchema = sourceMapper.getOutputSchema()

        # Construct mapping from ccd number to index
        ccdMapping = {}
        for ccdIndex, detector in enumerate(camera):
            ccdMapping[detector.getId()] = ccdIndex

        approxPixelAreaFields = computeApproxPixelAreaFields(camera)

        if inStarObsCat is not None:
            fullCatalog = inStarObsCat
            comp1 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_KEYS)
            comp2 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_NAMES)
            if not comp1 or not comp2:
                raise RuntimeError("Existing fgcmStarObservations file found with mismatched schema.")
        else:
            fullCatalog = afwTable.BaseCatalog(outputSchema)

        visitKey = outputSchema['visit'].asKey()
        ccdKey = outputSchema['ccd'].asKey()
        instMagKey = outputSchema['instMag'].asKey()
        instMagErrKey = outputSchema['instMagErr'].asKey()

        # Prepare local background if desired
        if self.config.doSubtractLocalBackground:
            localBackgroundArea = np.pi*calibFluxApertureRadius**2.

        # The column list is resolved once, from the first table actually read.
        columns = None

        # Conversion factor from fractional flux error to magnitude error.
        k = 2.5/np.log(10.)

        for counter, visit in enumerate(visitCat):
            # Check if these sources have already been read and stored in the checkpoint file
            if visit['sources_read']:
                continue

            expTime = visit['exptime']

            # The sourceTable_visit reference is always the last in the list.
            dataRef = groupedDataRefs[visit['visit']][-1]

            if isinstance(dataRef, dafPersist.ButlerDataRef):
                # Gen2: read the table object and convert the needed columns.
                srcTable = dataRef.get()
                if columns is None:
                    columns, detColumn = self._get_sourceTable_visit_columns(srcTable.columns)
                df = srcTable.toDataFrame(columns)
            else:
                # Gen3: ask the deferred handle for just the needed columns.
                if columns is None:
                    inColumns = dataRef.get(component='columns')
                    columns, detColumn = self._get_sourceTable_visit_columns(inColumns)
                df = dataRef.get(parameters={'columns': columns})

            goodSrc = self.sourceSelector.selectSources(df)

            # Need to add a selection based on the local background correction
            # if necessary
            if self.config.doSubtractLocalBackground:
                localBackground = localBackgroundArea*df[self.config.localBackgroundFluxField].values
                use, = np.where((goodSrc.selected)
                                & ((df[self.config.instFluxField].values - localBackground) > 0.0))
            else:
                use, = np.where(goodSrc.selected)

            tempCat = afwTable.BaseCatalog(fullCatalog.schema)
            tempCat.resize(use.size)

            tempCat['ra'][:] = np.deg2rad(df['ra'].values[use])
            tempCat['dec'][:] = np.deg2rad(df['decl'].values[use])
            tempCat['x'][:] = df['x'].values[use]
            tempCat['y'][:] = df['y'].values[use]
            # The "visit" name in the parquet table is hard-coded.
            tempCat[visitKey][:] = df['visit'].values[use]
            tempCat[ccdKey][:] = df[detColumn].values[use]
            # Use the configured column name: it is the one that
            # _get_sourceTable_visit_columns requested from the table.
            # Whole-column assignment (no ``[:]``) is kept from the original.
            tempCat['psf_candidate'] = df[self.config.psfCandidateName].values[use]

            if self.config.doSubtractLocalBackground:
                # At the moment we only adjust the flux and not the flux
                # error by the background because the error on
                # base_LocalBackground_instFlux is the rms error in the
                # background annulus, not the error on the mean in the
                # background estimate (which is much smaller, by sqrt(n)
                # pixels used to estimate the background, which we do not
                # have access to in this task).  In the default settings,
                # the annulus is sufficiently large such that these
                # additional errors are negligibly small (much less
                # than a mmag in quadrature).

                # This is the difference between the mag with local background correction
                # and the mag without local background correction.
                tempCat['deltaMagBkg'] = (-2.5*np.log10(df[self.config.instFluxField].values[use]
                                                        - localBackground[use]) -
                                          -2.5*np.log10(df[self.config.instFluxField].values[use]))
            else:
                tempCat['deltaMagBkg'][:] = 0.0

            # Need to loop over ccds here
            for detector in camera:
                ccdId = detector.getId()
                # used index for all observations with a given ccd
                use2 = (tempCat[ccdKey] == ccdId)
                tempCat['jacobian'][use2] = approxPixelAreaFields[ccdId].evaluate(tempCat['x'][use2],
                                                                                  tempCat['y'][use2])
                scaledInstFlux = (df[self.config.instFluxField].values[use[use2]]
                                  * visit['scaling'][ccdMapping[ccdId]])
                tempCat[instMagKey][use2] = (-2.5*np.log10(scaledInstFlux) + 2.5*np.log10(expTime))

            # Compute instMagErr from instFluxErr/instFlux, any scaling
            # will cancel out.
            tempCat[instMagErrKey][:] = k*(df[self.config.instFluxField + 'Err'].values[use]
                                           / df[self.config.instFluxField].values[use])

            # Apply the jacobian if configured
            if self.config.doApplyWcsJacobian:
                tempCat[instMagKey][:] -= 2.5*np.log10(tempCat['jacobian'][:])

            fullCatalog.extend(tempCat)

            # Now do the aperture information
            with warnings.catch_warnings():
                # Ignore warnings, we will filter infinites and nans below
                warnings.simplefilter("ignore")

                instMagIn = -2.5*np.log10(df[self.config.apertureInnerInstFluxField].values[use])
                instMagErrIn = k*(df[self.config.apertureInnerInstFluxField + 'Err'].values[use]
                                  / df[self.config.apertureInnerInstFluxField].values[use])
                instMagOut = -2.5*np.log10(df[self.config.apertureOuterInstFluxField].values[use])
                instMagErrOut = k*(df[self.config.apertureOuterInstFluxField + 'Err'].values[use]
                                   / df[self.config.apertureOuterInstFluxField].values[use])

            ok = (np.isfinite(instMagIn) & np.isfinite(instMagErrIn)
                  & np.isfinite(instMagOut) & np.isfinite(instMagErrOut))

            visit['deltaAper'] = np.median(instMagIn[ok] - instMagOut[ok])
            visit['sources_read'] = True

            self.log.info("  Found %d good stars in visit %d (deltaAper = %0.3f)",
                          use.size, visit['visit'], visit['deltaAper'])

            if ((counter % self.config.nVisitsPerCheckpoint) == 0
                    and starObsDataRef is not None and visitCatDataRef is not None):
                # We need to persist both the stars and the visit catalog which gets
                # additional metadata from each visit.
                starObsDataRef.put(fullCatalog)
                visitCatDataRef.put(visitCat)

        self.log.info("Found all good star observations in %.2f s",
                      time.time() - startTime)

        return fullCatalog

    def _get_sourceTable_visit_columns(self, inColumns):
        """
        Get the sourceTable_visit columns from the config.

        Parameters
        ----------
        inColumns : `list`
            List of columns available in the sourceTable_visit

        Returns
        -------
        columns : `list`
            List of columns to read from sourceTable_visit.
        detectorColumn : `str`
            Name of the detector column.
        """
        if 'detector' in inColumns:
            # Default name for Gen3.
            detectorColumn = 'detector'
        else:
            # Default name for Gen2 and Gen2 conversions.
            detectorColumn = 'ccd'
        # Some names are hard-coded in the parquet table.
        columns = ['visit', detectorColumn,
                   'ra', 'decl', 'x', 'y', self.config.psfCandidateName,
                   self.config.instFluxField, self.config.instFluxField + 'Err',
                   self.config.apertureInnerInstFluxField, self.config.apertureInnerInstFluxField + 'Err',
                   self.config.apertureOuterInstFluxField, self.config.apertureOuterInstFluxField + 'Err']
        # Add whatever columns the enabled source-selector stages need.
        if self.sourceSelector.config.doFlags:
            columns.extend(self.sourceSelector.config.flags.bad)
        if self.sourceSelector.config.doUnresolved:
            columns.append(self.sourceSelector.config.unresolved.name)
        if self.sourceSelector.config.doIsolated:
            columns.append(self.sourceSelector.config.isolated.parentName)
            columns.append(self.sourceSelector.config.isolated.nChildName)
        if self.config.doSubtractLocalBackground:
            columns.append(self.config.localBackgroundFluxField)

        return columns, detectorColumn