Coverage for python/lsst/fgcmcal/fgcmBuildStarsTable.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# See COPYRIGHT file at the top of the source tree.
2#
3# This file is part of fgcmcal.
4#
5# Developed for the LSST Data Management System.
6# This product includes software developed by the LSST Project
7# (https://www.lsst.org).
8# See the COPYRIGHT file at the top-level directory of this distribution
9# for details of code ownership.
10#
11# This program is free software: you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation, either version 3 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program. If not, see <https://www.gnu.org/licenses/>.
23"""Build star observations for input to FGCM using sourceTable_visit.
25This task finds all the visits and sourceTable_visits in a repository (or a
26subset based on command line parameters) and extracts all the potential
27calibration stars for input into fgcm. This task additionally uses fgcm to
28match star observations into unique stars, and performs as much cleaning of the
29input catalog as possible.
30"""
import collections
import time
import warnings

import numpy as np

import lsst.daf.persistence as dafPersist
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.algorithms import ReferenceObjectLoader

from .fgcmBuildStarsBase import FgcmBuildStarsConfigBase, FgcmBuildStarsRunner, FgcmBuildStarsBaseTask
from .utilities import computeApproxPixelAreaFields, computeApertureRadiusFromDataRef
from .utilities import lookupStaticCalibrations
48__all__ = ['FgcmBuildStarsTableConfig', 'FgcmBuildStarsTableTask']
class FgcmBuildStarsTableConnections(pipeBase.PipelineTaskConnections,
                                     dimensions=("instrument",),
                                     defaultTemplates={}):
    """Butler connections used by the FgcmBuildStarsTable pipeline task.

    Connections that are only relevant for optional processing steps are
    removed in ``__init__`` according to the supplied config.
    """

    # ---- Prerequisite inputs ----
    camera = connectionTypes.PrerequisiteInput(
        doc="Camera instrument",
        name="camera",
        storageClass="Camera",
        dimensions=("instrument",),
        lookupFunction=lookupStaticCalibrations,
        isCalibration=True,
    )

    fgcmLookUpTable = connectionTypes.PrerequisiteInput(
        doc=("Atmosphere + instrument look-up-table for FGCM throughput and "
             "chromatic corrections."),
        name="fgcmLookUpTable",
        storageClass="Catalog",
        dimensions=("instrument",),
        deferLoad=True,
    )

    sourceSchema = connectionTypes.PrerequisiteInput(
        doc="Schema for source catalogs",
        name="src_schema",
        storageClass="SourceCatalog",
        deferLoad=True,
    )

    refCat = connectionTypes.PrerequisiteInput(
        doc="Reference catalog to use for photometric calibration",
        name="cal_ref_cat",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    # ---- Regular inputs ----
    sourceTable_visit = connectionTypes.Input(
        doc="Source table in parquet format, per visit",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    visitSummary = connectionTypes.Input(
        doc=("Per-visit consolidated exposure metadata.  These catalogs use "
             "detector id for the id and must be sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    background = connectionTypes.Input(
        doc="Calexp background model",
        name="calexpBackground",
        storageClass="Background",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )

    # ---- Outputs ----
    fgcmVisitCatalog = connectionTypes.Output(
        doc="Catalog of visit information for fgcm",
        name="fgcmVisitCatalog",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarObservations = connectionTypes.Output(
        doc="Catalog of star observations for fgcm",
        name="fgcmStarObservations",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIds = connectionTypes.Output(
        doc="Catalog of fgcm calibration star IDs",
        name="fgcmStarIds",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIndices = connectionTypes.Output(
        doc="Catalog of fgcm calibration star indices",
        name="fgcmStarIndices",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmReferenceStars = connectionTypes.Output(
        doc="Catalog of fgcm-matched reference stars",
        name="fgcmReferenceStars",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    def __init__(self, *, config=None):
        """Drop the connections that the given config does not need.

        Parameters
        ----------
        config : `FgcmBuildStarsTableConfig`
            Task configuration; ``doReferenceMatches`` and
            ``doModelErrorsWithBackground`` are consulted.
        """
        super().__init__(config=config)

        # Without reference matching there is nothing to load from the
        # reference catalog or look-up table, and no reference stars to write.
        if not config.doReferenceMatches:
            self.prerequisiteInputs.remove("refCat")
            self.prerequisiteInputs.remove("fgcmLookUpTable")
            self.outputs.remove("fgcmReferenceStars")

        # The background model is only read when modeling errors with it.
        if not config.doModelErrorsWithBackground:
            self.inputs.remove("background")
class FgcmBuildStarsTableConfig(FgcmBuildStarsConfigBase, pipeBase.PipelineTaskConfig,
                                pipelineConnections=FgcmBuildStarsTableConnections):
    """Config for FgcmBuildStarsTableTask"""

    referenceCCD = pexConfig.Field(
        doc="Reference CCD for checking PSF and background",
        dtype=int,
        default=40,
    )

    def setDefaults(self):
        """Set defaults using sourceTable_visit column names.

        The post-transformed sourceTable_visit catalogs use different column
        names than the raw src catalogs, so none of the field or flag names
        can be taken from the base config class.
        """
        super().setDefaults()

        self.instFluxField = 'ApFlux_12_0_instFlux'
        self.localBackgroundFluxField = 'LocalBackground_instFlux'
        self.apertureInnerInstFluxField = 'ApFlux_12_0_instFlux'
        self.apertureOuterInstFluxField = 'ApFlux_17_0_instFlux'
        self.psfCandidateName = 'Calib_psf_candidate'

        selector = self.sourceSelector["science"]

        # Flag columns share the flux column prefix, with the trailing
        # 'instFlux' replaced by 'flag'.
        suffixLength = len('instFlux')
        fluxFlag = self.instFluxField[:-suffixLength] + 'flag'

        selector.flags.bad = [
            'PixelFlags_edge',
            'PixelFlags_interpolatedCenter',
            'PixelFlags_saturatedCenter',
            'PixelFlags_crCenter',
            'PixelFlags_bad',
            'PixelFlags_interpolated',
            'PixelFlags_saturated',
            'Centroid_flag',
            fluxFlag,
        ]

        if self.doSubtractLocalBackground:
            backgroundFlag = self.localBackgroundFluxField[:-suffixLength] + 'flag'
            selector.flags.bad.append(backgroundFlag)

        # Signal-to-noise selection is based on the calibration flux.
        selector.signalToNoise.fluxField = self.instFluxField
        selector.signalToNoise.errField = self.instFluxField + 'Err'

        selector.isolated.parentName = 'parentSourceId'
        selector.isolated.nChildName = 'Deblend_nChild'

        selector.unresolved.name = 'extendedness'
class FgcmBuildStarsTableTask(FgcmBuildStarsBaseTask):
    """
    Build stars for the FGCM global calibration, using sourceTable_visit catalogs.
    """
    ConfigClass = FgcmBuildStarsTableConfig
    RunnerClass = FgcmBuildStarsRunner
    _DefaultName = "fgcmBuildStarsTable"

    canMultiprocess = False

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Run the task on one quantum of data (gen3 entry point).

        Loads the inputs through ``butlerQC``, builds the visit catalog and
        the star observation catalog, matches the stars, and writes all
        outputs with ``butlerQC.put``.

        Parameters
        ----------
        butlerQC
            Quantum context used to ``get`` the inputs and ``put`` the outputs.
        inputRefs
            Input references; ``inputRefs.refCat`` is used directly when
            ``doReferenceMatches`` is set.
        outputRefs
            Output references for the catalogs produced here.

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set and the aperture
            radius cannot be determined from ``instFluxField``.
        """
        inputRefDict = butlerQC.get(inputRefs)

        sourceTableRefs = inputRefDict['sourceTable_visit']

        self.log.info("Running with %d sourceTable_visit dataRefs",
                      len(sourceTableRefs))

        sourceTableDataRefDict = {sourceTableRef.dataId['visit']: sourceTableRef for
                                  sourceTableRef in sourceTableRefs}

        if self.config.doReferenceMatches:
            # Get the LUT dataRef
            lutDataRef = inputRefDict['fgcmLookUpTable']

            # Prepare the refCat loader
            refConfig = self.config.fgcmLoadReferenceCatalog.refObjLoader
            refObjLoader = ReferenceObjectLoader(dataIds=[ref.datasetRef.dataId
                                                          for ref in inputRefs.refCat],
                                                 refCats=butlerQC.get(inputRefs.refCat),
                                                 config=refConfig,
                                                 log=self.log)
            self.makeSubtask('fgcmLoadReferenceCatalog', refObjLoader=refObjLoader)
        else:
            lutDataRef = None

        # Compute aperture radius if necessary.  This is useful to do now before
        # any heavy lifting has happened (fail early).
        calibFluxApertureRadius = None
        if self.config.doSubtractLocalBackground:
            try:
                calibFluxApertureRadius = computeApertureRadiusFromDataRef(sourceTableRefs[0],
                                                                           self.config.instFluxField)
            except RuntimeError as e:
                raise RuntimeError("Could not determine aperture radius from %s. "
                                   "Cannot use doSubtractLocalBackground." %
                                   (self.config.instFluxField)) from e

        visitSummaryRefs = inputRefDict['visitSummary']
        visitSummaryDataRefDict = {visitSummaryRef.dataId['visit']: visitSummaryRef for
                                   visitSummaryRef in visitSummaryRefs}

        camera = inputRefDict['camera']
        groupedDataRefs = self._groupDataRefs(sourceTableDataRefDict,
                                              visitSummaryDataRefDict)

        if self.config.doModelErrorsWithBackground:
            bkgRefs = inputRefDict['background']
            bkgDataRefDict = {(bkgRef.dataId.byName()['visit'],
                               bkgRef.dataId.byName()['detector']): bkgRef for
                              bkgRef in bkgRefs}
        else:
            bkgDataRefDict = None

        # Gen3 does not currently allow "checkpoint" saving of datasets,
        # so we need to have this all in one go.
        visitCat = self.fgcmMakeVisitCatalog(camera, groupedDataRefs,
                                             bkgDataRefDict=bkgDataRefDict,
                                             visitCatDataRef=None,
                                             inVisitCat=None)

        sourceSchemaDataRef = inputRefDict['sourceSchema']
        fgcmStarObservationCat = self.fgcmMakeAllStarObservations(
            groupedDataRefs,
            visitCat,
            sourceSchemaDataRef,
            camera,
            calibFluxApertureRadius=calibFluxApertureRadius,
            starObsDataRef=None,
            visitCatDataRef=None,
            inStarObsCat=None)

        butlerQC.put(visitCat, outputRefs.fgcmVisitCatalog)
        butlerQC.put(fgcmStarObservationCat, outputRefs.fgcmStarObservations)

        fgcmStarIdCat, fgcmStarIndicesCat, fgcmRefCat = self.fgcmMatchStars(visitCat,
                                                                            fgcmStarObservationCat,
                                                                            lutDataRef=lutDataRef)

        butlerQC.put(fgcmStarIdCat, outputRefs.fgcmStarIds)
        butlerQC.put(fgcmStarIndicesCat, outputRefs.fgcmStarIndices)
        if fgcmRefCat is not None:
            butlerQC.put(fgcmRefCat, outputRefs.fgcmReferenceStars)

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser"""
        parser = pipeBase.ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", "sourceTable_visit", help="Data ID, e.g. --id visit=6789")

        return parser

    def _groupDataRefs(self, sourceTableDataRefDict, visitSummaryDataRefDict):
        """Group sourceTable and visitSummary dataRefs (gen3 only).

        Parameters
        ----------
        sourceTableDataRefDict : `dict` [`int`, `lsst.daf.butler.DeferredDatasetHandle`]
            Dict of source table handles, keyed by visit.
        visitSummaryDataRefDict : `dict` [`int`, `lsst.daf.butler.DeferredDatasetHandle`]
            Dict of visit summary catalog handles, keyed by visit.

        Returns
        -------
        groupedDataRefs : `dict` [`int`, `list`]
            Dictionary with sorted visit keys, and `list`s with
            `lsst.daf.butler.DeferredDatasetHandle`.  The first
            item in the list will be the visitSummary ref, and
            the second will be the source table ref.

        Raises
        ------
        KeyError
            Raised if a visit with a source table has no entry in
            ``visitSummaryDataRefDict``.
        """
        groupedDataRefs = collections.defaultdict(list)

        for visit in sorted(sourceTableDataRefDict):
            groupedDataRefs[visit] = [visitSummaryDataRefDict[visit],
                                      sourceTableDataRefDict[visit]]

        return groupedDataRefs

    def _findAndGroupDataRefsGen2(self, butler, camera, dataRefs):
        """Find one calexp per visit and group it with the source table (gen2).

        Parameters
        ----------
        butler
            Butler used to build ``calexp`` dataRefs.
        camera
            Iterable of detectors; each must provide ``getId()``.
        dataRefs
            Iterable of sourceTable_visit dataRefs.

        Returns
        -------
        groupedDataRefs : `dict` [`int`, `list`]
            Dictionary sorted by visit.  Each list holds the first existing
            calexp dataRef that was found (if any), followed by the
            sourceTable_visit dataRef(s) for that visit.
        """
        self.log.info("Grouping dataRefs by %s", (self.config.visitDataRefName))

        ccdIds = [detector.getId() for detector in camera]
        # Insert our preferred referenceCCD first:
        # It is fine that this is listed twice, because we only need
        # the first calexp that is found.
        ccdIds.insert(0, self.config.referenceCCD)

        # The visitTable building code expects a dictionary of groupedDataRefs
        # keyed by visit, the first element as the "primary" calexp dataRef.
        # We then append the sourceTable_visit dataRef at the end for the
        # code which does the data reading (fgcmMakeAllStarObservations).

        groupedDataRefs = collections.defaultdict(list)
        for dataRef in dataRefs:
            visit = dataRef.dataId[self.config.visitDataRefName]

            # Find an existing calexp (we need for psf and metadata)
            # and make the relevant dataRef
            for ccdId in ccdIds:
                try:
                    calexpRef = butler.dataRef('calexp', dataId={self.config.visitDataRefName: visit,
                                                                 self.config.ccdDataRefName: ccdId})
                except RuntimeError:
                    # Not found
                    continue

                # Make sure the dataset exists
                if not calexpRef.datasetExists():
                    continue

                # It was found.  Add and quit out, since we only
                # need one calexp per visit.
                groupedDataRefs[visit].append(calexpRef)
                break

            # And append this dataRef
            groupedDataRefs[visit].append(dataRef)

        # This should be sorted by visit (the key)
        return dict(sorted(groupedDataRefs.items()))

    def fgcmMakeAllStarObservations(self, groupedDataRefs, visitCat,
                                    sourceSchemaDataRef,
                                    camera,
                                    calibFluxApertureRadius=None,
                                    visitCatDataRef=None,
                                    starObsDataRef=None,
                                    inStarObsCat=None):
        """Compile the good star observations for every visit in ``visitCat``.

        Parameters
        ----------
        groupedDataRefs : `dict` [`int`, `list`]
            DataRefs keyed by visit; the last element of each list is read
            as the sourceTable_visit for that visit.
        visitCat
            Visit catalog.  Rows with ``sources_read`` already set are
            skipped; for processed rows ``deltaAper`` and ``sources_read``
            are updated in place.
        sourceSchemaDataRef
            DataRef whose ``get().schema`` provides the source schema used
            to derive the output schema.
        camera
            Iterable of detectors; each must provide ``getId()``.
        calibFluxApertureRadius : `float`, optional
            Aperture radius used to scale the local background; required
            when ``doSubtractLocalBackground`` is set.  Presumably in
            pixels -- TODO confirm against computeApertureRadiusFromDataRef.
        visitCatDataRef, starObsDataRef : optional
            DataRefs used to persist checkpoints.  Checkpoints are written
            only if both are provided.
        inStarObsCat : optional
            Existing star observation catalog to extend; its schema must
            match the output schema.

        Returns
        -------
        fullCatalog : `lsst.afw.table.BaseCatalog`
            Catalog of all selected star observations.

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set without a
            ``calibFluxApertureRadius``, or if ``inStarObsCat`` has a
            mismatched schema.
        """
        startTime = time.time()

        # If both dataRefs are None, then we assume the caller does not
        # want to store checkpoint files.  If both are set, we will
        # do checkpoint files.  And if only one is set, this is potentially
        # unintentional and we will warn.
        if (visitCatDataRef is None) != (starObsDataRef is None):
            self.log.warning("Only one of visitCatDataRef and starObsDataRef are set, so "
                             "no checkpoint files will be persisted.")

        if self.config.doSubtractLocalBackground and calibFluxApertureRadius is None:
            raise RuntimeError("Must set calibFluxApertureRadius if doSubtractLocalBackground is True.")

        # To get the correct output schema, we use similar code as fgcmBuildStarsTask
        # We are not actually using this mapper, except to grab the outputSchema
        sourceSchema = sourceSchemaDataRef.get().schema
        sourceMapper = self._makeSourceMapper(sourceSchema)
        outputSchema = sourceMapper.getOutputSchema()

        # Construct mapping from ccd number to index
        ccdMapping = {detector.getId(): ccdIndex
                      for ccdIndex, detector in enumerate(camera)}

        approxPixelAreaFields = computeApproxPixelAreaFields(camera)

        if inStarObsCat is not None:
            fullCatalog = inStarObsCat
            comp1 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_KEYS)
            comp2 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_NAMES)
            if not comp1 or not comp2:
                raise RuntimeError("Existing fgcmStarObservations file found with mismatched schema.")
        else:
            fullCatalog = afwTable.BaseCatalog(outputSchema)

        visitKey = outputSchema['visit'].asKey()
        ccdKey = outputSchema['ccd'].asKey()
        instMagKey = outputSchema['instMag'].asKey()
        instMagErrKey = outputSchema['instMagErr'].asKey()

        # Prepare local background if desired
        if self.config.doSubtractLocalBackground:
            localBackgroundArea = np.pi*calibFluxApertureRadius**2.

        # Determine which columns we need from the sourceTable_visit catalogs
        columns = self._get_sourceTable_visit_columns()

        # Conversion factor from fractional flux error to magnitude error.
        k = 2.5/np.log(10.)

        for counter, visit in enumerate(visitCat):
            # Check if these sources have already been read and stored in the checkpoint file
            if visit['sources_read']:
                continue

            expTime = visit['exptime']

            dataRef = groupedDataRefs[visit['visit']][-1]

            if isinstance(dataRef, dafPersist.ButlerDataRef):
                srcTable = dataRef.get()
                df = srcTable.toDataFrame(columns)
            else:
                df = dataRef.get(parameters={'columns': columns})

            goodSrc = self.sourceSelector.selectSources(df)

            # The calibration flux column is used repeatedly below.
            instFlux = df[self.config.instFluxField].values

            # Need to add a selection based on the local background correction
            # if necessary
            if self.config.doSubtractLocalBackground:
                localBackground = localBackgroundArea*df[self.config.localBackgroundFluxField].values
                use, = np.where((goodSrc.selected)
                                & ((instFlux - localBackground) > 0.0))
            else:
                use, = np.where(goodSrc.selected)

            tempCat = afwTable.BaseCatalog(fullCatalog.schema)
            tempCat.resize(use.size)

            tempCat['ra'][:] = np.deg2rad(df['ra'].values[use])
            tempCat['dec'][:] = np.deg2rad(df['decl'].values[use])
            tempCat['x'][:] = df['x'].values[use]
            tempCat['y'][:] = df['y'].values[use]
            # These "visit" and "ccd" names in the parquet tables are
            # hard-coded.
            tempCat[visitKey][:] = df['visit'].values[use]
            tempCat[ccdKey][:] = df['ccd'].values[use]
            # Use the configured column name so this stays consistent with
            # the columns requested by _get_sourceTable_visit_columns.
            tempCat['psf_candidate'] = df[self.config.psfCandidateName].values[use]

            if self.config.doSubtractLocalBackground:
                # At the moment we only adjust the flux and not the flux
                # error by the background because the error on
                # base_LocalBackground_instFlux is the rms error in the
                # background annulus, not the error on the mean in the
                # background estimate (which is much smaller, by sqrt(n)
                # pixels used to estimate the background, which we do not
                # have access to in this task).  In the default settings,
                # the annulus is sufficiently large such that these
                # additional errors are negligibly small (much less
                # than a mmag in quadrature).

                # This is the difference between the mag with local background correction
                # and the mag without local background correction.
                tempCat['deltaMagBkg'] = (-2.5*np.log10(instFlux[use] - localBackground[use])
                                          - (-2.5*np.log10(instFlux[use])))
            else:
                tempCat['deltaMagBkg'][:] = 0.0

            # Need to loop over ccds here
            for detector in camera:
                ccdId = detector.getId()
                # used index for all observations with a given ccd
                use2 = (tempCat[ccdKey] == ccdId)
                tempCat['jacobian'][use2] = approxPixelAreaFields[ccdId].evaluate(tempCat['x'][use2],
                                                                                  tempCat['y'][use2])
                scaledInstFlux = (instFlux[use[use2]]
                                  * visit['scaling'][ccdMapping[ccdId]])
                tempCat[instMagKey][use2] = (-2.5*np.log10(scaledInstFlux) + 2.5*np.log10(expTime))

            # Compute instMagErr from instFluxErr/instFlux, any scaling
            # will cancel out.
            tempCat[instMagErrKey][:] = k*(df[self.config.instFluxField + 'Err'].values[use]
                                           / instFlux[use])

            # Apply the jacobian if configured
            if self.config.doApplyWcsJacobian:
                tempCat[instMagKey][:] -= 2.5*np.log10(tempCat['jacobian'][:])

            fullCatalog.extend(tempCat)

            # Now do the aperture information
            # (np.warnings was only an alias of the stdlib module and has
            # been removed from NumPy, so use the stdlib module directly.)
            with warnings.catch_warnings():
                # Ignore warnings, we will filter infinites and nans below
                warnings.simplefilter("ignore")

                instMagIn = -2.5*np.log10(df[self.config.apertureInnerInstFluxField].values[use])
                instMagErrIn = k*(df[self.config.apertureInnerInstFluxField + 'Err'].values[use]
                                  / df[self.config.apertureInnerInstFluxField].values[use])
                instMagOut = -2.5*np.log10(df[self.config.apertureOuterInstFluxField].values[use])
                instMagErrOut = k*(df[self.config.apertureOuterInstFluxField + 'Err'].values[use]
                                   / df[self.config.apertureOuterInstFluxField].values[use])

            ok = (np.isfinite(instMagIn) & np.isfinite(instMagErrIn)
                  & np.isfinite(instMagOut) & np.isfinite(instMagErrOut))

            visit['deltaAper'] = np.median(instMagIn[ok] - instMagOut[ok])
            visit['sources_read'] = True

            self.log.info(" Found %d good stars in visit %d (deltaAper = %0.3f)",
                          use.size, visit['visit'], visit['deltaAper'])

            if ((counter % self.config.nVisitsPerCheckpoint) == 0
                    and starObsDataRef is not None and visitCatDataRef is not None):
                # We need to persist both the stars and the visit catalog which gets
                # additional metadata from each visit.
                starObsDataRef.put(fullCatalog)
                visitCatDataRef.put(visitCat)

        self.log.info("Found all good star observations in %.2f s",
                      time.time() - startTime)

        return fullCatalog

    def _get_sourceTable_visit_columns(self):
        """
        Get the sourceTable_visit columns from the config.

        Returns
        -------
        columns : `list`
            List of columns to read from sourceTable_visit
        """
        # These "visit" and "ccd" names in the parquet tables are hard-coded.
        columns = ['visit', 'ccd',
                   'ra', 'decl', 'x', 'y', self.config.psfCandidateName,
                   self.config.instFluxField, self.config.instFluxField + 'Err',
                   self.config.apertureInnerInstFluxField, self.config.apertureInnerInstFluxField + 'Err',
                   self.config.apertureOuterInstFluxField, self.config.apertureOuterInstFluxField + 'Err']
        # Add whatever columns the enabled source selector stages need.
        if self.sourceSelector.config.doFlags:
            columns.extend(self.sourceSelector.config.flags.bad)
        if self.sourceSelector.config.doUnresolved:
            columns.append(self.sourceSelector.config.unresolved.name)
        if self.sourceSelector.config.doIsolated:
            columns.append(self.sourceSelector.config.isolated.parentName)
            columns.append(self.sourceSelector.config.isolated.nChildName)
        if self.config.doSubtractLocalBackground:
            columns.append(self.config.localBackgroundFluxField)

        return columns