Coverage for python/lsst/fgcmcal/fgcmBuildStarsTable.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# See COPYRIGHT file at the top of the source tree.
#
# This file is part of fgcmcal.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Build star observations for input to FGCM using sourceTable_visit.

This task finds all the visits and sourceTable_visits in a repository (or a
subset based on command line parameters) and extracts all the potential
calibration stars for input into fgcm. This task additionally uses fgcm to
match star observations into unique stars, and performs as much cleaning of the
input catalog as possible.
"""
import collections
import time
import warnings

import numpy as np

import lsst.daf.persistence as dafPersist
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from lsst.pipe.base import connectionTypes
import lsst.afw.table as afwTable
from lsst.meas.algorithms import ReferenceObjectLoader

from .fgcmBuildStarsBase import FgcmBuildStarsConfigBase, FgcmBuildStarsRunner, FgcmBuildStarsBaseTask
from .utilities import computeApproxPixelAreaFields, computeApertureRadiusFromDataRef
from .utilities import lookupStaticCalibrations
# Names exported by ``from <module> import *``.
__all__ = [
    'FgcmBuildStarsTableConfig',
    'FgcmBuildStarsTableTask',
]
class FgcmBuildStarsTableConnections(pipeBase.PipelineTaskConnections,
                                     dimensions=("instrument",),
                                     defaultTemplates={}):
    """Connections declared for `FgcmBuildStarsTableTask`.

    Four prerequisite inputs (camera, look-up table, source schema and
    reference catalog), three regular inputs (per-visit source tables,
    per-visit summaries and per-detector calexp backgrounds) and five output
    catalogs are declared.  The constructor removes the connections that the
    supplied configuration switches off.
    """

    # ---- prerequisite inputs ------------------------------------------
    camera = connectionTypes.PrerequisiteInput(
        doc="Camera instrument",
        name="camera",
        storageClass="Camera",
        dimensions=("instrument",),
        lookupFunction=lookupStaticCalibrations,
        isCalibration=True,
    )

    fgcmLookUpTable = connectionTypes.PrerequisiteInput(
        doc=("Atmosphere + instrument look-up-table for FGCM throughput and "
             "chromatic corrections."),
        name="fgcmLookUpTable",
        storageClass="Catalog",
        dimensions=("instrument",),
        deferLoad=True,
    )

    sourceSchema = connectionTypes.PrerequisiteInput(
        doc="Schema for source catalogs",
        name="src_schema",
        storageClass="SourceCatalog",
        deferLoad=True,
    )

    refCat = connectionTypes.PrerequisiteInput(
        doc="Reference catalog to use for photometric calibration",
        name="cal_ref_cat",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    # ---- regular inputs -----------------------------------------------
    sourceTable_visit = connectionTypes.Input(
        doc="Source table in parquet format, per visit",
        name="sourceTable_visit",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    visitSummary = connectionTypes.Input(
        doc=("Per-visit consolidated exposure metadata. These catalogs use "
             "detector id for the id and must be sorted for fast lookups of a "
             "detector."),
        name="visitSummary",
        storageClass="ExposureCatalog",
        dimensions=("instrument", "visit"),
        deferLoad=True,
        multiple=True,
    )

    background = connectionTypes.Input(
        doc="Calexp background model",
        name="calexpBackground",
        storageClass="Background",
        dimensions=("instrument", "visit", "detector"),
        deferLoad=True,
        multiple=True,
    )

    # ---- outputs ------------------------------------------------------
    fgcmVisitCatalog = connectionTypes.Output(
        doc="Catalog of visit information for fgcm",
        name="fgcmVisitCatalog",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarObservations = connectionTypes.Output(
        doc="Catalog of star observations for fgcm",
        name="fgcmStarObservations",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIds = connectionTypes.Output(
        doc="Catalog of fgcm calibration star IDs",
        name="fgcmStarIds",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmStarIndices = connectionTypes.Output(
        doc="Catalog of fgcm calibration star indices",
        name="fgcmStarIndices",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    fgcmReferenceStars = connectionTypes.Output(
        doc="Catalog of fgcm-matched reference stars",
        name="fgcmReferenceStars",
        storageClass="Catalog",
        dimensions=("instrument",),
    )

    def __init__(self, *, config=None):
        """Drop the connections that ``config`` does not need.

        Parameters
        ----------
        config
            Task configuration; its ``doReferenceMatches`` and
            ``doModelErrorsWithBackground`` fields are read here.
        """
        super().__init__(config=config)

        withoutReferences = not config.doReferenceMatches
        withoutBackground = not config.doModelErrorsWithBackground

        # Without reference matching neither the reference catalog nor the
        # look-up table is requested.
        if withoutReferences:
            for unused in ("refCat", "fgcmLookUpTable"):
                self.prerequisiteInputs.remove(unused)

        # The calexp backgrounds are only read when they are used to model
        # the errors.
        if withoutBackground:
            self.inputs.remove("background")

        # ... and no matched reference-star catalog is written either.
        if withoutReferences:
            self.outputs.remove("fgcmReferenceStars")
class FgcmBuildStarsTableConfig(FgcmBuildStarsConfigBase, pipeBase.PipelineTaskConfig,
                                pipelineConnections=FgcmBuildStarsTableConnections):
    """Config for FgcmBuildStarsTableTask"""

    referenceCCD = pexConfig.Field(
        doc="Reference CCD for checking PSF and background",
        dtype=int,
        default=40,
    )

    def setDefaults(self):
        """Set defaults that use the sourceTable_visit column names.

        The post-transformed sourceTable_visit catalogs use different field
        and flag names from the raw src catalogs, so the names cannot all be
        taken from the base config class and are set explicitly here.
        """
        super().setDefaults()

        # Every flux field name set below ends with this suffix; the matching
        # flag column is obtained by swapping the suffix for 'flag'.
        fluxSuffixLength = len('instFlux')

        self.instFluxField = 'ApFlux_12_0_instFlux'
        self.localBackgroundFluxField = 'LocalBackground_instFlux'
        self.apertureInnerInstFluxField = 'ApFlux_12_0_instFlux'
        self.apertureOuterInstFluxField = 'ApFlux_17_0_instFlux'
        self.psfCandidateName = 'Calib_psf_candidate'

        scienceSelector = self.sourceSelector["science"]

        instFluxFlag = self.instFluxField[:-fluxSuffixLength] + 'flag'
        scienceSelector.flags.bad = [
            'PixelFlags_edge',
            'PixelFlags_interpolatedCenter',
            'PixelFlags_saturatedCenter',
            'PixelFlags_crCenter',
            'PixelFlags_bad',
            'PixelFlags_interpolated',
            'PixelFlags_saturated',
            'Centroid_flag',
            instFluxFlag,
        ]

        # NOTE(review): this reads doSubtractLocalBackground as it is when
        # setDefaults runs; nothing in this class revisits the flag list if
        # the option is changed afterwards.
        if self.doSubtractLocalBackground:
            backgroundFlag = self.localBackgroundFluxField[:-fluxSuffixLength] + 'flag'
            scienceSelector.flags.bad.append(backgroundFlag)

        # Signal-to-noise is measured on the calibration flux itself.
        scienceSelector.signalToNoise.fluxField = self.instFluxField
        scienceSelector.signalToNoise.errField = self.instFluxField + 'Err'

        scienceSelector.isolated.parentName = 'parentSourceId'
        scienceSelector.isolated.nChildName = 'Deblend_nChild'

        scienceSelector.unresolved.name = 'extendedness'
class FgcmBuildStarsTableTask(FgcmBuildStarsBaseTask):
    """
    Build stars for the FGCM global calibration, using sourceTable_visit catalogs.
    """
    ConfigClass = FgcmBuildStarsTableConfig
    RunnerClass = FgcmBuildStarsRunner
    _DefaultName = "fgcmBuildStarsTable"

    canMultiprocess = False

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Build the fgcm star catalogs for one quantum and store them.

        The visit catalog and the star observations are written first; the
        star ids, star indices and (when reference matching produced one)
        the matched reference stars are written after the matching step.

        Parameters
        ----------
        butlerQC
            Quantum context; used here to ``get`` the inputs and ``put``
            the outputs.
        inputRefs
            Input references; fetched in bulk through ``butlerQC.get`` and
            read directly for ``refCat`` when reference matching is on.
        outputRefs
            Output references, one attribute per output catalog.

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set and the aperture
            radius cannot be determined from ``instFluxField``.
        """
        inputRefDict = butlerQC.get(inputRefs)

        sourceTableRefs = inputRefDict['sourceTable_visit']

        self.log.info("Running with %d sourceTable_visit dataRefs",
                      len(sourceTableRefs))

        sourceTableDataRefDict = {sourceTableRef.dataId['visit']: sourceTableRef for
                                  sourceTableRef in sourceTableRefs}

        if self.config.doReferenceMatches:
            # Get the LUT dataRef
            lutDataRef = inputRefDict['fgcmLookUpTable']

            # Prepare the refCat loader
            refConfig = self.config.fgcmLoadReferenceCatalog.refObjLoader
            refObjLoader = ReferenceObjectLoader(dataIds=[ref.datasetRef.dataId
                                                          for ref in inputRefs.refCat],
                                                 refCats=butlerQC.get(inputRefs.refCat),
                                                 config=refConfig,
                                                 log=self.log)
            self.makeSubtask('fgcmLoadReferenceCatalog', refObjLoader=refObjLoader)
        else:
            lutDataRef = None

        # Compute aperture radius if necessary.  This is useful to do now before
        # any heavy lifting has happened (fail early).
        calibFluxApertureRadius = None
        if self.config.doSubtractLocalBackground:
            try:
                calibFluxApertureRadius = computeApertureRadiusFromDataRef(sourceTableRefs[0],
                                                                           self.config.instFluxField)
            except RuntimeError as e:
                raise RuntimeError("Could not determine aperture radius from %s. "
                                   "Cannot use doSubtractLocalBackground." %
                                   (self.config.instFluxField)) from e

        visitSummaryRefs = inputRefDict['visitSummary']
        visitSummaryDataRefDict = {visitSummaryRef.dataId['visit']: visitSummaryRef for
                                   visitSummaryRef in visitSummaryRefs}

        camera = inputRefDict['camera']
        groupedDataRefs = self._groupDataRefs(sourceTableDataRefDict,
                                              visitSummaryDataRefDict)

        if self.config.doModelErrorsWithBackground:
            bkgRefs = inputRefDict['background']
            # Keyed by (visit, detector); the data id is indexed directly,
            # the same way as for the source tables and visit summaries.
            bkgDataRefDict = {(bkgRef.dataId['visit'],
                               bkgRef.dataId['detector']): bkgRef for
                              bkgRef in bkgRefs}
        else:
            bkgDataRefDict = None

        # Gen3 does not currently allow "checkpoint" saving of datasets,
        # so we need to have this all in one go.
        visitCat = self.fgcmMakeVisitCatalog(camera, groupedDataRefs,
                                             bkgDataRefDict=bkgDataRefDict,
                                             visitCatDataRef=None,
                                             inVisitCat=None)

        rad = calibFluxApertureRadius
        sourceSchemaDataRef = inputRefDict['sourceSchema']
        fgcmStarObservationCat = self.fgcmMakeAllStarObservations(groupedDataRefs,
                                                                  visitCat,
                                                                  sourceSchemaDataRef,
                                                                  camera,
                                                                  calibFluxApertureRadius=rad,
                                                                  starObsDataRef=None,
                                                                  visitCatDataRef=None,
                                                                  inStarObsCat=None)

        butlerQC.put(visitCat, outputRefs.fgcmVisitCatalog)
        butlerQC.put(fgcmStarObservationCat, outputRefs.fgcmStarObservations)

        fgcmStarIdCat, fgcmStarIndicesCat, fgcmRefCat = self.fgcmMatchStars(visitCat,
                                                                            fgcmStarObservationCat,
                                                                            lutDataRef=lutDataRef)

        butlerQC.put(fgcmStarIdCat, outputRefs.fgcmStarIds)
        butlerQC.put(fgcmStarIndicesCat, outputRefs.fgcmStarIndices)
        if fgcmRefCat is not None:
            butlerQC.put(fgcmRefCat, outputRefs.fgcmReferenceStars)

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser"""
        parser = pipeBase.ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument("--id", "sourceTable_visit", help="Data ID, e.g. --id visit=6789")

        return parser

    def _groupDataRefs(self, sourceTableDataRefDict, visitSummaryDataRefDict):
        """Group sourceTable and visitSummary dataRefs (gen3 only).

        Parameters
        ----------
        sourceTableDataRefDict : `dict`
            Source table dataRefs (handles), keyed by visit.
        visitSummaryDataRefDict : `dict`
            Visit summary dataRefs (handles), keyed by visit.  Must contain
            every visit that is present in ``sourceTableDataRefDict``.

        Returns
        -------
        groupedDataRefs : `collections.defaultdict` [`int`, `list`]
            Dictionary filled in sorted visit order.  Each value is a
            two-element list: the first item is the visitSummary ref and
            the second is the source table ref.

        Raises
        ------
        KeyError
            Raised if a visit with a source table has no visit summary.
        """
        groupedDataRefs = collections.defaultdict(list)
        visits = sorted(sourceTableDataRefDict.keys())

        for visit in visits:
            groupedDataRefs[visit] = [visitSummaryDataRefDict[visit],
                                      sourceTableDataRefDict[visit]]

        return groupedDataRefs

    def _findAndGroupDataRefsGen2(self, butler, camera, dataRefs):
        """Group sourceTable_visit dataRefs with one calexp per visit (gen2).

        Parameters
        ----------
        butler
            Butler used to build the calexp dataRefs (``butler.dataRef``).
        camera
            Iterable of detectors; each must provide ``getId()``.
        dataRefs
            sourceTable_visit dataRefs whose ``dataId`` holds the visit
            under ``config.visitDataRefName``.

        Returns
        -------
        groupedDataRefs : `dict` [`int`, `list`]
            Dictionary sorted by visit.  The first calexp dataRef that was
            found for the visit (if any) comes first in each list and the
            sourceTable_visit dataRef is appended after it.
        """
        self.log.info("Grouping dataRefs by %s", self.config.visitDataRefName)

        ccdIds = []
        for detector in camera:
            ccdIds.append(detector.getId())
        # Insert our preferred referenceCCD first:
        # It is fine that this is listed twice, because we only need
        # the first calexp that is found.
        ccdIds.insert(0, self.config.referenceCCD)

        # The visitTable building code expects a dictionary of groupedDataRefs
        # keyed by visit, the first element as the "primary" calexp dataRef.
        # We then append the sourceTable_visit dataRef at the end for the
        # code which does the data reading (fgcmMakeAllStarObservations).

        groupedDataRefs = collections.defaultdict(list)
        for dataRef in dataRefs:
            visit = dataRef.dataId[self.config.visitDataRefName]

            # Find an existing calexp (we need for psf and metadata)
            # and make the relevant dataRef
            for ccdId in ccdIds:
                try:
                    calexpRef = butler.dataRef('calexp', dataId={self.config.visitDataRefName: visit,
                                                                 self.config.ccdDataRefName: ccdId})
                except RuntimeError:
                    # Not found
                    continue

                # It was found.  Add and quit out, since we only
                # need one calexp per visit.
                groupedDataRefs[visit].append(calexpRef)
                break
            else:
                # No detector had a calexp: make it visible, because the
                # first list element will then not be a calexp dataRef.
                self.log.warn("No calexp found for %s %s; only the sourceTable_visit "
                              "dataRef is grouped for this visit.",
                              self.config.visitDataRefName, visit)

            # And append this dataRef
            groupedDataRefs[visit].append(dataRef)

        # This should be sorted by visit (the key)
        return dict(sorted(groupedDataRefs.items()))

    def fgcmMakeAllStarObservations(self, groupedDataRefs, visitCat,
                                    sourceSchemaDataRef,
                                    camera,
                                    calibFluxApertureRadius=None,
                                    visitCatDataRef=None,
                                    starObsDataRef=None,
                                    inStarObsCat=None):
        """Compile the selected star observations of every visit in visitCat.

        Visits whose ``sources_read`` entry is already set are skipped.  For
        each remaining visit the sourceTable_visit columns are read, sources
        are selected, instrumental magnitudes are computed per detector and
        the rows are appended to the output catalog.  ``deltaAper`` and
        ``sources_read`` of the visit record are updated in place.

        Parameters
        ----------
        groupedDataRefs : `dict` [`int`, `list`]
            DataRefs keyed by visit; the last element of each list is the
            sourceTable_visit dataRef that is read here.
        visitCat
            Visit catalog; the ``visit``, ``exptime``, ``scaling``,
            ``sources_read`` and ``deltaAper`` entries of each record are
            used.
        sourceSchemaDataRef
            DataRef whose ``get().schema`` gives the source schema from
            which the output schema is derived.
        camera
            Iterable of detectors; each must provide ``getId()``.
        calibFluxApertureRadius : `float`, optional
            Aperture radius used to scale the local background; required
            when ``doSubtractLocalBackground`` is set.
        visitCatDataRef, starObsDataRef : optional
            DataRefs used to ``put`` checkpoints of the visit catalog and
            the star observations.  Checkpoints are only written when both
            are given.
        inStarObsCat : optional
            Existing star observation catalog to extend; its schema must
            match the output schema.

        Returns
        -------
        fullCatalog
            Catalog of all star observations (``inStarObsCat`` itself when
            one was supplied).

        Raises
        ------
        RuntimeError
            Raised if ``doSubtractLocalBackground`` is set without
            ``calibFluxApertureRadius``, or if ``inStarObsCat`` has a
            mismatched schema.
        """
        startTime = time.time()

        # If both dataRefs are None, then we assume the caller does not
        # want to store checkpoint files.  If both are set, we will
        # do checkpoint files.  And if only one is set, this is potentially
        # unintentional and we will warn.
        if (visitCatDataRef is None) != (starObsDataRef is None):
            self.log.warn("Only one of visitCatDataRef and starObsDataRef are set, so "
                          "no checkpoint files will be persisted.")

        if self.config.doSubtractLocalBackground and calibFluxApertureRadius is None:
            raise RuntimeError("Must set calibFluxApertureRadius if doSubtractLocalBackground is True.")

        # To get the correct output schema, we use similar code as fgcmBuildStarsTask
        # We are not actually using this mapper, except to grab the outputSchema
        sourceSchema = sourceSchemaDataRef.get().schema
        sourceMapper = self._makeSourceMapper(sourceSchema)
        outputSchema = sourceMapper.getOutputSchema()

        # Construct mapping from ccd number to index
        ccdMapping = {}
        for ccdIndex, detector in enumerate(camera):
            ccdMapping[detector.getId()] = ccdIndex

        approxPixelAreaFields = computeApproxPixelAreaFields(camera)

        if inStarObsCat is not None:
            fullCatalog = inStarObsCat
            comp1 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_KEYS)
            comp2 = fullCatalog.schema.compare(outputSchema, outputSchema.EQUAL_NAMES)
            if not comp1 or not comp2:
                raise RuntimeError("Existing fgcmStarObservations file found with mismatched schema.")
        else:
            fullCatalog = afwTable.BaseCatalog(outputSchema)

        visitKey = outputSchema['visit'].asKey()
        ccdKey = outputSchema['ccd'].asKey()
        instMagKey = outputSchema['instMag'].asKey()
        instMagErrKey = outputSchema['instMagErr'].asKey()

        # Prepare local background if desired
        if self.config.doSubtractLocalBackground:
            localBackgroundArea = np.pi*calibFluxApertureRadius**2.

        # Determine which columns we need from the sourceTable_visit catalogs
        columns = self._get_sourceTable_visit_columns()

        # Converts a fractional flux error into a magnitude error.
        k = 2.5/np.log(10.)

        instFluxField = self.config.instFluxField
        innerFluxField = self.config.apertureInnerInstFluxField
        outerFluxField = self.config.apertureOuterInstFluxField

        for counter, visit in enumerate(visitCat):
            # Check if these sources have already been read and stored in the checkpoint file
            if visit['sources_read']:
                continue

            expTime = visit['exptime']

            dataRef = groupedDataRefs[visit['visit']][-1]

            if isinstance(dataRef, dafPersist.ButlerDataRef):
                srcTable = dataRef.get()
                df = srcTable.toDataFrame(columns)
            else:
                df = dataRef.get(parameters={'columns': columns})

            goodSrc = self.sourceSelector.selectSources(df)

            instFlux = df[instFluxField].values

            # Need to add a selection based on the local background correction
            # if necessary
            if self.config.doSubtractLocalBackground:
                localBackground = localBackgroundArea*df[self.config.localBackgroundFluxField].values
                use, = np.where((goodSrc.selected)
                                & ((instFlux - localBackground) > 0.0))
            else:
                use, = np.where(goodSrc.selected)

            tempCat = afwTable.BaseCatalog(fullCatalog.schema)
            tempCat.resize(use.size)

            tempCat['ra'][:] = np.deg2rad(df['ra'].values[use])
            tempCat['dec'][:] = np.deg2rad(df['decl'].values[use])
            tempCat['x'][:] = df['x'].values[use]
            tempCat['y'][:] = df['y'].values[use]
            # These "visit" and "ccd" names in the parquet tables are
            # hard-coded.
            tempCat[visitKey][:] = df['visit'].values[use]
            tempCat[ccdKey][:] = df['ccd'].values[use]
            # Read the same column that _get_sourceTable_visit_columns()
            # requested, so that a non-default psfCandidateName works.
            # NOTE(review): assigned as a whole column rather than through a
            # [:] slice like the columns above; presumably because it is a
            # flag column - confirm.
            tempCat['psf_candidate'] = df[self.config.psfCandidateName].values[use]

            if self.config.doSubtractLocalBackground:
                # At the moment we only adjust the flux and not the flux
                # error by the background because the error on
                # base_LocalBackground_instFlux is the rms error in the
                # background annulus, not the error on the mean in the
                # background estimate (which is much smaller, by sqrt(n)
                # pixels used to estimate the background, which we do not
                # have access to in this task).  In the default settings,
                # the annulus is sufficiently large such that these
                # additional errors are negligibly small (much less
                # than a mmag in quadrature).

                # This is the difference between the mag with local background correction
                # and the mag without local background correction.
                tempCat['deltaMagBkg'] = (-2.5*np.log10(instFlux[use] - localBackground[use])
                                          + 2.5*np.log10(instFlux[use]))
            else:
                tempCat['deltaMagBkg'][:] = 0.0

            # Need to loop over ccds here
            for detector in camera:
                ccdId = detector.getId()
                # used index for all observations with a given ccd
                use2 = (tempCat[ccdKey] == ccdId)
                tempCat['jacobian'][use2] = approxPixelAreaFields[ccdId].evaluate(tempCat['x'][use2],
                                                                                  tempCat['y'][use2])
                scaledInstFlux = (instFlux[use[use2]]
                                  * visit['scaling'][ccdMapping[ccdId]])
                tempCat[instMagKey][use2] = (-2.5*np.log10(scaledInstFlux) + 2.5*np.log10(expTime))

            # Compute instMagErr from instFluxErr/instFlux, any scaling
            # will cancel out.
            tempCat[instMagErrKey][:] = k*(df[instFluxField + 'Err'].values[use]
                                           / instFlux[use])

            # Apply the jacobian if configured
            if self.config.doApplyWcsJacobian:
                tempCat[instMagKey][:] -= 2.5*np.log10(tempCat['jacobian'][:])

            fullCatalog.extend(tempCat)

            # Now do the aperture information
            with warnings.catch_warnings():
                # Ignore warnings, we will filter infinites and nans below
                warnings.simplefilter("ignore")

                innerFlux = df[innerFluxField].values[use]
                outerFlux = df[outerFluxField].values[use]

                instMagIn = -2.5*np.log10(innerFlux)
                instMagErrIn = k*(df[innerFluxField + 'Err'].values[use] / innerFlux)
                instMagOut = -2.5*np.log10(outerFlux)
                instMagErrOut = k*(df[outerFluxField + 'Err'].values[use] / outerFlux)

            ok = (np.isfinite(instMagIn) & np.isfinite(instMagErrIn)
                  & np.isfinite(instMagOut) & np.isfinite(instMagErrOut))

            visit['deltaAper'] = np.median(instMagIn[ok] - instMagOut[ok])
            visit['sources_read'] = True

            self.log.info(" Found %d good stars in visit %d (deltaAper = %0.3f)",
                          use.size, visit['visit'], visit['deltaAper'])

            if ((counter % self.config.nVisitsPerCheckpoint) == 0
                    and starObsDataRef is not None and visitCatDataRef is not None):
                # We need to persist both the stars and the visit catalog which gets
                # additional metadata from each visit.
                starObsDataRef.put(fullCatalog)
                visitCatDataRef.put(visitCat)

        self.log.info("Found all good star observations in %.2f s",
                      time.time() - startTime)

        return fullCatalog

    def _get_sourceTable_visit_columns(self):
        """
        Get the sourceTable_visit columns from the config.

        Returns
        -------
        columns : `list`
            List of columns to read from sourceTable_visit, in first-use
            order and without repeated names.
        """
        # These "visit" and "ccd" names in the parquet tables are hard-coded.
        columns = ['visit', 'ccd',
                   'ra', 'decl', 'x', 'y', self.config.psfCandidateName,
                   self.config.instFluxField, self.config.instFluxField + 'Err',
                   self.config.apertureInnerInstFluxField, self.config.apertureInnerInstFluxField + 'Err',
                   self.config.apertureOuterInstFluxField, self.config.apertureOuterInstFluxField + 'Err']
        if self.sourceSelector.config.doFlags:
            columns.extend(self.sourceSelector.config.flags.bad)
        if self.sourceSelector.config.doUnresolved:
            columns.append(self.sourceSelector.config.unresolved.name)
        if self.sourceSelector.config.doIsolated:
            columns.append(self.sourceSelector.config.isolated.parentName)
            columns.append(self.sourceSelector.config.isolated.nChildName)
        if self.config.doSubtractLocalBackground:
            columns.append(self.config.localBackgroundFluxField)

        # One field can fill several roles (the defaults use the same
        # aperture flux for instFluxField and apertureInnerInstFluxField),
        # so drop repeated names while keeping the order.
        return list(dict.fromkeys(columns))