Coverage for python/lsst/validate/drp/matchreduce.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# LSST Data Management System
2# Copyright 2016-2019 AURA/LSST.
3#
4# This product includes software developed by the
5# LSST Project (http://www.lsst.org/).
6#
7# This program is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the LSST License Statement and
18# the GNU General Public License along with this program. If not,
19# see <https://www.lsstcorp.org/LegalNotices/>.
20"""Blob classes that reduce a multi-visit dataset and encapsulate data
21for measurement classes, plotting functions, and JSON persistence.
22"""
24__all__ = ['build_matched_dataset', 'getKeysFilter', 'filterSources', 'summarizeSources']
26import numpy as np
27import astropy.units as u
28from sqlalchemy.exc import OperationalError
29import sqlite3
31import lsst.geom as geom
32import lsst.daf.persistence as dafPersist
33from lsst.afw.table import (SourceCatalog, SchemaMapper, Field,
34 MultiMatch, SimpleRecord, GroupView,
35 SOURCE_IO_NO_FOOTPRINTS)
36import lsst.afw.table as afwTable
37from lsst.afw.fits import FitsError
38import lsst.pipe.base as pipeBase
39from lsst.verify import Blob, Datum
41from .util import (getCcdKeyName, raftSensorToInt, positionRmsFromCat,
42 ellipticity_from_cat)
def build_matched_dataset(repo, dataIds, matchRadius=None, brightSnrMin=50.,
                          doApplyExternalPhotoCalib=False, externalPhotoCalibName=None,
                          doApplyExternalSkyWcs=False, externalSkyWcsName=None,
                          skipTEx=False, skipNonSrd=False):
    """Construct a container for matched star catalogs from multiple visits,
    with filtering, summary statistics, and modelling.

    `lsst.verify.Blob` instances are serializable to JSON.

    Parameters
    ----------
    repo : `str` or `lsst.daf.persistence.Butler`
        A Butler instance or a repository URL that can be used to construct
        one.
    dataIds : `list` of `dict`
        List of `butler` data IDs of Image catalogs to compare to reference.
        The `calexp` cpixel image is needed for the photometric calibration.
        Every data ID must contain a ``'filter'`` entry.
    matchRadius : `lsst.geom.Angle`, optional
        Radius for matching. Default is 1 arcsecond.
    brightSnrMin : `float`, optional
        Minimum median SNR for a match to be considered "safe".
    doApplyExternalPhotoCalib : `bool`, optional
        Apply external photoCalib to calibrate fluxes.
    externalPhotoCalibName : `str`, optional
        Type of external `PhotoCalib` to apply. Currently supported are
        jointcal, fgcm, and fgcm_tract. Must be set if
        ``doApplyExternalPhotoCalib`` is True.
    doApplyExternalSkyWcs : `bool`, optional
        Apply external wcs to calibrate positions.
    externalSkyWcsName : `str`, optional
        Type of external `wcs` to apply. Currently supported is jointcal.
        Must be set if ``doApplyExternalSkyWcs`` is True.
    skipTEx : `bool`, optional
        Skip TEx calculations (useful for older catalogs that don't have
        PsfShape measurements).
    skipNonSrd : `bool`, optional
        Skip any metrics not defined in the LSST SRD; default False.

    Returns
    -------
    blob : `lsst.verify.Blob`
        Blob named ``'MatchedMultiVisitDataset'``; see Notes for its content.

    Raises
    ------
    RuntimeError
        Raised if ``doApplyExternalPhotoCalib`` is True and
        ``externalPhotoCalibName`` is None, or if ``doApplyExternalSkyWcs``
        is True and ``externalSkyWcsName`` is None.

    Notes
    -----
    The returned blob carries the following items and attributes.

    filterName : `str`
        Name of filter used for all observations.
    mag : `astropy.units.Quantity`
        Mean PSF magnitudes of stars over multiple visits (magnitudes).
    magerr : `astropy.units.Quantity`
        Median 1-sigma uncertainty of PSF magnitudes over multiple visits
        (magnitudes).
    magrms : `astropy.units.Quantity`
        RMS of PSF magnitudes over multiple visits (magnitudes).
    snr : `astropy.units.Quantity`
        Median signal-to-noise ratio of PSF magnitudes over multiple visits
        (dimensionless).
    dist : `astropy.units.Quantity`
        RMS of sky coordinates of stars over multiple visits
        (milliarcseconds).
    matchesFaint : `afw.table.GroupView`
        *Not serialized.* Faint matches containing only objects that have:

        1. A PSF Flux measurement with sufficient S/N.
        2. A finite (non-nan) PSF magnitude. This separate check is largely
           to reject failed zeropoints.
        3. No flags set for bad, cosmic ray, edge or saturated.
        4. Extendedness consistent with a point source.
    matchesBright : `afw.table.GroupView`
        *Not serialized.* Bright matches matching a higher S/N threshold
        than matchesFaint.
    magKey
        *Not serialized.* Key for ``"base_PsfFlux_mag"`` in the matched
        catalog schema.
    """
    # Fail fast on inconsistent configuration, before any Butler access.
    if doApplyExternalPhotoCalib and externalPhotoCalibName is None:
        raise RuntimeError("Must set externalPhotoCalibName if doApplyExternalPhotoCalib is True.")
    if doApplyExternalSkyWcs and externalSkyWcsName is None:
        raise RuntimeError("Must set externalSkyWcsName if doApplyExternalSkyWcs is True.")

    blob = Blob('MatchedMultiVisitDataset')

    if not matchRadius:
        matchRadius = geom.Angle(1, geom.arcseconds)

    # Extract single filter.
    # NOTE(review): this assumes every data ID shares one filter; if they
    # differ, an arbitrary one of them is recorded. An empty ``dataIds``
    # raises KeyError from ``set.pop``.
    blob['filterName'] = Datum(quantity={dId['filter'] for dId in dataIds}.pop(),
                               description='Filter name')

    # Record important configuration
    blob['doApplyExternalPhotoCalib'] = Datum(quantity=doApplyExternalPhotoCalib,
                                              description=('Whether external photometric '
                                                           'calibrations were used.'))
    blob['externalPhotoCalibName'] = Datum(quantity=externalPhotoCalibName,
                                           description='Name of external PhotoCalib dataset used.')
    blob['doApplyExternalSkyWcs'] = Datum(quantity=doApplyExternalSkyWcs,
                                          description='Whether external wcs calibrations were used.')
    blob['externalSkyWcsName'] = Datum(quantity=externalSkyWcsName,
                                       description='Name of external wcs dataset used.')

    # Match catalogs across visits
    blob._catalog, blob._matchedCatalog = \
        _loadAndMatchCatalogs(repo, dataIds, matchRadius,
                              doApplyExternalPhotoCalib=doApplyExternalPhotoCalib,
                              externalPhotoCalibName=externalPhotoCalibName,
                              doApplyExternalSkyWcs=doApplyExternalSkyWcs,
                              externalSkyWcsName=externalSkyWcsName,
                              skipTEx=skipTEx, skipNonSrd=skipNonSrd)

    blob.magKey = blob._matchedCatalog.schema.find("base_PsfFlux_mag").key
    # Reduce catalogs into summary statistics.
    # These are the serializable attributes of this class.
    summarizeSources(blob, filterSources(blob._matchedCatalog, brightSnrMin=brightSnrMin))
    return blob
168def _loadAndMatchCatalogs(repo, dataIds, matchRadius,
169 doApplyExternalPhotoCalib=False, externalPhotoCalibName=None,
170 doApplyExternalSkyWcs=False, externalSkyWcsName=None,
171 skipTEx=False, skipNonSrd=False):
172 """Load data from specific visits and returned a calibrated catalog matched
173 with a reference.
175 Parameters
176 ----------
177 repo : `str` or `lsst.daf.persistence.Butler`
178 A Butler or a repository URL that can be used to construct one.
179 dataIds : list of dict
180 List of butler data IDs of Image catalogs to compare to
181 reference. The calexp cpixel image is needed for the photometric
182 calibration.
183 matchRadius : `lsst.geom.Angle`, optional
184 Radius for matching. Default is 1 arcsecond.
185 doApplyExternalPhotoCalib : bool, optional
186 Apply external photoCalib to calibrate fluxes.
187 externalPhotoCalibName : str, optional
188 Type of external `PhotoCalib` to apply. Currently supported are jointcal,
189 fgcm, and fgcm_tract. Must be set if doApplyExternalPhotoCalib is True.
190 doApplyExternalSkyWcs : bool, optional
191 Apply external wcs to calibrate positions.
192 externalSkyWcsName : str, optional
193 Type of external `wcs` to apply. Currently supported is jointcal.
194 Must be set if "doApplyExternalWcs" is True.
195 skipTEx : `bool`, optional
196 Skip TEx calculations (useful for older catalogs that don't have
197 PsfShape measurements).
198 skipNonSrd : `bool`, optional
199 Skip any metrics not defined in the LSST SRD; default False.
201 Returns
202 -------
203 catalog : `lsst.afw.table.SourceCatalog`
204 A new calibrated SourceCatalog.
205 matches : `lsst.afw.table.GroupView`
206 A GroupView of the matched sources.
208 Raises
209 ------
210 RuntimeError:
211 Raised if "doApplyExternalPhotoCalib" is True and "externalPhotoCalibName"
212 is None, or if "doApplyExternalSkyWcs" is True and "externalSkyWcsName" is
213 None.
214 """
216 if doApplyExternalPhotoCalib and externalPhotoCalibName is None:
217 raise RuntimeError("Must set externalPhotoCalibName if doApplyExternalPhotoCalib is True.")
218 if doApplyExternalSkyWcs and externalSkyWcsName is None:
219 raise RuntimeError("Must set externalSkyWcsName if doApplyExternalSkyWcs is True.")
221 # Following
222 # https://github.com/lsst/afw/blob/tickets/DM-3896/examples/repeatability.ipynb
223 if isinstance(repo, dafPersist.Butler):
224 butler = repo
225 else:
226 butler = dafPersist.Butler(repo)
227 dataset = 'src'
229 # 2016-02-08 MWV:
230 # I feel like I could be doing something more efficient with
231 # something along the lines of the following:
232 # dataRefs = [dafPersist.ButlerDataRef(butler, vId) for vId in dataIds]
234 ccdKeyName = getCcdKeyName(dataIds[0])
236 # Hack to support raft and sensor 0,1 IDs as ints for multimatch
237 if ccdKeyName == 'sensor':
238 ccdKeyName = 'raft_sensor_int'
239 for vId in dataIds:
240 vId[ccdKeyName] = raftSensorToInt(vId)
242 schema = butler.get(dataset + "_schema").schema
243 mapper = SchemaMapper(schema)
244 mapper.addMinimalSchema(schema)
245 mapper.addOutputField(Field[float]('base_PsfFlux_snr',
246 'PSF flux SNR'))
247 mapper.addOutputField(Field[float]('base_PsfFlux_mag',
248 'PSF magnitude'))
249 mapper.addOutputField(Field[float]('base_PsfFlux_magErr',
250 'PSF magnitude uncertainty'))
251 if not skipNonSrd:
252 # Needed because addOutputField(... 'slot_ModelFlux_mag') will add a field with that literal name
253 aliasMap = schema.getAliasMap()
254 # Possibly not needed since base_GaussianFlux is the default, but this ought to be safe
255 modelName = aliasMap['slot_ModelFlux'] if 'slot_ModelFlux' in aliasMap.keys() else 'base_GaussianFlux'
256 mapper.addOutputField(Field[float](f'{modelName}_mag',
257 'Model magnitude'))
258 mapper.addOutputField(Field[float](f'{modelName}_magErr',
259 'Model magnitude uncertainty'))
260 mapper.addOutputField(Field[float](f'{modelName}_snr',
261 'Model flux snr'))
262 mapper.addOutputField(Field[float]('e1',
263 'Source Ellipticity 1'))
264 mapper.addOutputField(Field[float]('e2',
265 'Source Ellipticity 1'))
266 mapper.addOutputField(Field[float]('psf_e1',
267 'PSF Ellipticity 1'))
268 mapper.addOutputField(Field[float]('psf_e2',
269 'PSF Ellipticity 1'))
270 newSchema = mapper.getOutputSchema()
271 newSchema.setAliasMap(schema.getAliasMap())
273 # Create an object that matches multiple catalogs with same schema
274 mmatch = MultiMatch(newSchema,
275 dataIdFormat={'visit': np.int32, ccdKeyName: np.int32},
276 radius=matchRadius,
277 RecordClass=SimpleRecord)
279 # create the new extented source catalog
280 srcVis = SourceCatalog(newSchema)
282 for vId in dataIds:
283 if not butler.datasetExists('src', vId):
284 print(f'Could not find source catalog for {vId}; skipping.')
285 continue
287 photoCalib = _loadPhotoCalib(butler, vId,
288 doApplyExternalPhotoCalib, externalPhotoCalibName)
289 if photoCalib is None:
290 continue
292 if doApplyExternalSkyWcs:
293 wcs = _loadExternalSkyWcs(butler, vId, externalSkyWcsName)
294 if wcs is None:
295 continue
297 # We don't want to put this above the first _loadPhotoCalib call
298 # because we need to use the first `butler.get` in there to quickly
299 # catch dataIDs with no usable outputs.
300 try:
301 # HSC supports these flags, which dramatically improve I/O
302 # performance; support for other cameras is DM-6927.
303 oldSrc = butler.get('src', vId, flags=SOURCE_IO_NO_FOOTPRINTS)
304 except (OperationalError, sqlite3.OperationalError):
305 oldSrc = butler.get('src', vId)
307 print(len(oldSrc), "sources in ccd %s visit %s" %
308 (vId[ccdKeyName], vId["visit"]))
310 # create temporary catalog
311 tmpCat = SourceCatalog(SourceCatalog(newSchema).table)
312 tmpCat.extend(oldSrc, mapper=mapper)
313 tmpCat['base_PsfFlux_snr'][:] = tmpCat['base_PsfFlux_instFlux'] \
314 / tmpCat['base_PsfFlux_instFluxErr']
316 if doApplyExternalSkyWcs:
317 afwTable.updateSourceCoords(wcs, tmpCat)
318 photoCalib.instFluxToMagnitude(tmpCat, "base_PsfFlux", "base_PsfFlux")
319 if not skipNonSrd:
320 tmpCat['slot_ModelFlux_snr'][:] = (tmpCat['slot_ModelFlux_instFlux'] /
321 tmpCat['slot_ModelFlux_instFluxErr'])
322 photoCalib.instFluxToMagnitude(tmpCat, "slot_ModelFlux", "slot_ModelFlux")
324 if not skipTEx:
325 _, psf_e1, psf_e2 = ellipticity_from_cat(oldSrc, slot_shape='slot_PsfShape')
326 _, star_e1, star_e2 = ellipticity_from_cat(oldSrc, slot_shape='slot_Shape')
327 tmpCat['e1'][:] = star_e1
328 tmpCat['e2'][:] = star_e2
329 tmpCat['psf_e1'][:] = psf_e1
330 tmpCat['psf_e2'][:] = psf_e2
332 srcVis.extend(tmpCat, False)
333 mmatch.add(catalog=tmpCat, dataId=vId)
335 # Complete the match, returning a catalog that includes
336 # all matched sources with object IDs that can be used to group them.
337 matchCat = mmatch.finish()
339 # Create a mapping object that allows the matches to be manipulated
340 # as a mapping of object ID to catalog of sources.
341 allMatches = GroupView.build(matchCat)
343 return srcVis, allMatches
def getKeysFilter(schema, nameFluxKey=None):
    """Look up the schema keys used when filtering matched sources.

    Parameters
    ----------
    schema : `lsst.afw.table.Schema`
        A table schema to retrieve keys from.
    nameFluxKey : `str`, optional
        Prefix of the flux fields whose ``_snr``, ``_mag`` and ``_magErr``
        keys are retrieved. ``"base_PsfFlux"`` is used when None.

    Returns
    -------
    keys : `lsst.pipe.base.Struct`
        A struct with members ``flags`` (list of pixel-flag keys for
        saturated, cr, bad and edge), ``snr``, ``mag``, ``magErr`` and
        ``extended``.
    """
    fluxName = "base_PsfFlux" if nameFluxKey is None else nameFluxKey

    def keyOf(fieldName):
        # Resolve a field name to its key in ``schema``.
        return schema.find(fieldName).key

    flagKeys = []
    for flag in ("saturated", "cr", "bad", "edge"):
        flagKeys.append(keyOf("base_PixelFlags_flag_%s" % flag))

    snrKey = keyOf(f"{fluxName}_snr")
    magKey = keyOf(f"{fluxName}_mag")
    magErrKey = keyOf(f"{fluxName}_magErr")
    extendedKey = keyOf("base_ClassificationExtendedness_value")

    return pipeBase.Struct(
        flags=flagKeys,
        snr=snrKey,
        mag=magKey,
        magErr=magErrKey,
        extended=extendedKey,
    )
def filterSources(allMatches, keys=None, faintSnrMin=5.0, brightSnrMin=50.0, safeExtendedness=1.0,
                  extended=False, faintSnrMax=np.inf, brightSnrMax=np.inf):
    """Filter matched sources on flags and SNR.

    Parameters
    ----------
    allMatches : `lsst.afw.table.GroupView`
        GroupView object with matches.
    keys : `lsst.pipe.base.Struct`, optional
        A struct storing schema keys to aggregate over. When None it is
        built with `getKeysFilter` from ``allMatches.schema``, using
        ``"slot_ModelFlux"`` if ``extended`` else ``"base_PsfFlux"``.
    faintSnrMin : `float`, optional
        Minimum median SNR for a faint source match; default 5.
    brightSnrMin : `float`, optional
        Minimum median SNR for a bright source match; default 50.
    safeExtendedness : `float`, optional
        Maximum (exclusive) extendedness for sources or minimum (inclusive)
        if ``extended`` is True.
    extended : `bool`, optional
        Whether to select extended sources, i.e. galaxies.
    faintSnrMax : `float`, optional
        Maximum median SNR for a faint source match; default ``np.inf``.
    brightSnrMax : `float`, optional
        Maximum median SNR for a bright source match; default ``np.inf``.

    Returns
    -------
    filterResult : `lsst.pipe.base.Struct`
        A struct with members ``extended``, ``keys``, ``matchesFaint`` and
        ``matchesBright``.
    """
    if keys is None:
        keys = getKeysFilter(allMatches.schema, "slot_ModelFlux" if extended else "base_PsfFlux")
    nMatchesRequired = 2

    def extendedFilter(cat):
        # Require enough detections, no bad pixel flags, finite magnitudes,
        # and extendedness on the requested side of the threshold.
        if len(cat) < nMatchesRequired:
            return False
        for flagKey in keys.flags:
            if cat.get(flagKey).any():
                return False
        if not np.isfinite(cat.get(keys.mag)).all():
            return False
        extendedness = cat.get(keys.extended)
        if extended:
            return np.min(extendedness) >= safeExtendedness
        return np.max(extendedness) < safeExtendedness

    def makeSnrFilter(snrMin, snrMax):
        # Bind the limits explicitly so each filter is self-contained and
        # does not depend on when the GroupView evaluates it.
        def snrFilter(cat):
            # Note that this also implicitly checks for the SNR being
            # non-nan, since comparisons against nan are False.
            snr = np.median(cat.get(keys.snr))
            return snrMax >= snr >= snrMin
        return snrFilter

    faintSnrFilter = makeSnrFilter(faintSnrMin, faintSnrMax)
    brightSnrFilter = makeSnrFilter(brightSnrMin, brightSnrMax)

    def fullFilter(cat):
        return extendedFilter(cat) and faintSnrFilter(cat)

    # If the bright SNR range is a subset of the faint one, the bright
    # sample can be selected from the faint sample by SNR alone. Otherwise
    # filter on flags/extendedness first, then apply each SNR range to that.
    isSafeSubset = faintSnrMax >= brightSnrMax and faintSnrMin <= brightSnrMin
    if isSafeSubset:
        matchesFaint = allMatches.where(fullFilter)
        matchesBright = matchesFaint.where(brightSnrFilter)
    else:
        matchesClean = allMatches.where(extendedFilter)
        matchesBright = matchesClean.where(brightSnrFilter)
        matchesFaint = matchesClean.where(faintSnrFilter)

    return pipeBase.Struct(
        extended=extended, keys=keys, matchesFaint=matchesFaint, matchesBright=matchesBright,
    )
def summarizeSources(blob, filterResult):
    """Calculate summary statistics for each source. These are persisted
    as items of ``blob``.

    Parameters
    ----------
    blob : `lsst.verify.blob.Blob`
        A verification blob to store Datums in. Its ``'filterName'`` item
        is read to build the labels; the items ``'snr'``, ``'mag'``,
        ``'magrms'``, ``'magerr'`` and ``'dist'`` and the attributes
        ``matchesFaint`` and ``matchesBright`` are set.
    filterResult : `lsst.pipe.base.Struct`
        A struct containing bright and faint filter matches, as returned by
        `filterSources`.
    """
    typeMag = "model" if filterResult.extended else "PSF"
    # NOTE(review): build_matched_dataset stores 'filterName' as a Datum, so
    # the labels below embed that object's string form - confirm intended.
    filter_name = blob['filterName']
    source_type = f'{"extended" if filterResult.extended else "point"} sources'
    matches = filterResult.matchesFaint
    keys = filterResult.keys
    # Pass field=<key> so the reducing function just gets that column as
    # its input.
    blob['snr'] = Datum(quantity=matches.aggregate(np.median, field=keys.snr) * u.Unit(''),
                        label='SNR({band})'.format(band=filter_name),
                        description=f'Median signal-to-noise ratio of {typeMag} magnitudes for {source_type}'
                                    f' over multiple visits')
    blob['mag'] = Datum(quantity=matches.aggregate(np.mean, field=keys.mag) * u.mag,
                        label='{band}'.format(band=filter_name),
                        description=f'Mean of {typeMag} magnitudes for {source_type} over multiple visits')
    blob['magrms'] = Datum(quantity=matches.aggregate(np.std, field=keys.mag) * u.mag,
                           label='RMS({band})'.format(band=filter_name),
                           description=f'RMS of {typeMag} magnitudes for {source_type} over multiple visits')
    blob['magerr'] = Datum(quantity=matches.aggregate(np.median, field=keys.magErr) * u.mag,
                           label='sigma({band})'.format(band=filter_name),
                           description=f'Median 1-sigma uncertainty of {typeMag} magnitudes for {source_type}'
                                       f' over multiple visits')
    # positionRmsFromCat knows how to query a group
    # so we give it the whole thing by going with the default `field=None`.
    blob['dist'] = Datum(quantity=matches.aggregate(positionRmsFromCat) * u.milliarcsecond,
                         label='d',
                         description=f'RMS of sky coordinates of {source_type} over multiple visits')

    # These attributes are not serialized
    blob.matchesFaint = filterResult.matchesFaint
    blob.matchesBright = filterResult.matchesBright
484def _loadPhotoCalib(butler, dataId, doApplyExternalPhotoCalib, externalPhotoCalibName):
485 """
486 Load a photoCalib object.
488 Parameters
489 ----------
490 butler: `lsst.daf.persistence.Butler`
491 dataId: Butler dataId `dict`
492 doApplyExternalPhotoCalib: `bool`
493 Apply external photoCalib to calibrate fluxes.
494 externalPhotoCalibName: `str`
495 Type of external `PhotoCalib` to apply. Currently supported are jointcal,
496 fgcm, and fgcm_tract. Must be set if "doApplyExternalPhotoCalib" is True.
498 Returns
499 -------
500 photoCalib: `lsst.afw.image.PhotoCalib` or None
501 photoCalib to apply. None if a suitable one was not found.
502 """
504 photoCalib = None
506 if doApplyExternalPhotoCalib:
507 try:
508 photoCalib = butler.get(f"{externalPhotoCalibName}_photoCalib", dataId)
509 except (FitsError, dafPersist.NoResults) as e:
510 print(e)
511 print(f'Could not open external photometric calib for {dataId}; skipping.')
512 photoCalib = None
513 else:
514 try:
515 photoCalib = butler.get('calexp_photoCalib', dataId)
516 except (FitsError, dafPersist.NoResults) as e:
517 print(e)
518 print(f'Could not open calibrated image file for {dataId}; skipping.')
519 except TypeError as te:
520 # DECam images that haven't been properly reformatted
521 # can trigger a TypeError because of a residual FITS header
522 # LTV2 which is a float instead of the expected integer.
523 # This generates an error of the form:
524 #
525 # lsst::pex::exceptions::TypeError: 'LTV2 has mismatched type'
526 #
527 # See, e.g., DM-2957 for details.
528 print(te)
529 print(f'Calibration image header information malformed for {dataId}; skipping.')
530 photoCalib = None
532 return photoCalib
535def _loadExternalSkyWcs(butler, dataId, externalSkyWcsName):
536 """
537 Load a SkyWcs object.
539 Parameters
540 ----------
541 butler: `lsst.daf.persistence.Butler`
542 dataId: Butler dataId `dict`
543 externalSkyWcsName: `str`
544 Type of external `SkyWcs` to apply. Currently supported is jointcal.
545 Must be not None if "doApplyExternalSkyWcs" is True.
547 Returns
548 -------
549 SkyWcs: `lsst.afw.geom.SkyWcs` or None
550 SkyWcs to apply. None if a suitable one was not found.
551 """
553 try:
554 wcs = butler.get(f"{externalSkyWcsName}_wcs", dataId)
555 except (FitsError, dafPersist.NoResults) as e:
556 print(e)
557 print(f'Could not open external WCS for {dataId}; skipping.')
558 wcs = None
560 return wcs