Coverage for python/lsst/validate/drp/matchreduce.py : 8%

# LSST Data Management System
# Copyright 2016-2019 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
"""Blob classes that reduce a multi-visit dataset and encapsulate data
for measurement classes, plotting functions, and JSON persistence.
"""

__all__ = ['build_matched_dataset', 'reduceSources']

import numpy as np
import astropy.units as u
from sqlalchemy.exc import OperationalError
import sqlite3

import lsst.geom as geom
import lsst.daf.persistence as dafPersist
from lsst.afw.table import (SourceCatalog, SchemaMapper, Field,
                            MultiMatch, SimpleRecord, GroupView,
                            SOURCE_IO_NO_FOOTPRINTS)
import lsst.afw.table as afwTable
from lsst.afw.fits import FitsError
from lsst.verify import Blob, Datum

from .util import (getCcdKeyName, raftSensorToInt, positionRmsFromCat,
                   ellipticity_from_cat)


def build_matched_dataset(repo, dataIds, matchRadius=None, safeSnr=50.,
                          doApplyExternalPhotoCalib=False, externalPhotoCalibName=None,
                          doApplyExternalSkyWcs=False, externalSkyWcsName=None,
                          skipTEx=False, skipNonSrd=False):
48 """Construct a container for matched star catalogs from multple visits, with filtering,
49 summary statistics, and modelling.
51 `lsst.verify.Blob` instances are serializable to JSON.
53 Parameters
54 ----------
55 repo : `str` or `lsst.daf.persistence.Butler`
56 A Butler instance or a repository URL that can be used to construct
57 one.
58 dataIds : `list` of `dict`
59 List of `butler` data IDs of Image catalogs to compare to reference.
60 The `calexp` cpixel image is needed for the photometric calibration.
61 matchRadius : `lsst.geom.Angle`, optional
62 Radius for matching. Default is 1 arcsecond.
63 safeSnr : `float`, optional
64 Minimum median SNR for a match to be considered "safe".
65 doApplyExternalPhotoCalib : bool, optional
66 Apply external photoCalib to calibrate fluxes.
67 externalPhotoCalibName : str, optional
68 Type of external `PhotoCalib` to apply. Currently supported are jointcal,
69 fgcm, and fgcm_tract. Must be set if "doApplyExternalPhotoCalib" is True.
70 doApplyExternalSkyWcs : bool, optional
71 Apply external wcs to calibrate positions.
72 externalSkyWcsName : str, optional:
73 Type of external `wcs` to apply. Currently supported is jointcal.
74 Must be set if "doApplyExternalSkyWcs" is True.
75 skipTEx : `bool`, optional
76 Skip TEx calculations (useful for older catalogs that don't have
77 PsfShape measurements).
78 skipNonSrd : `bool`, optional
79 Skip any metrics not defined in the LSST SRD; default False.
81 Attributes of returned Blob
82 ----------
83 filterName : `str`
84 Name of filter used for all observations.
85 mag : `astropy.units.Quantity`
86 Mean PSF magnitudes of stars over multiple visits (magnitudes).
87 magerr : `astropy.units.Quantity`
88 Median 1-sigma uncertainty of PSF magnitudes over multiple visits
89 (magnitudes).
90 magrms : `astropy.units.Quantity`
91 RMS of PSF magnitudes over multiple visits (magnitudes).
92 snr : `astropy.units.Quantity`
93 Median signal-to-noise ratio of PSF magnitudes over multiple visits
94 (dimensionless).
95 dist : `astropy.units.Quantity`
96 RMS of sky coordinates of stars over multiple visits (milliarcseconds).
98 *Not serialized.*
99 goodMatches
100 all good matches, as an afw.table.GroupView;
101 good matches contain only objects whose detections all have
103 1. a PSF Flux measurement with S/N > 1
104 2. a finite (non-nan) PSF magnitude. This separate check is largely
105 to reject failed zeropoints.
106 3. and do not have flags set for bad, cosmic ray, edge or saturated
108 *Not serialized.*
110 safeMatches
111 safe matches, as an afw.table.GroupView. Safe matches
112 are good matches that are sufficiently bright and sufficiently
113 compact.
115 *Not serialized.*
116 magKey
117 Key for `"base_PsfFlux_mag"` in the `goodMatches` and `safeMatches`
118 catalog tables.
120 *Not serialized.*
122 Raises
123 ------
124 RuntimeError:
125 Raised if "doApplyExternalPhotoCalib" is True and "externalPhotoCalibName"
126 is None, or if "doApplyExternalSkyWcs" is True and "externalSkyWcsName" is
127 None.
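
    Examples
    --------
    A minimal usage sketch. The repository path and the data ID keys shown
    here (``visit``, ``ccd``, ``filter``) are placeholders; the required keys
    depend on the camera and repository layout.

    >>> dataIds = [{'visit': 1234, 'ccd': 10, 'filter': 'r'},
    ...            {'visit': 1235, 'ccd': 10, 'filter': 'r'}]
    >>> blob = build_matched_dataset('/path/to/repo', dataIds, safeSnr=50.)
    >>> blob['snr'].quantity    # median S/N of good matches, an astropy Quantity
    >>> blob.goodMatches        # afw.table.GroupView of good matches (not serialized)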
128 """
    if doApplyExternalPhotoCalib and externalPhotoCalibName is None:
        raise RuntimeError("Must set externalPhotoCalibName if doApplyExternalPhotoCalib is True.")
    if doApplyExternalSkyWcs and externalSkyWcsName is None:
        raise RuntimeError("Must set externalSkyWcsName if doApplyExternalSkyWcs is True.")

    blob = Blob('MatchedMultiVisitDataset')

    if not matchRadius:
        matchRadius = geom.Angle(1, geom.arcseconds)

    # Extract single filter
    blob['filterName'] = Datum(quantity=set([dId['filter'] for dId in dataIds]).pop(),
                               description='Filter name')

    # Record important configuration
    blob['doApplyExternalPhotoCalib'] = Datum(quantity=doApplyExternalPhotoCalib,
                                              description=('Whether external photometric '
                                                           'calibrations were used.'))
    blob['externalPhotoCalibName'] = Datum(quantity=externalPhotoCalibName,
                                           description='Name of external PhotoCalib dataset used.')
    blob['doApplyExternalSkyWcs'] = Datum(quantity=doApplyExternalSkyWcs,
                                          description='Whether external wcs calibrations were used.')
    blob['externalSkyWcsName'] = Datum(quantity=externalSkyWcsName,
                                       description='Name of external wcs dataset used.')

    # Match catalogs across visits
    blob._catalog, blob._matchedCatalog = \
        _loadAndMatchCatalogs(repo, dataIds, matchRadius,
                              doApplyExternalPhotoCalib=doApplyExternalPhotoCalib,
                              externalPhotoCalibName=externalPhotoCalibName,
                              doApplyExternalSkyWcs=doApplyExternalSkyWcs,
                              externalSkyWcsName=externalSkyWcsName,
                              skipTEx=skipTEx, skipNonSrd=skipNonSrd)

    blob.magKey = blob._matchedCatalog.schema.find("base_PsfFlux_mag").key
    # Reduce catalogs into summary statistics.
    # These are the serializable attributes of this class.
    reduceSources(blob, blob._matchedCatalog, safeSnr)
    return blob


def _loadAndMatchCatalogs(repo, dataIds, matchRadius,
                          doApplyExternalPhotoCalib=False, externalPhotoCalibName=None,
                          doApplyExternalSkyWcs=False, externalSkyWcsName=None,
                          skipTEx=False, skipNonSrd=False):
174 """Load data from specific visits and returned a calibrated catalog matched
175 with a reference.
177 Parameters
178 ----------
179 repo : `str` or `lsst.daf.persistence.Butler`
180 A Butler or a repository URL that can be used to construct one.
181 dataIds : list of dict
182 List of butler data IDs of Image catalogs to compare to
183 reference. The calexp cpixel image is needed for the photometric
184 calibration.
185 matchRadius : `lsst.geom.Angle`, optional
186 Radius for matching. Default is 1 arcsecond.
187 doApplyExternalPhotoCalib : bool, optional
188 Apply external photoCalib to calibrate fluxes.
189 externalPhotoCalibName : str, optional
190 Type of external `PhotoCalib` to apply. Currently supported are jointcal,
191 fgcm, and fgcm_tract. Must be set if doApplyExternalPhotoCalib is True.
192 doApplyExternalSkyWcs : bool, optional
193 Apply external wcs to calibrate positions.
194 externalSkyWcsName : str, optional
195 Type of external `wcs` to apply. Currently supported is jointcal.
196 Must be set if "doApplyExternalWcs" is True.
197 skipTEx : `bool`, optional
198 Skip TEx calculations (useful for older catalogs that don't have
199 PsfShape measurements).
200 skipNonSrd : `bool`, optional
201 Skip any metrics not defined in the LSST SRD; default False.
203 Returns
204 -------
205 catalog : `lsst.afw.table.SourceCatalog`
206 A new calibrated SourceCatalog.
207 matches : `lsst.afw.table.GroupView`
208 A GroupView of the matched sources.
210 Raises
211 ------
212 RuntimeError:
213 Raised if "doApplyExternalPhotoCalib" is True and "externalPhotoCalibName"
214 is None, or if "doApplyExternalSkyWcs" is True and "externalSkyWcsName" is
215 None.
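
    Examples
    --------
    A minimal sketch of the call pattern (normally reached via
    `build_matched_dataset`); the data ID keys shown are placeholders.

    >>> srcVis, allMatches = _loadAndMatchCatalogs(
    ...     '/path/to/repo',
    ...     [{'visit': 1234, 'ccd': 10, 'filter': 'r'},
    ...      {'visit': 1235, 'ccd': 10, 'filter': 'r'}],
    ...     geom.Angle(1, geom.arcseconds))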
216 """

    if doApplyExternalPhotoCalib and externalPhotoCalibName is None:
        raise RuntimeError("Must set externalPhotoCalibName if doApplyExternalPhotoCalib is True.")
    if doApplyExternalSkyWcs and externalSkyWcsName is None:
        raise RuntimeError("Must set externalSkyWcsName if doApplyExternalSkyWcs is True.")

    # Following
    # https://github.com/lsst/afw/blob/tickets/DM-3896/examples/repeatability.ipynb
    if isinstance(repo, dafPersist.Butler):
        butler = repo
    else:
        butler = dafPersist.Butler(repo)
    dataset = 'src'

    # 2016-02-08 MWV:
    # I feel like I could be doing something more efficient with
    # something along the lines of the following:
    # dataRefs = [dafPersist.ButlerDataRef(butler, vId) for vId in dataIds]

    ccdKeyName = getCcdKeyName(dataIds[0])

    # Hack to support raft and sensor 0,1 IDs as ints for multimatch
    if ccdKeyName == 'sensor':
        ccdKeyName = 'raft_sensor_int'
        for vId in dataIds:
            vId[ccdKeyName] = raftSensorToInt(vId)

    schema = butler.get(dataset + "_schema").schema
    mapper = SchemaMapper(schema)
    mapper.addMinimalSchema(schema)
    mapper.addOutputField(Field[float]('base_PsfFlux_snr',
                                       'PSF flux SNR'))
    mapper.addOutputField(Field[float]('base_PsfFlux_mag',
                                       'PSF magnitude'))
    mapper.addOutputField(Field[float]('base_PsfFlux_magErr',
                                       'PSF magnitude uncertainty'))
    if not skipNonSrd:
        # Needed because addOutputField(... 'slot_ModelFlux_mag') will add a field with that literal name
        aliasMap = schema.getAliasMap()
        # Possibly not needed since base_GaussianFlux is the default, but this ought to be safe
        modelName = aliasMap['slot_ModelFlux'] if 'slot_ModelFlux' in aliasMap.keys() else 'base_GaussianFlux'
        mapper.addOutputField(Field[float](f'{modelName}_mag',
                                           'Model magnitude'))
        mapper.addOutputField(Field[float](f'{modelName}_magErr',
                                           'Model magnitude uncertainty'))
        mapper.addOutputField(Field[float](f'{modelName}_snr',
                                           'Model flux snr'))

    mapper.addOutputField(Field[float]('e1',
                                       'Source Ellipticity 1'))
    mapper.addOutputField(Field[float]('e2',
                                       'Source Ellipticity 2'))
    mapper.addOutputField(Field[float]('psf_e1',
                                       'PSF Ellipticity 1'))
    mapper.addOutputField(Field[float]('psf_e2',
                                       'PSF Ellipticity 2'))
    newSchema = mapper.getOutputSchema()
    newSchema.setAliasMap(schema.getAliasMap())

    # Create an object that matches multiple catalogs with same schema
    mmatch = MultiMatch(newSchema,
                        dataIdFormat={'visit': np.int32, ccdKeyName: np.int32},
                        radius=matchRadius,
                        RecordClass=SimpleRecord)

    # create the new extended source catalog
    srcVis = SourceCatalog(newSchema)

    for vId in dataIds:
        if not butler.datasetExists('src', vId):
            print(f'Could not find source catalog for {vId}; skipping.')
            continue

        photoCalib = _loadPhotoCalib(butler, vId,
                                     doApplyExternalPhotoCalib, externalPhotoCalibName)
        if photoCalib is None:
            continue

        if doApplyExternalSkyWcs:
            wcs = _loadExternalSkyWcs(butler, vId, externalSkyWcsName)
            if wcs is None:
                continue

        # We don't want to put this above the first _loadPhotoCalib call
        # because we need to use the first `butler.get` in there to quickly
        # catch dataIDs with no usable outputs.
        try:
            # HSC supports these flags, which dramatically improve I/O
            # performance; support for other cameras is DM-6927.
            oldSrc = butler.get('src', vId, flags=SOURCE_IO_NO_FOOTPRINTS)
        except (OperationalError, sqlite3.OperationalError):
            oldSrc = butler.get('src', vId)

        print(len(oldSrc), "sources in ccd %s visit %s" %
              (vId[ccdKeyName], vId["visit"]))

        # create temporary catalog
        tmpCat = SourceCatalog(SourceCatalog(newSchema).table)
        tmpCat.extend(oldSrc, mapper=mapper)
        tmpCat['base_PsfFlux_snr'][:] = tmpCat['base_PsfFlux_instFlux'] \
            / tmpCat['base_PsfFlux_instFluxErr']

        if doApplyExternalSkyWcs:
            afwTable.updateSourceCoords(wcs, tmpCat)
        photoCalib.instFluxToMagnitude(tmpCat, "base_PsfFlux", "base_PsfFlux")
        if not skipNonSrd:
            tmpCat['slot_ModelFlux_snr'][:] = (tmpCat['slot_ModelFlux_instFlux'] /
                                               tmpCat['slot_ModelFlux_instFluxErr'])
            photoCalib.instFluxToMagnitude(tmpCat, "slot_ModelFlux", "slot_ModelFlux")

        if not skipTEx:
            _, psf_e1, psf_e2 = ellipticity_from_cat(oldSrc, slot_shape='slot_PsfShape')
            _, star_e1, star_e2 = ellipticity_from_cat(oldSrc, slot_shape='slot_Shape')
            tmpCat['e1'][:] = star_e1
            tmpCat['e2'][:] = star_e2
            tmpCat['psf_e1'][:] = psf_e1
            tmpCat['psf_e2'][:] = psf_e2

        srcVis.extend(tmpCat, False)
        mmatch.add(catalog=tmpCat, dataId=vId)

    # Complete the match, returning a catalog that includes
    # all matched sources with object IDs that can be used to group them.
    matchCat = mmatch.finish()

    # Create a mapping object that allows the matches to be manipulated
    # as a mapping of object ID to catalog of sources.
    allMatches = GroupView.build(matchCat)

    return srcVis, allMatches


def reduceSources(blob, allMatches, goodSnr=5.0, safeSnr=50.0, safeExtendedness=1.0, extended=False,
                  nameFluxKey=None, goodSnrMax=np.Inf, safeSnrMax=np.Inf):
350 """Calculate summary statistics for each star. These are persisted
351 as object attributes.
353 Parameters
354 ----------
355 blob : `lsst.verify.blob.Blob`
356 A verification blob to store Datums in.
357 allMatches : `lsst.afw.table.GroupView`
358 GroupView object with matches.
359 goodSnr : float, optional
360 Minimum median SNR for a match to be considered "good"; default 3.
361 safeSnr : float, optional
362 Minimum median SNR for a match to be considered "safe"; default 50.
363 safeExtendedness: float, optional
364 Maximum (exclusive) extendedness for sources or minimum (inclusive) if extended==True.
365 extended: bool, optional
366 Whether to select extended sources, i.e. galaxies.
367 goodSnrMax : float, optional
368 Maximum median SNR for a match to be considered "good"; default np.Inf.
369 safeSnrMax : float, optional
370 Maximum median SNR for a match to be considered "safe"; default np.Inf.
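
    Examples
    --------
    Illustrative sketch only; ``blob`` and ``allMatches`` would normally come
    from `build_matched_dataset`, which calls this function internally.

    >>> reduceSources(blob, allMatches, goodSnr=5.0, safeSnr=50.0)
    >>> blob['magrms'].quantity   # RMS of PSF magnitudes of the good matches

    Passing ``extended=True`` selects galaxies and switches the flux field to
    ``slot_ModelFlux``.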
371 """
    if nameFluxKey is None:
        nameFluxKey = "slot_ModelFlux" if extended else "base_PsfFlux"
    # Filter down to matches with at least 2 sources and good flags
    flagKeys = [allMatches.schema.find("base_PixelFlags_flag_%s" % flag).key
                for flag in ("saturated", "cr", "bad", "edge")]
    nMatchesRequired = 2

    snrKey = allMatches.schema.find(f"{nameFluxKey}_snr").key
    magKey = allMatches.schema.find(f"{nameFluxKey}_mag").key
    magErrKey = allMatches.schema.find(f"{nameFluxKey}_magErr").key
    extendedKey = allMatches.schema.find("base_ClassificationExtendedness_value").key

    snrMin, snrMax = goodSnr, goodSnrMax

    def extendedFilter(cat):
        if len(cat) < nMatchesRequired:
            return False
        for flagKey in flagKeys:
            if cat.get(flagKey).any():
                return False
        if not np.isfinite(cat.get(magKey)).all():
            return False
        extendedness = cat.get(extendedKey)
        return np.min(extendedness) >= safeExtendedness if extended else \
            np.max(extendedness) < safeExtendedness

    def snrFilter(cat):
        # Note that this also implicitly checks for psfSnr being non-nan.
        snr = np.median(cat.get(snrKey))
        return snrMax >= snr >= snrMin

    def fullFilter(cat):
        return extendedFilter(cat) and snrFilter(cat)

    # If safeSnr range is a subset of goodSnr, it's safe to only filter on snr again
    # Otherwise, filter on flags/extendedness first, then snr
    isSafeSubset = goodSnrMax >= safeSnrMax and goodSnr <= safeSnr
    goodMatches = allMatches.where(fullFilter) if isSafeSubset else allMatches.where(extendedFilter)
    snrMin, snrMax = safeSnr, safeSnrMax
    safeMatches = goodMatches.where(snrFilter)
    if not isSafeSubset:
        snrMin, snrMax = goodSnr, goodSnrMax
        goodMatches = goodMatches.where(snrFilter)

    # Pass field=magKey so np.mean just gets that column as its input
    typeMag = "model" if extended else "PSF"
    filter_name = blob['filterName']
    source_type = f'{"extended" if extended else "point"} sources'
    blob['snr'] = Datum(quantity=goodMatches.aggregate(np.median, field=snrKey) * u.Unit(''),
                        label='SNR({band})'.format(band=filter_name),
                        description=f'Median signal-to-noise ratio of {typeMag} magnitudes for {source_type}'
                                    f' over multiple visits')
    blob['mag'] = Datum(quantity=goodMatches.aggregate(np.mean, field=magKey) * u.mag,
                        label='{band}'.format(band=filter_name),
                        description=f'Mean of {typeMag} magnitudes for {source_type} over multiple visits')
    blob['magrms'] = Datum(quantity=goodMatches.aggregate(np.std, field=magKey) * u.mag,
                           label='RMS({band})'.format(band=filter_name),
                           description=f'RMS of {typeMag} magnitudes for {source_type} over multiple visits')
    blob['magerr'] = Datum(quantity=goodMatches.aggregate(np.median, field=magErrKey) * u.mag,
                           label='sigma({band})'.format(band=filter_name),
                           description=f'Median 1-sigma uncertainty of {typeMag} magnitudes for {source_type}'
                                       f' over multiple visits')
    # positionRmsFromCat knows how to query a group
    # so we give it the whole thing by going with the default `field=None`.
    blob['dist'] = Datum(quantity=goodMatches.aggregate(positionRmsFromCat) * u.milliarcsecond,
                         label='d',
                         description=f'RMS of sky coordinates of {source_type} over multiple visits')

    # These attributes are not serialized
    blob.goodMatches = goodMatches
    blob.safeMatches = safeMatches


def _loadPhotoCalib(butler, dataId, doApplyExternalPhotoCalib, externalPhotoCalibName):
    """Load a photoCalib object.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
    dataId : Butler dataId `dict`
    doApplyExternalPhotoCalib : `bool`
        Apply external photoCalib to calibrate fluxes.
    externalPhotoCalibName : `str`
        Type of external `PhotoCalib` to apply. Currently supported are jointcal,
        fgcm, and fgcm_tract. Must be set if "doApplyExternalPhotoCalib" is True.

    Returns
    -------
    photoCalib : `lsst.afw.image.PhotoCalib` or None
        photoCalib to apply. None if a suitable one was not found.
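
    Examples
    --------
    A minimal sketch; the data ID keys shown are placeholders.

    >>> photoCalib = _loadPhotoCalib(butler, {'visit': 1234, 'ccd': 10},
    ...                              True, 'jointcal')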
463 """
    photoCalib = None

    if doApplyExternalPhotoCalib:
        try:
            photoCalib = butler.get(f"{externalPhotoCalibName}_photoCalib", dataId)
        except (FitsError, dafPersist.NoResults) as e:
            print(e)
            print(f'Could not open external photometric calib for {dataId}; skipping.')
            photoCalib = None
    else:
        try:
            photoCalib = butler.get('calexp_photoCalib', dataId)
        except (FitsError, dafPersist.NoResults) as e:
            print(e)
            print(f'Could not open calibrated image file for {dataId}; skipping.')
        except TypeError as te:
            # DECam images that haven't been properly reformatted
            # can trigger a TypeError because of a residual FITS header
            # LTV2 which is a float instead of the expected integer.
            # This generates an error of the form:
            #
            #     lsst::pex::exceptions::TypeError: 'LTV2 has mismatched type'
            #
            # See, e.g., DM-2957 for details.
            print(te)
            print(f'Calibration image header information malformed for {dataId}; skipping.')
            photoCalib = None

    return photoCalib


def _loadExternalSkyWcs(butler, dataId, externalSkyWcsName):
    """Load a SkyWcs object.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
    dataId : Butler dataId `dict`
    externalSkyWcsName : `str`
        Type of external `SkyWcs` to apply. Currently supported is jointcal.
        Must not be None if "doApplyExternalSkyWcs" is True.

    Returns
    -------
    SkyWcs : `lsst.afw.geom.SkyWcs` or None
        SkyWcs to apply. None if a suitable one was not found.
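
    Examples
    --------
    A minimal sketch; the data ID keys shown are placeholders.

    >>> wcs = _loadExternalSkyWcs(butler, {'visit': 1234, 'ccd': 10}, 'jointcal')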
512 """
    try:
        wcs = butler.get(f"{externalSkyWcsName}_wcs", dataId)
    except (FitsError, dafPersist.NoResults) as e:
        print(e)
        print(f'Could not open external WCS for {dataId}; skipping.')
        wcs = None

    return wcs