lsst.pipe.tasks  13.0-66-gfbf2f2ce+5
selectImages.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008, 2009, 2010 LSST Corporation.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division, print_function
23 from builtins import zip
24 import numpy as np
25 import lsst.pex.config as pexConfig
26 import lsst.pex.exceptions as pexExceptions
27 import lsst.afw.geom as afwGeom
28 import lsst.pipe.base as pipeBase
29 
30 __all__ = ["BaseSelectImagesTask", "BaseExposureInfo", "WcsSelectImagesTask", "PsfWcsSelectImagesTask",
31  "DatabaseSelectImagesConfig"]
32 
33 
class DatabaseSelectImagesConfig(pexConfig.Config):
    """Base configuration for BaseSelectImagesTask subclasses that use a database

    Holds the connection parameters (host, port, database name) and an
    optional cap on the number of exposures selected.
    """
    # Connection parameters; none of them declares a default value.
    host = pexConfig.Field(dtype=str, doc="Database server host name")
    port = pexConfig.Field(dtype=int, doc="Database server port")
    database = pexConfig.Field(dtype=str, doc="Name of database")
    # Debugging aid; per its doc string the limit is ignored when left as None.
    maxExposures = pexConfig.Field(
        dtype=int,
        optional=True,
        doc="maximum exposures to select; intended for debugging; ignored if None",
    )
53 
54 
class BaseExposureInfo(pipeBase.Struct):
    """Data about a selected exposure

    The object has the following fields:
    - dataId: data ID of exposure (a dict)
    - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
    plus any other items that are desired
    """

    def __init__(self, dataId, coordList):
        """Create exposure information that can be used to generate data references

        @param[in] dataId: data ID of exposure (a dict)
        @param[in] coordList: corner coordinates of the exposure (list of afwCoord.IcrsCoord)
        """
        # Both values are stored as Struct fields under the same names.
        super(BaseExposureInfo, self).__init__(
            dataId=dataId,
            coordList=coordList,
        )
68 
69 
class BaseSelectImagesTask(pipeBase.Task):
    """Base task for selecting images suitable for coaddition

    Subclasses are expected to implement run() and _runArgDictFromDataId();
    runDataRef() combines the two and optionally builds data references.
    """
    ConfigClass = pexConfig.Config
    _DefaultName = "selectImages"

    @pipeBase.timeMethod
    def run(self, coordList):
        """Select images suitable for coaddition in a particular region

        Subclasses may add additional keyword arguments, as required.

        @param[in] coordList: list of coordinates defining region of interest; if None then select all images

        @return a pipeBase Struct containing:
        - exposureInfoList: a list of exposure information objects (subclasses of BaseExposureInfo),
            which have at least the following fields:
            - dataId: data ID dictionary
            - coordList: coordinates of the corner of the exposure (list of afwCoord.IcrsCoord)
        """
        raise NotImplementedError()

    def _runArgDictFromDataId(self, dataId):
        """Extract keyword arguments for run (other than coordList) from a data ID

        @param[in] dataId: data ID to extract the arguments from
        @return keyword arguments for run (other than coordList), as a dict
        """
        raise NotImplementedError()

    def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[]):
        """Run based on a data reference

        The selection itself is delegated to run(), with the extra keyword
        arguments supplied by _runArgDictFromDataId(). When selectDataList is
        non-empty, the result is narrowed to the exposures whose data IDs also
        appear in selectDataList, giving the caller additional control.

        @param[in] dataRef: data reference; must contain any extra keys needed by the subclass
        @param[in] coordList: list of coordinates defining region of interest; if None, search the whole sky
        @param[in] makeDataRefList: if True, return dataRefList
        @param[in] selectDataList: List of SelectStruct with dataRefs to consider for selection
            (the default list is only read, never modified)
        @return a pipeBase Struct containing:
        - exposureInfoList: a list of objects derived from ExposureInfo
        - dataRefList: a list of data references (None if makeDataRefList False)
        """
        extraArgs = self._runArgDictFromDataId(dataRef.dataId)
        exposureInfoList = self.run(coordList, **extraArgs).exposureInfoList

        if len(selectDataList) > 0 and len(exposureInfoList) > 0:
            # Compare data IDs as value tuples built with one common key order
            idKeys, idValues = _extractKeyValue(exposureInfoList)
            _, allowedValues = _extractKeyValue([sel.dataRef for sel in selectDataList], keys=idKeys)
            allowed = set(allowedValues)
            kept = []
            for candidate, idValue in zip(exposureInfoList, idValues):
                if idValue not in allowed:
                    self.log.info("De-selecting exposure %s: not in selectDataList" % candidate.dataId)
                    continue
                kept.append(candidate)
            exposureInfoList = kept

        dataRefList = None
        if makeDataRefList:
            butler = dataRef.butlerSubset.butler
            dataRefList = [butler.dataRef(datasetType="calexp", dataId=expInfo.dataId)
                           for expInfo in exposureInfoList]

        return pipeBase.Struct(dataRefList=dataRefList, exposureInfoList=exposureInfoList)
142 
143 
def _extractKeyValue(dataList, keys=None):
    """Extract the keys and values from a list of dataIds

    Every element of dataList must have a 'dataId' member, so the function
    works for both a list of data references and a list of ExposureInfo.

    @param[in] dataList: non-empty list of objects with a 'dataId' mapping
    @param[in] keys: key order to use; if None, the sorted keys of the first dataId
    @return a tuple (keys, values), where values holds one tuple per element of
        dataList with the dataId values in the order given by keys
    @throw RuntimeError if the key set of any dataId differs from keys
    """
    assert len(dataList) > 0
    if keys is None:
        keys = sorted(dataList[0].dataId.keys())
    expected = set(keys)
    values = []
    for item in dataList:
        dataId = item.dataId
        found = set(dataId.keys())
        if found != expected:
            raise RuntimeError("DataId keys inconsistent: %s vs %s" % (expected, found))
        values.append(tuple(dataId[name] for name in keys))
    return keys, values
162 
163 
class SelectStruct(pipeBase.Struct):
    """A container for data to be passed to the WcsSelectImagesTask

    Carries three fields: dataRef, wcs and bbox.
    """

    def __init__(self, dataRef, wcs, bbox):
        """Store the data reference together with its Wcs and bounding box"""
        super(SelectStruct, self).__init__(
            dataRef=dataRef,
            wcs=wcs,
            bbox=bbox,
        )
169 
170 
class WcsSelectImagesTask(BaseSelectImagesTask):
    """Select images using their Wcs"""
    # NOTE(review): the class statement was missing from the listing this block was
    # recovered from; the class name and base class are inferred from __all__, the
    # SelectStruct docstring and the runDataRef() signature -- confirm against the repository.

    def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[]):
        """Select images in the selectDataList that overlap the patch

        We use the "convexHull" function in the geom package to define
        polygons on the celestial sphere, and test the polygon of the
        patch for overlap with the polygon of the image.

        We use "convexHull" instead of generating a SphericalConvexPolygon
        directly because the standard for the inputs to SphericalConvexPolygon
        are pretty high and we don't want to be responsible for reaching them.
        If "convexHull" is found to be too slow, we can revise this.

        Images whose Wcs cannot map the bounding-box corners to the sky, or
        whose corners do not yield a polygon, are silently de-selected
        (reported at debug level only).

        @param dataRef: Data reference for coadd/tempExp (with tract, patch); not used by the selection
        @param coordList: List of Coord specifying boundary of patch
        @param makeDataRefList: Construct a list of data references?
        @param selectDataList: List of SelectStruct, to consider for selection
        @return a pipeBase Struct containing:
        - dataRefList: data references of the selected images (None if makeDataRefList False)
        - exposureInfoList: a BaseExposureInfo (dataId, corner coordinates) per selected image
        """
        # Imported here rather than at module level, as in the original code.
        from lsst.geom import convexHull

        dataRefList = []
        exposureInfoList = []

        patchVertices = [coord.getVector() for coord in coordList]
        # NOTE(review): convexHull() is checked for a None result for images below but not
        # for the patch; a None here would fail at the intersects() call -- confirm intended.
        patchPoly = convexHull(patchVertices)

        for data in selectDataList:
            # A distinct name keeps the 'dataRef' argument from being shadowed
            imageRef = data.dataRef
            imageWcs = data.wcs
            imageBox = data.bbox

            try:
                imageCorners = [imageWcs.pixelToSky(pix) for pix in afwGeom.Box2D(imageBox).getCorners()]
            except (pexExceptions.DomainError, pexExceptions.RuntimeError) as e:
                # Protecting ourselves from awful Wcs solutions in input images
                self.log.debug("WCS error in testing calexp %s (%s): deselecting", imageRef.dataId, e)
                continue

            imagePoly = convexHull([coord.getVector() for coord in imageCorners])
            if imagePoly is None:
                self.log.debug("Unable to create polygon from image %s: deselecting", imageRef.dataId)
                continue
            if patchPoly.intersects(imagePoly):  # "intersects" also covers "contains" or "is contained by"
                self.log.info("Selecting calexp %s", imageRef.dataId)
                dataRefList.append(imageRef)
                exposureInfoList.append(BaseExposureInfo(imageRef.dataId, imageCorners))

        return pipeBase.Struct(
            dataRefList=dataRefList if makeDataRefList else None,
            exposureInfoList=exposureInfoList,
        )
224 
225 
class PsfWcsSelectImagesConfig(pexConfig.Config):
    """Configuration for selecting images by Wcs overlap and PSF quality

    The three "max*" fields are optional thresholds; the remaining fields name
    the source-catalog columns used to evaluate them.
    """
    # Thresholds
    maxEllipResidual = pexConfig.Field(
        dtype=float,
        default=0.007,
        optional=True,
        doc="Maximum median ellipticity residual",
    )
    maxSizeScatter = pexConfig.Field(
        dtype=float,
        optional=True,
        doc="Maximum scatter in the size residuals",
    )
    maxScaledSizeScatter = pexConfig.Field(
        dtype=float,
        default=0.009,
        optional=True,
        doc="Maximum scatter in the size residuals, scaled by the median size",
    )
    # Catalog column names
    starSelection = pexConfig.Field(
        dtype=str,
        default='calib_psfUsed',
        doc="select star with this field",
    )
    starShape = pexConfig.Field(
        dtype=str,
        default='base_SdssShape',
        doc="name of star shape",
    )
    psfShape = pexConfig.Field(
        dtype=str,
        default='base_SdssShape_psf',
        doc="name of psf shape",
    )
259 
260 
def sigmaMad(array):
    """Return median absolute deviation scaled to normally distributed data

    @param[in] array: numeric values (anything numpy can subtract a scalar from)
    @return 1.4826 times the median of the absolute deviations from the median
    """
    centre = np.median(array)
    absoluteDeviations = np.abs(array - centre)
    return 1.4826*np.median(absoluteDeviations)
264 
class PsfWcsSelectImagesTask(WcsSelectImagesTask):
    """Select images using their Wcs and cuts on the PSF properties"""
    # NOTE(review): the class statement was missing from the listing this block was
    # recovered from; the class name comes from the super() call below and the base
    # class is inferred from the docstring and __all__ -- confirm against the repository.

    ConfigClass = PsfWcsSelectImagesConfig
    _DefaultName = "PsfWcsSelectImages"

    def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[]):
        """Select images in the selectDataList that overlap the patch and satisfy PSF quality criteria.

        The PSF quality criteria are based on the size and ellipticity residuals from the
        adaptive second moments of the star and the PSF.

        The criteria are:
          - the median of the ellipticity residuals
          - the robust scatter of the size residuals (using the median absolute deviation)
          - the robust scatter of the size residuals scaled by the square of
            the median size

        A criterion whose configured threshold is None (or any other false value, such
        as 0) is skipped. Only the first failing criterion is logged for a rejected image.

        @param dataRef: Data reference for coadd/tempExp (with tract, patch)
        @param coordList: List of Coord specifying boundary of patch
        @param makeDataRefList: Construct a list of data references?
        @param selectDataList: List of SelectStruct, to consider for selection
        @return a pipeBase Struct containing:
        - dataRefList: data references of the images that pass (None if makeDataRefList False)
        - exposureInfoList: exposure information of the images that pass
        """
        # The data references are needed below to read each source catalog, so always request
        # them from the parent selection. Forwarding makeDataRefList=False used to hand
        # zip() a None and raise TypeError.
        result = super(PsfWcsSelectImagesTask, self).runDataRef(dataRef, coordList, True, selectDataList)

        dataRefList = []
        exposureInfoList = []
        for imageRef, exposureInfo in zip(result.dataRefList, result.exposureInfoList):
            butler = imageRef.butlerSubset.butler
            srcCatalog = butler.get('src', imageRef.dataId)
            # Column named by config.starSelection, used to index the shape columns
            # NOTE(review): if it selects no sources the medians below are NaN, every comparison
            # is False and the image is kept -- confirm that is the intended outcome.
            mask = srcCatalog[self.config.starSelection]

            starXX = srcCatalog[self.config.starShape+'_xx'][mask]
            starYY = srcCatalog[self.config.starShape+'_yy'][mask]
            starXY = srcCatalog[self.config.starShape+'_xy'][mask]
            psfXX = srcCatalog[self.config.psfShape+'_xx'][mask]
            psfYY = srcCatalog[self.config.psfShape+'_yy'][mask]
            psfXY = srcCatalog[self.config.psfShape+'_xy'][mask]

            # Size is the fourth root of the determinant of the second-moment matrix;
            # e1 and e2 are the moment differences normalised by the trace.
            starSize = np.power(starXX*starYY - starXY**2, 0.25)
            starE1 = (starXX - starYY)/(starXX + starYY)
            starE2 = 2*starXY/(starXX + starYY)
            medianSize = np.median(starSize)

            psfSize = np.power(psfXX*psfYY - psfXY**2, 0.25)
            psfE1 = (psfXX - psfYY)/(psfXX + psfYY)
            psfE2 = 2*psfXY/(psfXX + psfYY)

            medianE1 = np.abs(np.median(starE1 - psfE1))
            medianE2 = np.abs(np.median(starE2 - psfE2))
            medianE = np.sqrt(medianE1**2 + medianE2**2)

            scatterSize = sigmaMad(starSize - psfSize)
            scaledScatterSize = scatterSize/medianSize**2

            maxEllipResidual = self.config.maxEllipResidual
            if maxEllipResidual and medianE > maxEllipResidual:
                self.log.info("Removing visit %s because median e residual too large: %f vs %f",
                              imageRef.dataId, medianE, maxEllipResidual)
                continue

            maxSizeScatter = self.config.maxSizeScatter
            if maxSizeScatter and scatterSize > maxSizeScatter:
                self.log.info("Removing visit %s because size scatter is too large: %f vs %f",
                              imageRef.dataId, scatterSize, maxSizeScatter)
                continue

            maxScaledSizeScatter = self.config.maxScaledSizeScatter
            if maxScaledSizeScatter and scaledScatterSize > maxScaledSizeScatter:
                self.log.info("Removing visit %s because scaled size scatter is too large: %f vs %f",
                              imageRef.dataId, scaledScatterSize, maxScaledSizeScatter)
                continue

            dataRefList.append(imageRef)
            exposureInfoList.append(exposureInfo)

        return pipeBase.Struct(
            dataRefList=dataRefList if makeDataRefList else None,
            exposureInfoList=exposureInfoList,
        )
def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[])
Definition: selectImages.py:98
def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[])
def runDataRef(self, dataRef, coordList, makeDataRefList=True, selectDataList=[])
def __init__(self, dataRef, wcs, bbox)