lsst.meas.algorithms  16.0-19-gb830ed4e+14
objectSizeStarSelector.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 #
4 # Copyright 2008-2017 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 import sys
24 
25 import numpy
26 import warnings
27 from functools import reduce
28 
29 from lsst.log import Log
30 from lsst.pipe.base import Struct
31 import lsst.geom
32 from lsst.afw.cameraGeom import PIXELS, TAN_PIXELS
33 import lsst.afw.geom as afwGeom
34 import lsst.pex.config as pexConfig
35 import lsst.afw.display.ds9 as ds9
36 from .sourceSelector import BaseSourceSelectorTask, sourceSelectorRegistry
37 
38 
class ObjectSizeStarSelectorConfig(BaseSourceSelectorTask.ConfigClass):
    """Configuration for the object-size star selector.

    The fields control the flux and width cuts applied before clustering,
    the clustering tolerances, and the flags that disqualify a source.
    """

    # --- flux cuts applied to ``sourceFluxField`` ---
    fluxMin = pexConfig.Field(
        doc="specify the minimum psfFlux for good Psf Candidates",
        dtype=float,
        default=12500.0,
        check=lambda x: x >= 0.0,
    )
    fluxMax = pexConfig.Field(
        doc="specify the maximum psfFlux for good Psf Candidates (ignored if == 0)",
        dtype=float,
        default=0.0,
        check=lambda x: x >= 0.0,
    )
    # --- width cuts; sources outside [widthMin, widthMax] are rejected ---
    widthMin = pexConfig.Field(
        doc="minimum width to include in histogram",
        dtype=float,
        default=0.0,
        check=lambda x: x >= 0.0,
    )
    widthMax = pexConfig.Field(
        doc="maximum width to include in histogram",
        dtype=float,
        default=10.0,
        check=lambda x: x >= 0.0,
    )
    sourceFluxField = pexConfig.Field(
        doc="Name of field in Source to use for flux measurement",
        dtype=str,
        default="base_GaussianFlux_instFlux",
    )
    # --- clustering parameters ---
    widthStdAllowed = pexConfig.Field(
        doc="Standard deviation of width allowed to be interpreted as good stars",
        dtype=float,
        default=0.15,
        check=lambda x: x >= 0.0,
    )
    nSigmaClip = pexConfig.Field(
        doc="Keep objects within this many sigma of cluster 0's median",
        dtype=float,
        default=2.0,
        check=lambda x: x >= 0.0,
    )
    badFlags = pexConfig.ListField(
        doc="List of flags which cause a source to be rejected as bad",
        dtype=str,
        default=[
            "base_PixelFlags_flag_edge",
            "base_PixelFlags_flag_interpolatedCenter",
            "base_PixelFlags_flag_saturatedCenter",
            "base_PixelFlags_flag_crCenter",
            "base_PixelFlags_flag_bad",
            "base_PixelFlags_flag_interpolated",
        ],
    )

    def validate(self):
        """Validate the configuration.

        Runs the base-class validation and then checks that the width
        range is not inverted.

        Raises
        ------
        lsst.pex.config.FieldValidationError
            If ``widthMin`` is greater than ``widthMax``.
        """
        BaseSourceSelectorTask.ConfigClass.validate(self)
        if self.widthMin > self.widthMax:
            msg = "widthMin (%f) > widthMax (%f)" % (self.widthMin, self.widthMax)
            # FieldValidationError needs the offending field and the config
            # instance in addition to the message.
            raise pexConfig.FieldValidationError(ObjectSizeStarSelectorConfig.widthMin, self, msg)
99 
100 
class EventHandler:
    """A class to handle key strokes with matplotlib displays

    On construction the handler registers itself for ``key_press_event`` on
    the canvas of the figure that owns ``axes``.  Pressing ``p`` inside those
    axes pans every ds9 frame in ``frames`` to the source nearest the cursor.

    Parameters
    ----------
    axes :
        Axes object whose ``figure.canvas.mpl_connect`` is used to register
        the handler; events from other axes are ignored.
    xs, ys : array-like
        Plotted coordinates of the points, compared against the cursor
        position (``ev.xdata``, ``ev.ydata``).
    x, y : array-like
        Positions passed to ``ds9.pan``, indexed in parallel with
        ``xs``/``ys``.
    frames : `list`, optional
        ds9 frames to pan; defaults to ``[0]``.
    """

    def __init__(self, axes, xs, ys, x, y, frames=None):
        self.axes = axes
        self.xs = xs
        self.ys = ys
        self.x = x
        self.y = y
        # Build the default per instance so that handlers never share a list.
        self.frames = [0] if frames is None else frames

        self.cid = self.axes.figure.canvas.mpl_connect('key_press_event', self)

    def __call__(self, ev):
        """Handle a key-press event; only ``p`` inside our axes does anything."""
        if ev.inaxes != self.axes:
            return

        if ev.key != "p":
            return

        dist = numpy.hypot(self.xs - ev.xdata, self.ys - ev.ydata)
        # Points with undefined coordinates must never be the nearest one.
        dist[numpy.isnan(dist)] = 1e30

        nearest = numpy.argmin(dist)  # first minimum, as before

        x = self.x[nearest]
        y = self.y[nearest]
        for frame in self.frames:
            ds9.pan(x, y, frame=frame)
        ds9.cmdBuffer.flush()
131 
132 
def _assignClusters(yvec, centers):
    """Return a vector of centerIds based on their distance to the centers

    Parameters
    ----------
    yvec : array-like
        Values to classify.
    centers : sequence
        Cluster centres; must be non-empty.

    Returns
    -------
    clusterId : `numpy.ndarray` of `int`
        For each element of ``yvec``, the index of the nearest centre.  Ties
        go to the lowest-index centre.  Elements whose distance to every
        centre is not a finite number (e.g. NaN values) are labelled -1.
    """
    assert len(centers) > 0

    yvec = numpy.asarray(yvec)
    # Start from "infinitely far" / "unassigned" so that every element has a
    # well-defined label even if no centre ever claims it.
    minDist = numpy.full(yvec.shape, numpy.inf)
    clusterId = numpy.full(yvec.shape, -1, dtype=int)

    # Make sure we are recording all numpy warnings; errstate restores the
    # previous floating-point error settings even if an exception is raised.
    with numpy.errstate(all="warn"), warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        for i, mean in enumerate(centers):
            dist = abs(yvec - mean)
            update = dist < minDist

            minDist[update] = dist[update]
            clusterId[update] = i

    if w:  # Only touch the logger when there is something to report
        dbl = Log.getLogger("objectSizeStarSelector._assignClusters")
        # NOTE(review): the level is forced to INFO before emitting at trace
        # level, which presumably suppresses these messages -- confirm intent.
        dbl.setLevel(dbl.INFO)
        for caught in w:
            dbl.trace(str(caught))

    return clusterId
161 
162 
def _kcenters(yvec, nCluster, useMedian=False, widthStdAllowed=0.15):
    """A classic k-means algorithm, clustering yvec into nCluster clusters

    Return the set of centres, and the cluster ID for each of the points

    If useMedian is true, use the median of the cluster as its centre, rather than
    the traditional mean

    Serge Monkewitz points out that there other (maybe smarter) ways of seeding the means:
       "e.g. why not use the Forgy or random partition initialization methods"
    however, the approach adopted here seems to work well for the particular sorts of things
    we're clustering in this application

    Parameters
    ----------
    yvec : `numpy.ndarray`
        Values to cluster; must be non-empty.
    nCluster : `int`
        Number of clusters; must be positive.
    useMedian : `bool`, optional
        Use the median rather than the mean as the cluster centre.
    widthStdAllowed : `float`, optional
        Sets the spacing of the initial centres: consecutive seeds are
        ``2*widthStdAllowed`` times the first seed apart.

    Returns
    -------
    centers : `numpy.ndarray`
        Cluster centres; a cluster with no members has centre NaN.
    clusterId : `numpy.ndarray` of `int`
        Cluster index assigned to each element of ``yvec``.
    """

    assert nCluster > 0

    # Seed the first centre at roughly the 10th percentile and space the
    # remaining seeds upwards from it.
    mean0 = sorted(yvec)[len(yvec)//10]  # guess
    delta = mean0 * widthStdAllowed * 2.0
    centers = mean0 + delta * numpy.arange(nCluster)

    func = numpy.median if useMedian else numpy.mean

    # Which cluster the points are assigned to; -1 means "not yet assigned".
    # Built directly as an integer array (rather than reinterpreting the bytes
    # of a float array) so it is valid for any input dtype.
    clusterId = numpy.full(numpy.shape(yvec), -1, dtype=int)
    while True:
        oclusterId = clusterId
        clusterId = _assignClusters(yvec, centers)

        # Converged once an iteration leaves every assignment unchanged.
        if numpy.array_equal(clusterId, oclusterId):
            break

        for i in range(nCluster):
            # Only compute func if some points are available; otherwise, default to NaN.
            pointsInCluster = (clusterId == i)
            if numpy.any(pointsInCluster):
                centers[i] = func(yvec[pointsInCluster])
            else:
                centers[i] = numpy.nan

    return centers, clusterId
203 
204 
def _improveCluster(yvec, centers, clusterId, nsigma=2.0, nIteration=10, clusterNum=0, widthStdAllowed=0.15):
    """Improve our estimate of one of the clusters (clusterNum) by sigma-clipping around its median

    Parameters
    ----------
    yvec : `numpy.ndarray`
        Values that were clustered.
    centers : `numpy.ndarray`
        Cluster centres; ``centers[clusterNum]`` is updated in place with the
        median of the current members.
    clusterId : `numpy.ndarray` of `int`
        Cluster index per element; modified in place.  Members rejected by
        the clipping are relabelled -1.
    nsigma : `float`, optional
        Members further than ``nsigma`` times the scatter estimate from the
        median are rejected.
    nIteration : `int`, optional
        Maximum number of clipping iterations.
    clusterNum : `int`, optional
        Index of the cluster to refine.
    widthStdAllowed : `float`, optional
        Iteration stops once the scatter estimate drops below this fraction
        of the median.

    Returns
    -------
    clusterId : `numpy.ndarray` of `int`
        The same array that was passed in.
    """

    nMember = numpy.count_nonzero(clusterId == clusterNum)
    if nMember < 5:  # can't compute meaningful interquartile range, so no chance of improvement
        return clusterId

    for _ in range(nIteration):
        old_nMember = nMember

        inCluster0 = clusterId == clusterNum
        yv = yvec[inCluster0]

        centers[clusterNum] = numpy.median(yv)
        stdev = numpy.std(yv)

        syv = numpy.sort(yv)
        stdev_iqr = 0.741*(syv[int(0.75*nMember)] - syv[int(0.25*nMember)])
        median = syv[int(0.5*nMember)]

        # Use the smaller of the two scatter estimates.
        sd = stdev if stdev < stdev_iqr else stdev_iqr

        newCluster0 = abs(yvec - centers[clusterNum]) < nsigma*sd
        clusterId[numpy.logical_and(inCluster0, numpy.logical_not(newCluster0))] = -1

        nMember = numpy.count_nonzero(clusterId == clusterNum)
        # Nothing left to clip: another pass would index an empty array.
        if nMember == 0:
            break
        # 'sd < widthStdAllowed * median' prevents too much rejections
        if nMember == old_nMember or sd < widthStdAllowed * median:
            break

    return clusterId
238 
239 
def plot(mag, width, centers, clusterId, marker="o", markersize=2, markeredgewidth=0, ltype='-',
         magType="model", clear=True):
    """Plot width against magnitude, coloured by cluster, using matplotlib.

    The figure is kept in the module-level global ``fig`` and reused on
    subsequent calls.

    Parameters
    ----------
    mag, width : `numpy.ndarray`
        Magnitudes and widths of the points; must be non-empty.
    centers : sequence
        Cluster centres; a horizontal line is drawn at ``centers[0]``.
    clusterId : `numpy.ndarray`
        Cluster index per point; points labelled -1 are drawn in black.
    marker, markersize, markeredgewidth :
        Passed through to ``axes.plot`` for the points.
    ltype : `str`, optional
        Line style of the horizontal line marking cluster 0.
    magType : `str`, optional
        Text inserted into the x-axis label.
    clear : `bool`, optional
        If True, clear an existing figure before plotting and set the axis
        labels.

    Returns
    -------
    fig
        The matplotlib figure, or None if matplotlib cannot be imported.
    """

    log = Log.getLogger("objectSizeStarSelector.plot")
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        log.warn("Unable to import matplotlib: %s", e)
        return

    global fig
    # Look the global up through globals() so that the first call works even
    # when no module-level ``fig`` has been defined yet.
    if not globals().get("fig"):
        fig = plt.figure()
    elif clear:
        fig.clf()

    axes = fig.add_axes((0.1, 0.1, 0.85, 0.80))

    # x-range: the 5th..95th percentile span of the magnitudes, padded by 10%.
    sortedMag = sorted(mag)
    xmin = sortedMag[int(0.05*len(mag))]
    xmax = sortedMag[int(0.95*len(mag))]
    margin = 0.1*(xmax - xmin)

    axes.set_xlim(xmin - margin, xmax + margin)
    axes.set_ylim(0, 10)

    colors = ["r", "g", "b", "c", "m", "k", ]
    for k, mean in enumerate(centers):
        if k == 0:
            axes.plot(axes.get_xlim(), (mean, mean,), "k%s" % ltype)

        li = (clusterId == k)
        axes.plot(mag[li], width[li], marker, markersize=markersize, markeredgewidth=markeredgewidth,
                  color=colors[k % len(colors)])

    # Points that belong to no cluster
    li = (clusterId == -1)
    axes.plot(mag[li], width[li], marker, markersize=markersize, markeredgewidth=markeredgewidth,
              color='k')

    if clear:
        axes.set_xlabel("Instrumental %s mag" % magType)
        axes.set_ylabel(r"$\sqrt{(I_{xx} + I_{yy})/2}$")

    return fig
284 
285 
291 
292 
@pexConfig.registerConfigurable("objectSize", sourceSelectorRegistry)
class ObjectSizeStarSelectorTask(BaseSourceSelectorTask):
    r"""!A star selector that looks for a cluster of small objects in a size-magnitude plot

    @anchor ObjectSizeStarSelectorTask_

    @section meas_algorithms_objectSizeStarSelector_Contents  Contents

     - @ref meas_algorithms_objectSizeStarSelector_Purpose
     - @ref meas_algorithms_objectSizeStarSelector_Initialize
     - @ref meas_algorithms_objectSizeStarSelector_IO
     - @ref meas_algorithms_objectSizeStarSelector_Config
     - @ref meas_algorithms_objectSizeStarSelector_Debug

    @section meas_algorithms_objectSizeStarSelector_Purpose  Description

    A star selector that looks for a cluster of small objects in a size-magnitude plot.

    @section meas_algorithms_objectSizeStarSelector_Initialize  Task initialisation

    @copydoc \_\_init\_\_

    @section meas_algorithms_objectSizeStarSelector_IO  Invoking the Task

    Like all star selectors, the main method is `run`.

    @section meas_algorithms_objectSizeStarSelector_Config  Configuration parameters

    See @ref ObjectSizeStarSelectorConfig

    @section meas_algorithms_objectSizeStarSelector_Debug  Debug variables

    ObjectSizeStarSelectorTask has a debug dictionary with the following keys:
    <dl>
    <dt>display
    <dd>bool; if True display debug information
    <dt>displayExposure
    <dd>bool; if True display the exposure and spatial cells
    <dt>plotMagSize
    <dd>bool: if True display the magnitude-size relation using matplotlib
    <dt>dumpData
    <dd>bool; if True dump data to a pickle file
    </dl>

    For example, put something like:
    @code{.py}
        import lsstDebug
        def DebugInfo(name):
            di = lsstDebug.getInfo(name)  # N.b. lsstDebug.Info(name) would call us recursively
            if name.endswith("objectSizeStarSelector"):
                di.display = True
                di.displayExposure = True
                di.plotMagSize = True

            return di

        lsstDebug.Info = DebugInfo
    @endcode
    into your `debug.py` file and run your task with the `--debug` flag.
    """
    ConfigClass = ObjectSizeStarSelectorConfig
    usesMatches = False  # selectStars does not use its matches argument

    def selectSources(self, sourceCat, matches=None, exposure=None):
        """Return a selection of PSF candidates that represent likely stars.

        A list of PSF candidates may be used by a PSF fitter to construct a PSF.

        Parameters
        ----------
        sourceCat : `lsst.afw.table.SourceCatalog`
            Catalog of sources to select from.
            This catalog must be contiguous in memory.
        matches : `list` of `lsst.afw.table.ReferenceMatch` or None
            Ignored in this SourceSelector.
        exposure : `lsst.afw.image.Exposure` or None
            The exposure the catalog was built from; used to get the detector
            to transform to TanPix, and for debug display.  If None, the
            moments are used untransformed and the exposure is not displayed.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            The struct contains the following data:

            - selected : `array` of `bool`
                Boolean array of sources that were selected, same length as
                sourceCat.

        Raises
        ------
        RuntimeError
            If no source passes the flag, flux and width cuts.
        """
        import lsstDebug
        debugInfo = lsstDebug.Info(__name__)
        display = debugInfo.display
        displayExposure = debugInfo.displayExposure  # display the Exposure + spatialCells
        plotMagSize = debugInfo.plotMagSize  # display the magnitude-size relation
        dumpData = debugInfo.dumpData  # dump data to pickle file?

        # exposure is optional; without it there is no detector and hence no
        # PIXELS -> TAN_PIXELS transform to apply.
        detector = exposure.getDetector() if exposure is not None else None
        pixToTanPix = None
        if detector is not None:
            pixToTanPix = detector.getTransform(PIXELS, TAN_PIXELS)
        #
        # Look at the distribution of stars in the magnitude-size plane
        #
        flux = sourceCat.get(self.config.sourceFluxField)

        xx = numpy.empty(len(sourceCat))
        xy = numpy.empty_like(xx)
        yy = numpy.empty_like(xx)
        for i, source in enumerate(sourceCat):
            Ixx, Ixy, Iyy = source.getIxx(), source.getIxy(), source.getIyy()
            if pixToTanPix:
                # Transform the second moments with the local linear
                # approximation of the pixel -> tan-pixel mapping.
                p = lsst.geom.Point2D(source.getX(), source.getY())
                linTransform = afwGeom.linearizeTransform(pixToTanPix, p).getLinear()
                m = afwGeom.Quadrupole(Ixx, Iyy, Ixy)
                m.transform(linTransform)
                Ixx, Iyy, Ixy = m.getIxx(), m.getIyy(), m.getIxy()

            xx[i], xy[i], yy[i] = Ixx, Ixy, Iyy

        width = numpy.sqrt(0.5*(xx + yy))
        with numpy.errstate(invalid="ignore"):  # suppress NAN warnings
            bad = reduce(lambda x, y: numpy.logical_or(x, sourceCat.get(y)), self.config.badFlags, False)
            bad = numpy.logical_or(bad, flux < self.config.fluxMin)
            bad = numpy.logical_or(bad, numpy.logical_not(numpy.isfinite(width)))
            bad = numpy.logical_or(bad, numpy.logical_not(numpy.isfinite(flux)))
            bad = numpy.logical_or(bad, width < self.config.widthMin)
            bad = numpy.logical_or(bad, width > self.config.widthMax)
        if self.config.fluxMax > 0:
            bad = numpy.logical_or(bad, flux > self.config.fluxMax)
        good = numpy.logical_not(bad)

        if not numpy.any(good):
            raise RuntimeError("No objects passed our cuts for consideration as psf stars")

        mag = -2.5*numpy.log10(flux[good])
        width = width[good]
        #
        # Look for the maximum in the size histogram, then search upwards for the minimum that separates
        # the initial peak (of, we presume, stars) from the galaxies
        #
        if dumpData:
            import os
            import pickle
            # Find the first unused ~/widths-N.pkl
            _ii = 0
            while True:
                pickleFile = os.path.expanduser(os.path.join("~", "widths-%d.pkl" % _ii))
                if not os.path.exists(pickleFile):
                    break
                _ii += 1

            with open(pickleFile, "wb") as fd:
                pickle.dump(mag, fd, -1)
                pickle.dump(width, fd, -1)

        centers, clusterId = _kcenters(width, nCluster=4, useMedian=True,
                                       widthStdAllowed=self.config.widthStdAllowed)

        if display and plotMagSize:
            fig = plot(mag, width, centers, clusterId,
                       magType=self.config.sourceFluxField.split(".")[-1].title(),
                       marker="+", markersize=3, markeredgewidth=None, ltype=':', clear=True)
        else:
            fig = None

        clusterId = _improveCluster(width, centers, clusterId,
                                    nsigma=self.config.nSigmaClip,
                                    widthStdAllowed=self.config.widthStdAllowed)

        if display and plotMagSize:
            plot(mag, width, centers, clusterId, marker="x", markersize=3, markeredgewidth=None, clear=False)

        stellar = (clusterId == 0)
        #
        # We know enough to plot, if so requested
        #
        frame = 0
        # The exposure can only be shown if one was supplied.
        showExposure = bool(display and displayExposure and exposure is not None)

        if fig:
            if showExposure:
                ds9.mtv(exposure.getMaskedImage(), frame=frame, title="PSF candidates")

            # Kept in a global so the handler stays referenced while the
            # figure is shown.
            global eventHandler
            eventHandler = EventHandler(fig.get_axes()[0], mag, width,
                                        sourceCat.getX()[good], sourceCat.getY()[good], frames=[frame])

            fig.show()

            while True:
                try:
                    reply = input("continue? [c h(elp) q(uit) p(db)] ").strip()
                except EOFError:
                    reply = None
                if not reply:
                    reply = "c"

                if reply[0] == "h":
                    print("""\
    We cluster the points; red are the stellar candidates and the other colours are other clusters.
    Points labelled + are rejects from the cluster (only for cluster 0).

    At this prompt, you can continue with almost any key; 'p' enters pdb, and 'h' prints this text

    If displayExposure is true, you can put the cursor on a point and hit 'p' to see it in ds9.
    """)
                elif reply[0] == "p":
                    import pdb
                    pdb.set_trace()
                elif reply[0] == 'q':
                    sys.exit(1)
                else:
                    break

        if showExposure:
            mi = exposure.getMaskedImage()

            with ds9.Buffering():
                # NOTE(review): the colour reflects the flag/flux/width cuts
                # only; the clustering result has not been folded into
                # ``good`` at this point -- confirm that this is intended.
                for i, source in enumerate(sourceCat):
                    if good[i]:
                        ctype = ds9.GREEN  # star candidate
                    else:
                        ctype = ds9.RED  # not star

                    ds9.dot("+", source.getX() - mi.getX0(),
                            source.getY() - mi.getY0(), frame=frame, ctype=ctype)

        # stellar only applies to good==True objects
        selected = numpy.zeros_like(good)
        selected[good] = stellar

        return Struct(selected=selected)
def plot(mag, width, centers, clusterId, marker="o", markersize=2, markeredgewidth=0, ltype='-', magType="model", clear=True)
A star selector that looks for a cluster of small objects in a size-magnitude plot.