lsst.meas.algorithms  14.0-7-g23fdbe95+10
reserveSourcesTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 #
4 # Copyright 2008-2017 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 from __future__ import absolute_import, division, print_function
24 
25 from builtins import zip
26 
27 import numpy as np
28 
29 from lsst.pex.config import Config, Field
30 from lsst.pipe.base import Task, Struct
31 
32 import lsst.afw.table
33 
34 __all__ = ["ReserveSourcesConfig", "ReserveSourcesTask"]
35 
36 
class ReserveSourcesConfig(Config):
    """Settings that control how sources are held back from fitting

    ``fraction`` sets how many of the candidate sources are reserved, and
    ``seed`` is combined with the exposure ID to seed the random number
    generator that picks them.
    """
    fraction = Field(
        doc="Fraction of candidates to reserve from fitting; none if <= 0",
        dtype=float,
        default=0.0,
    )
    seed = Field(
        doc=("This number will be added to the exposure ID "
             "to set the random seed for reserving candidates"),
        dtype=int,
        default=1,
    )
44 
45 
class ReserveSourcesTask(Task):
    """Reserve sources from analysis

    We randomly select a fraction of sources that will be reserved
    from analysis. This allows evaluation of the quality of model fits
    using sources that were not involved in the fitting process.

    Constructor parameters
    ----------------------
    columnName : `str`, required
        Name of flag column to add; we will suffix this with "_reserved".
    schema : `lsst.afw.table.Schema`, required
        Catalog schema.
    doc : `str`
        Documentation for column to add.
    config : `ReserveSourcesConfig`
        Configuration.
    """
    ConfigClass = ReserveSourcesConfig
    _DefaultName = "reserveSources"

    def __init__(self, columnName=None, schema=None, doc=None, **kwargs):
        """Construct the task and add the "<columnName>_reserved" flag field

        ``columnName`` and ``schema`` default to `None` in the signature but
        are both required; the remaining keyword arguments are forwarded
        unchanged to `Task.__init__`.
        """
        Task.__init__(self, **kwargs)
        assert columnName is not None, "columnName not provided"
        assert schema is not None, "schema not provided"
        self.columnName = columnName
        # Key of the new flag field; used by markSources to flag records.
        self.key = schema.addField(self.columnName + "_reserved", type="Flag", doc=doc)

    def run(self, sources, prior=None, expId=0):
        """Select sources to be reserved

        Reserved sources will be flagged in the catalog, and we will return
        boolean arrays that identify the sources to be reserved from and
        used in the analysis. Typically you'll want to use the sources
        from the `use` array in your fitting, and use the sources from the
        `reserved` array as an independent test of your fitting.

        Parameters
        ----------
        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
            Sources from which to select some to be reserved.
        prior : `numpy.ndarray` of type `bool`, optional
            Prior selection of sources. Should have the same length as
            `sources`. If set, we will only consider for reservation sources
            that are flagged `True` in this array. Any sequence that numpy
            can convert to a boolean array is accepted.
        expId : `int`
            Exposure identifier; used for seeding the random number generator.

        Return struct contents
        ----------------------
        reserved : `numpy.ndarray` of type `bool`
            Sources to be reserved are flagged `True` in this array.
        use : `numpy.ndarray` of type `bool`
            Sources the user should use in analysis are flagged `True`.
        """
        if prior is not None:
            assert len(prior) == len(sources), "Length mismatch: %s vs %s" % (len(prior), len(sources))
            # Coerce to a boolean mask so that lists and integer 0/1 arrays
            # behave as masks (not as fancy indices) below; this is a no-op
            # for an array that is already of type bool.
            prior = np.asarray(prior, dtype=bool)
            numSources = prior.sum()
        else:
            numSources = len(sources)
        selection = self.select(numSources, expId)
        if prior is not None:
            # Expand the selection made within the subset to the full length.
            selection = self.applySelectionPrior(prior, selection)
        self.markSources(sources, selection)
        self.log.info("Reserved %d/%d sources", selection.sum(), len(selection))
        return Struct(reserved=selection,
                      use=prior & ~selection if prior is not None else np.logical_not(selection))

    def select(self, numSources, expId=0):
        """Randomly select some sources

        We return a boolean array with a random selection. The fraction
        of sources selected is specified by the config parameter `fraction`.

        Parameters
        ----------
        numSources : `int`
            Number of sources in catalog from which to select.
        expId : `int`
            Exposure identifier; used for seeding the random number generator.

        Returns
        -------
        selection : `numpy.ndarray` of type `bool`
            Selected sources are flagged `True` in this array.
        """
        selection = np.zeros(numSources, dtype=bool)
        if self.config.fraction <= 0:
            return selection
        reserve = int(np.round(numSources*self.config.fraction))
        selection[:reserve] = True
        # RandomState only accepts seeds in [0, 2**32 - 1]; exposure IDs can
        # exceed that, so fold the seed into 32 bits. Seeds that were already
        # in range are unchanged by the mask.
        rng = np.random.RandomState((self.config.seed + expId) & 0xFFFFFFFF)
        # Shuffle the leading block of True values into random positions.
        rng.shuffle(selection)
        return selection

    def applySelectionPrior(self, prior, selection):
        """Apply selection to full catalog

        The `select` method makes a random selection of sources. If those
        sources don't represent the full population (because a sub-selection
        has already been made), then we need to generate a selection covering
        the entire population.

        Parameters
        ----------
        prior : `numpy.ndarray` of type `bool`
            Prior selection of sources, identifying the subset from which the
            random selection has been made.
        selection : `numpy.ndarray` of type `bool`
            Selection of sources in subset identified by `prior`.

        Returns
        -------
        full : `numpy.ndarray` of type `bool`
            Selection applied to full population.
        """
        full = np.zeros(len(prior), dtype=bool)
        # Only positions flagged in `prior` can be selected; all others
        # remain False.
        full[prior] = selection
        return full

    def markSources(self, sources, selection):
        """Mark sources in a list or catalog

        This requires iterating through the list and setting the flag in
        each source individually. Even if the `sources` is a `Catalog`
        with contiguous records, it's not currently possible to set a boolean
        column (DM-6981) so we need to iterate.

        The flag is only ever set to `True` here; sources that are not
        selected are left untouched.

        Parameters
        ----------
        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
            Catalog in which to flag selected sources.
        selection : `numpy.ndarray` of type `bool`
            Selection of sources to mark.
        """
        for src, select in zip(sources, selection):
            if select:
                src.set(self.key, True)
def __init__(self, columnName=None, schema=None, doc=None, **kwargs)