lsst.cp.pipe  20.0.0-7-g3c4151b+af0016561f
utils.py
Go to the documentation of this file.
1 # This file is part of cp_pipe.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 #
22 
23 __all__ = ['PairedVisitListTaskRunner', 'SingleVisitListTaskRunner',
24  'NonexistentDatasetTaskDataIdContainer', 'parseCmdlineNumberString',
25  'countMaskedPixels', 'checkExpLengthEqual']
26 
27 import re
28 import numpy as np
29 from scipy.optimize import leastsq
30 import numpy.polynomial.polynomial as poly
31 
32 import lsst.pipe.base as pipeBase
33 import lsst.ip.isr as ipIsr
34 import lsst.log
35 
36 
def countMaskedPixels(maskedIm, maskPlane):
    """Return how many pixels have the requested mask plane set.

    Parameters
    ----------
    maskedIm
        Masked image. Only ``maskedIm.mask.getPlaneBitMask`` and
        ``maskedIm.mask.array`` are used.
    maskPlane
        Mask plane identifier, passed unchanged to ``getPlaneBitMask``.

    Returns
    -------
    nPix : `int`
        Number of pixels whose mask value shares at least one bit with the
        bit mask of ``maskPlane``.
    """
    planeBits = maskedIm.mask.getPlaneBitMask(maskPlane)
    # Non-zero after the AND means the pixel carries the requested plane.
    flagged = np.bitwise_and(maskedIm.mask.array, planeBits)
    return int(np.count_nonzero(flagged))
42 
43 
class PairedVisitListTaskRunner(pipeBase.TaskRunner):
    """Subclass of TaskRunner for handling intrinsically paired visits.

    This transforms the processed arguments generated by the ArgumentParser
    into the arguments expected by tasks which take visit pairs for their
    run() methods.

    Such tasks' run() methods tend to take two arguments,
    one of which is the dataRef (as usual), and the other is the list
    of visit-pairs, in the form of a list of two-element lists of `int`.
    This list is supplied on the command line as documented,
    and this class parses that, and passes the parsed version
    to the run() method.

    See pipeBase.TaskRunner for more information.
    """

    @staticmethod
    def getTargetList(parsedCmd, **kwargs):
        """Parse the visit list and pass through explicitly.

        Parameters
        ----------
        parsedCmd
            Parsed command line; ``parsedCmd.visitPairs`` must be an iterable
            of strings, each of the form ``"visit1,visit2"``.
        **kwargs
            Forwarded unchanged to ``pipeBase.TaskRunner.getTargetList``.

        Returns
        -------
        targetList
            Whatever ``pipeBase.TaskRunner.getTargetList`` returns when given
            the parsed pairs as the ``visitPairs`` keyword argument.

        Raises
        ------
        RuntimeError
            Raised if an entry does not contain exactly two comma-separated
            fields, or if either field is not an integer.
        """
        visitPairs = []
        for visitStringPair in parsedCmd.visitPairs:
            visitStrings = visitStringPair.split(",")
            if len(visitStrings) != 2:
                raise RuntimeError("Found {} visits in {} instead of 2".format(len(visitStrings),
                                                                               visitStringPair))
            try:
                visits = [int(visit) for visit in visitStrings]
            except ValueError as err:
                # int() on a str only fails with ValueError; keep the cause
                # attached so the offending token shows in the traceback.
                raise RuntimeError(
                    "Could not parse {} as two integer visit numbers".format(visitStringPair)
                ) from err
            visitPairs.append(visits)

        return pipeBase.TaskRunner.getTargetList(parsedCmd, visitPairs=visitPairs, **kwargs)
77 
78 
def parseCmdlineNumberString(inputString):
    """Expand a command-line number expression into a list of ints.

    The expression is a ``^``-separated sequence of terms. Each term is
    either a single integer or an inclusive range ``start..stop`` with an
    optional ``:stride`` suffix. For example ``"1..5:2^123..126"`` expands to
    ``[1, 3, 5, 123, 124, 125, 126]``.

    Parameters
    ----------
    inputString : `str`
        The expression to expand.

    Returns
    -------
    outList : `list` of `int`
        The expanded numbers, in the order the terms appear.

    Raises
    ------
    ValueError
        Raised if a term is neither a range nor an integer literal, or if a
        range has a stride of zero.
    """
    rangePattern = r"^(\d+)\.\.(\d+)(?::(\d+))?$"
    numbers = []
    for term in inputString.split("^"):
        rangeMatch = re.search(rangePattern, term)
        if rangeMatch is None:
            # Not a range: the term must be a plain integer.
            numbers.append(int(term))
            continue
        first, last, stride = rangeMatch.groups()
        step = int(stride) if stride else 1
        # The upper bound is inclusive on the command line.
        numbers.extend(range(int(first), int(last) + 1, step))
    return numbers
98 
99 
class SingleVisitListTaskRunner(pipeBase.TaskRunner):
    """TaskRunner for tasks that need a list of visits with each dataRef.

    The arguments produced by the ArgumentParser are converted into the form
    expected by tasks whose run() method takes the dataRef (as usual) plus a
    list of visits, as is common in `lsst.cp.pipe` code.

    The visit list is given on the command line as a single number
    expression; this class expands it and hands the expanded list to the
    run() method.

    See `lsst.pipe.base.TaskRunner` for more information.
    """

    @staticmethod
    def getTargetList(parsedCmd, **kwargs):
        """Expand the visit expression and pass the visits through explicitly.

        Parameters
        ----------
        parsedCmd
            Parsed command line; ``parsedCmd.visitList`` must hold exactly
            one unparsed number-expression string.
        **kwargs
            Forwarded unchanged to ``pipeBase.TaskRunner.getTargetList``.

        Returns
        -------
        targetList
            Whatever ``pipeBase.TaskRunner.getTargetList`` returns when given
            the expanded visits as the ``visitList`` keyword argument.
        """
        rawVisitList = parsedCmd.visitList
        # Anything other than a single raw string means the list was already
        # parsed somewhere upstream; stop rather than guess.
        assert len(rawVisitList) == 1, 'visitList parsing assumptions violated'

        return pipeBase.TaskRunner.getTargetList(
            parsedCmd,
            visitList=parseCmdlineNumberString(rawVisitList[0]),
            **kwargs
        )
126 
127 
class NonexistentDatasetTaskDataIdContainer(pipeBase.DataIdContainer):
    """A DataIdContainer for tasks whose output dataset does not exist yet."""

    def makeDataRefList(self, namespace):
        """Populate ``self.refList`` from ``self.idList``.

        This override is needed because the dataset does not exist before the
        task has run.

        Parameters
        ----------
        namespace
            Results of parsing the command-line; its ``butler`` and ``log``
            attributes are used.

        Raises
        ------
        RuntimeError
            Raised if ``self.datasetType`` is `None`.

        Notes
        -----
        Not called if ``add_id_argument`` called
        with ``doMakeDataRefList=False``.
        This is close to the vanilla implementation, but it does not check
        whether the datasets already exist, because this task is what
        creates them.
        """
        if self.datasetType is None:
            raise RuntimeError("Must call setDatasetType first")

        butler = namespace.butler
        for dataId in self.idList:
            matches = list(butler.subset(datasetType=self.datasetType, level=self.level, dataId=dataId))
            if matches:
                self.refList += matches
            else:
                # The butler returned nothing for this dataId; warn and skip.
                namespace.log.warn("No data found for dataId=%s", dataId)
162 
163 
def fitLeastSq(initialParams, dataX, dataY, function):
    """Fit with `scipy.optimize.leastsq` and estimate the parameter errors.

    ``leastsq`` returns the fractional covariance matrix. To estimate the
    standard deviation of the fit parameters, the entries of that matrix are
    multiplied by the unweighted reduced chi squared and the square root of
    the diagonal elements is taken.

    Parameters
    ----------
    initialParams : `list` of `float`
        Initial values for fit parameters. For ptcFitType=POLYNOMIAL, its
        length determines the degree of the polynomial.

    dataX : `numpy.array` of `float`
        Data in the abscissa axis.

    dataY : `numpy.array` of `float`
        Data in the ordinate axis.

    function : callable object (function)
        Function to fit the data with, called as ``function(params, x)``.

    Returns
    -------
    pFitSingleLeastSquares : `numpy.ndarray` of `float`
        Fitted parameters.

    pErrSingleLeastSquares : `numpy.ndarray` of `float`
        Errors for the fitted parameters; all ``inf`` when there are no
        spare degrees of freedom or no covariance matrix was returned.

    reducedChiSqSingleLeastSquares : `float`
        Unweighted reduced chi squared (``inf`` in the cases above).
    """

    def residuals(params, x, y):
        return function(params, x) - y

    pFit, pCov, _infoDict, _errMessage, _success = leastsq(
        residuals, initialParams, args=(dataX, dataY), full_output=1, epsfcn=0.0001
    )

    nParams = len(initialParams)
    degreesOfFreedom = len(dataY) - nParams
    if degreesOfFreedom > 0 and pCov is not None:
        reducedChiSq = (residuals(pFit, dataX, dataY)**2).sum()/degreesOfFreedom
        # Scale the fractional covariance into an absolute one.
        pCov *= reducedChiSq
    else:
        # No usable covariance: report infinite uncertainties.
        pCov = np.inf + np.zeros((nParams, nParams))
        reducedChiSq = np.inf

    pErr = np.array([np.fabs(pCov[i][i])**0.5 for i in range(len(pFit))])

    return pFit, pErr, reducedChiSq
220 
221 
def fitBootstrap(initialParams, dataX, dataY, function, confidenceSigma=1.):
    """Do a fit using least squares and bootstrap to estimate parameter errors.

    The data are fitted once to measure the scatter of the residuals. Then
    100 synthetic data sets are built by adding Gaussian noise with that
    scatter to ``dataY``, and each one is fitted starting from
    ``initialParams``. Noise is drawn from the global `numpy.random` state,
    so results are only reproducible if the caller seeds it.

    Parameters
    ----------
    initialParams : `list` of `float`
        Initial values for fit parameters. For ptcFitType=POLYNOMIAL, its
        length determines the degree of the polynomial.

    dataX : `numpy.array` of `float`
        Data in the abscissa axis.

    dataY : `numpy.array` of `float`
        Data in the ordinate axis.

    function : callable object (function)
        Function to fit the data with, called as ``function(params, x)``.

    confidenceSigma : `float`
        Number of sigmas that determine confidence interval for the
        bootstrap errors.

    Returns
    -------
    pFitBootstrap : `numpy.ndarray` of `float`
        Mean of the fitted parameters over the synthetic data sets.

    pErrBootstrap : `numpy.ndarray` of `float`
        ``confidenceSigma`` times the standard deviation of the fitted
        parameters over the synthetic data sets.

    reducedChiSqBootstrap : `float`
        Reduced chi squared of ``pFitBootstrap`` against the original data,
        or ``inf`` when there are no spare degrees of freedom (matching
        `fitLeastSq`).
    """

    def errFunc(p, x, y):
        return function(p, x) - y

    # Fit first time, only to measure the scatter of the residuals.
    pFit, _ = leastsq(errFunc, initialParams, args=(dataX, dataY), full_output=0)
    sigmaErrTotal = np.std(errFunc(pFit, dataX, dataY))

    # Generate and fit the random data sets.
    nBootstrap = 100
    pars = []
    for _ in range(nBootstrap):
        randomDataY = dataY + np.random.normal(0., sigmaErrTotal, len(dataY))
        randomFit, _ = leastsq(errFunc, initialParams,
                               args=(dataX, randomDataY), full_output=0)
        pars.append(randomFit)
    pars = np.array(pars)

    # Central value and confidence interval for the parameter estimates.
    pFitBootstrap = np.mean(pars, 0)
    pErrBootstrap = confidenceSigma*np.std(pars, 0)

    degreesOfFreedom = len(dataY) - len(initialParams)
    if degreesOfFreedom > 0:
        reducedChiSq = (errFunc(pFitBootstrap, dataX, dataY)**2).sum()/degreesOfFreedom
    else:
        # Avoid dividing by zero; report the same sentinel as fitLeastSq.
        reducedChiSq = np.inf
    return pFitBootstrap, pErrBootstrap, reducedChiSq
286 
287 
def funcPolynomial(pars, x):
    """Evaluate a polynomial PTC model.

    Parameters
    ----------
    pars : `list`
        Polynomial coefficients, constant term first. Its length determines
        the polynomial order.

    x : `numpy.array`
        Signal mu (ADU).

    Returns
    -------
    C_00 (variance) in ADU^2.
    """
    coefficients = list(pars)
    return poly.polyval(x, coefficients)  # C_00
303 
304 
def funcAstier(pars, x):
    """Single brighter-fatter parameter model for PTC; Equation 16 of Astier+19.

    Parameters
    ----------
    pars : `list`
        Parameters of the model, in this order: a00 (brighter-fatter),
        gain (e/ADU), and noise (e^2). Must unpack into exactly three values.

    x : `numpy.array`
        Signal mu (ADU).

    Returns
    -------
    C_00 (variance) in ADU^2.

    Notes
    -----
    The exponential term is evaluated with `numpy.expm1`, because
    ``exp(y) - 1`` loses precision through cancellation when ``y`` is small,
    i.e. at low signal.
    """
    a00, gain, noise = pars
    return 0.5/(a00*gain*gain)*np.expm1(2*a00*x*gain) + noise/(gain*gain)  # C_00
322 
323 
def checkExpLengthEqual(exp1, exp2, v1=None, v2=None, raiseWithMessage=False):
    """Check the exposure lengths of two exposures are equal.

    Parameters
    ----------
    exp1 : `lsst.afw.image.exposure.ExposureF`
        First exposure to check
    exp2 : `lsst.afw.image.exposure.ExposureF`
        Second exposure to check
    v1 : `int` or `str`, optional
        First visit of the visit pair
    v2 : `int` or `str`, optional
        Second visit of the visit pair
    raiseWithMessage : `bool`
        If True, instead of returning a bool, raise a RuntimeError if exposure
        times are not equal, with a message about which visits mismatch if the
        information is available.

    Returns
    -------
    equal : `bool`
        True if the exposure times are equal. False if they differ and
        ``raiseWithMessage`` is False.

    Raises
    ------
    RuntimeError
        Raised if the exposure lengths of the two exposures are not equal
        and ``raiseWithMessage`` is True.
    """
    expTime1 = exp1.getInfo().getVisitInfo().getExposureTime()
    expTime2 = exp2.getInfo().getVisitInfo().getExposureTime()
    if expTime1 == expTime2:
        return True
    if not raiseWithMessage:
        return False

    msg = "Exposure lengths for visit pairs must be equal. " + \
        "Found %s and %s" % (expTime1, expTime2)
    # Compare against None/"" rather than truthiness so that a visit
    # numbered 0 is still reported.
    haveVisits = all(visit is not None and visit != "" for visit in (v1, v2))
    if haveVisits:
        msg += " for visit pair %s, %s" % (v1, v2)
    raise RuntimeError(msg)
359 
360 
def validateIsrConfig(isrTask, mandatory=None, forbidden=None, desirable=None, undesirable=None,
                      checkTrim=True, logName=None):
    """Check that appropriate ISR settings have been selected for the task.

    Note that this inspects the configuration of a constructed task rather
    than a bare config object.

    Parameters
    ----------
    isrTask : `lsst.ip.isr.IsrTask`
        The task whose config is to be validated

    mandatory : `iterable` of `str`
        isr steps that must be set to True. Raises if False or missing

    forbidden : `iterable` of `str`
        isr steps that must be set to False. Raises if True, warns if missing

    desirable : `iterable` of `str`
        isr steps that should probably be set to True. Warns if False, info
        if missing

    undesirable : `iterable` of `str`
        isr steps that should probably be set to False. Warns if True, info
        if missing

    checkTrim : `bool`
        Check to ensure the isrTask's assembly subtask is trimming the images.
        This is a separate option because the setting lives on a subtask and
        does not fit in the lists above.

    logName : `str`, optional
        Name of the logger to report through. Anything that is empty or not
        a `str` selects the ``"isrValidation"`` logger.

    Raises
    ------
    RuntimeError
        Raised if ``mandatory`` config parameters are False or missing,
        if ``forbidden`` parameters are True, or if ``checkTrim`` is set and
        the assembly subtask does not trim.

    TypeError
        Raised if parameter ``isrTask`` is an invalid type.

    Notes
    -----
    Values are compared by identity with ``True``/``False``, so only the
    literal booleans trigger a warning or an error.
    """
    if not isinstance(isrTask, ipIsr.IsrTask):
        raise TypeError(f'Must supply an instance of lsst.ip.isr.IsrTask not {type(isrTask)}')

    configDict = isrTask.config.toDict()

    useNamedLogger = logName and isinstance(logName, str)
    log = lsst.log.getLogger(logName if useNamedLogger else "isrValidation")

    # Hard requirements: a missing or disabled step is an error.
    for configParam in mandatory or ():
        if configParam not in configDict:
            raise RuntimeError(f"Mandatory parameter {configParam} not found in the isr configuration.")
        if configDict[configParam] is False:
            raise RuntimeError(f"Must set config.isr.{configParam} to True for this task.")

    # Hard prohibitions: a missing key is only suspicious, an enabled one is
    # an error.
    for configParam in forbidden or ():
        if configParam not in configDict:
            log.warn(f"Failed to find forbidden key {configParam} in the isr config. The keys in the"
                     " forbidden list should each have an associated Field in IsrConfig:"
                     " check that there is not a typo in this case.")
        elif configDict[configParam] is True:
            raise RuntimeError(f"Must set config.isr.{configParam} to False for this task.")

    # Recommendations: never fatal, only logged.
    for configParam in desirable or ():
        if configParam not in configDict:
            log.info(f"Failed to find key {configParam} in the isr config. You probably want" +
                     " to set the equivalent for your obs_package to True.")
        elif configDict[configParam] is False:
            log.warn(f"Found config.isr.{configParam} set to False for this task." +
                     " The cp_pipe Config recommends setting this to True.")

    for configParam in undesirable or ():
        if configParam not in configDict:
            log.info(f"Failed to find key {configParam} in the isr config. You probably want" +
                     " to set the equivalent for your obs_package to False.")
        elif configDict[configParam] is True:
            log.warn(f"Found config.isr.{configParam} set to True for this task." +
                     " The cp_pipe Config recommends setting this to False.")

    # Subtask setting, so it cannot be expressed through the lists above.
    if checkTrim and not isrTask.assembleCcd.config.doTrim:
        raise RuntimeError("Must trim when assembling CCDs. Set config.isr.assembleCcd.doTrim to True")
lsst.cp.pipe.utils.validateIsrConfig
def validateIsrConfig(isrTask, mandatory=None, forbidden=None, desirable=None, undesirable=None, checkTrim=True, logName=None)
Definition: utils.py:361
lsst.cp.pipe.utils.NonexistentDatasetTaskDataIdContainer.makeDataRefList
def makeDataRefList(self, namespace)
Definition: utils.py:132
lsst.cp.pipe.utils.fitBootstrap
def fitBootstrap(initialParams, dataX, dataY, function, confidenceSigma=1.)
Definition: utils.py:222
lsst.cp.pipe.utils.funcPolynomial
def funcPolynomial(pars, x)
Definition: utils.py:288
lsst.cp.pipe.utils.PairedVisitListTaskRunner.getTargetList
def getTargetList(parsedCmd, **kwargs)
Definition: utils.py:62
lsst.cp.pipe.utils.fitLeastSq
def fitLeastSq(initialParams, dataX, dataY, function)
Definition: utils.py:164
lsst.cp.pipe.utils.countMaskedPixels
def countMaskedPixels(maskedIm, maskPlane)
Definition: utils.py:37
lsst.cp.pipe.utils.SingleVisitListTaskRunner.getTargetList
def getTargetList(parsedCmd, **kwargs)
Definition: utils.py:118
lsst.cp.pipe.utils.PairedVisitListTaskRunner
Definition: utils.py:44
lsst.cp.pipe.utils.funcAstier
def funcAstier(pars, x)
Definition: utils.py:305
lsst.cp.pipe.utils.SingleVisitListTaskRunner
Definition: utils.py:100
lsst.cp.pipe.utils.checkExpLengthEqual
def checkExpLengthEqual(exp1, exp2, v1=None, v2=None, raiseWithMessage=False)
Definition: utils.py:324
lsst.cp.pipe.utils.parseCmdlineNumberString
def parseCmdlineNumberString(inputString)
Definition: utils.py:79
lsst::ip::isr
lsst::pipe::base
lsst::log
lsst.cp.pipe.utils.NonexistentDatasetTaskDataIdContainer
Definition: utils.py:128