lsst.pipe.base  14.0-6-ge2c9487+45
task.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2016 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import contextlib
24 
25 from builtins import object
26 
27 import lsstDebug
28 from lsst.pex.config import ConfigurableField
29 from lsst.log import Log
30 import lsst.daf.base as dafBase
31 from .timer import logInfo
32 
33 __all__ = ["Task", "TaskError"]
34 
35 
class TaskError(Exception):
    """Exception for task failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no calexp is found.
    - coadd finds no valid images in the specified patch.
    """
47 
48 
class Task(object):
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and :ref:`creating-a-task` for more
    information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which is a task-specific subclass
        of `lsst.pex.config.Config`), or `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` may be omitted provided the class defines
          ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `lsst.log.Log`, optional
        Log whose name is used as a log name prefix, or `None` for no prefix. Ignored if parentTask is
        specified, in which case ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's log is then a child logger
        of ``parentTask.log`` (if ``parentTask`` specified), or a child logger of the log from the
        argument (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and ``_DefaultName`` does not exist
          (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: an lsst.log.Log
    - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting task-specific metadata,
      e.g. data quality and performance metrics. This is data that is only meant to be
      persisted, never to be used by the task.

    Subclasses typically have a method named ``run`` to perform the main data processing. Details:

    - ``run`` should process the minimum reasonable amount of data, typically a single CCD.
      Iteration, if desired, is performed by a caller of the run method. This is good design and
      allows multiprocessing without the run method having to support it directly.
    - If ``run`` can persist or unpersist data:

      - ``run`` should accept a butler data reference (or a collection of data references, if
        appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data. Typically the run method
        returns all data, even if it is persisted, and the task's config method offers a flag to
        disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask`\ s should *not* accept a blob such
    as a butler data reference. How we will handle data references is still TBD, so don't make changes
    yet! RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass of
    `lsst.pex.config.Config` which configures the task. Subclasses should also have an attribute
    ``_DefaultName``: the default name if there is no parent task. ``_DefaultName`` is required for
    subclasses of `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task because it
    simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            # Subtask: name is mandatory; config, task registry and logger prefix come from the parent.
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Share (not copy) the parent's dict so every task in the hierarchy sees the same registry.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getName() + '.' + name
        else:
            # Top-level task: fall back to the class-level default name and a default config.
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # The ``log`` argument only contributes a name prefix, and only for top-level tasks.
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        # Register this task under its full name in the hierarchy-wide registry.
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Each task registered in the shared task dictionary gets a brand new, empty
        `lsst.daf.base.PropertyList`.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            `lsst.afw.table` Catalog type) for this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default implementation returns
           an empty dict.

        This method may be called at any time after the Task is constructed, which means that all task
        schemas should be computed at construction time, *not* when data is actually processed. This
        reflects the philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as
        well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return the same answer,
        regardless.

        The default implementation should always suffice. If your subtask uses schemas then override
        `Task.getSchemaCatalogs`, not this method.
        """
        # Copy before merging so that a dict cached and returned by a subclass's
        # getSchemaCatalogs is never mutated by the updates below.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name. Values are metadata
            for the top-level task and all subtasks, sub-subtasks, etc..

        Notes
        -----
        The returned metadata includes timing information (if ``@timer.timeMethod`` is used)
        and any metadata set by the task. The name of each item consists of the full task name
        with ``.`` replaced by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation that a task has a subtask
        and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each subtask separated by periods.
            For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is
              "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the top-level task and all subtasks,
            sub-subtasks, etc..
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following arguments are
            automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of pex_config
        ConfigurableField or RegistryField.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError("%s's config does not have field %r" % (self.getFullName(), name))
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        """Context manager to log performance data for an arbitrary block of code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name: ``Start`` and ``End``.
        logLevel
            A `lsst.log` level constant.

        Examples
        --------
        Creating a timer context::

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end marker, even if the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another task.

        Here is an example of use::

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("a brief description of what this task does")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
        """
        return "%s.%s" % (self._fullName, name)

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            The task's class and the positional constructor arguments ``(config, name, parentTask,
            log)`` used to rebuild it; ``log`` is always passed as `None`.
        """
        return self.__class__, (self.config, self._name, self._parentTask, None)
def makeSubtask(self, name, **keyArgs)
Definition: task.py:277
def makeField(cls, doc)
Definition: task.py:331
def __reduce__(self)
Definition: task.py:375
def emptyMetadata(self)
Definition: task.py:155
def getFullMetadata(self)
Definition: task.py:212
def getAllSchemaCatalogs(self)
Definition: task.py:190
def logInfo(obj, prefix, logLevel=Log.DEBUG)
Definition: timer.py:63
def getSchemaCatalogs(self)
Definition: task.py:161
def timer(self, name, logLevel=Log.DEBUG)
Definition: task.py:303
def __init__(self, config=None, name=None, parentTask=None, log=None)
Definition: task.py:122
def getFullName(self)
Definition: task.py:237
def getTaskDict(self)
Definition: task.py:266