lsst.pipe.base  15.0-4-g654b129+16
task.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2016 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import contextlib
24 
25 from builtins import object
26 
27 import lsstDebug
28 from lsst.pex.config import ConfigurableField
29 from lsst.log import Log
30 import lsst.daf.base as dafBase
31 from .timer import logInfo
32 
33 __all__ = ["Task", "TaskError"]
34 
35 
class TaskError(Exception):
    """Exception for failures where a traceback would add no useful information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no calexp is found.
    - coadd finds no valid images in the specified patch.
    """
47 
48 
class Task(object):
    """Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and :ref:`creating-a-task` for more
    information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which is a task-specific subclass of
        `lsst.pex.config.Config`), or `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` defaults to ``_DefaultName`` and ``config`` defaults
          to ``self.ConfigClass()``.
        - If not `None` (a subtask) then you must specify name.
    log : `lsst.log.Log`, optional
        Log whose name is used as a log name prefix, or `None` for no prefix. Ignored if ``parentTask`` is
        specified, in which case the name of ``parentTask.log`` is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's log is then a child logger of
        ``parentTask.log`` (if ``parentTask`` specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and ``_DefaultName`` does not exist.

    Notes
    -----
    Useful attributes include:

    - ``log``: an lsst.log.Log
    - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting task-specific metadata,
      e.g. data quality and performance metrics. This is data that is only meant to be
      persisted, never to be used by the task.

    Subclasses typically have a method named ``run`` to perform the main data processing. Details:

    - ``run`` should process the minimum reasonable amount of data, typically a single CCD.
      Iteration, if desired, is performed by a caller of the run method. This is good design and allows
      multiprocessing without the run method having to support it directly.
    - If ``run`` can persist or unpersist data:

      - ``run`` should accept a butler data reference (or a collection of data references, if appropriate,
        e.g. coaddition).
      - There should be a way to run the task without persisting data. Typically the run method returns all
        data, even if it is persisted, and the task's config offers a flag to disable persistence.

    **Deprecated:** Tasks other than subclasses of `~lsst.pipe.base.CmdLineTask` should *not* accept a blob
    such as a butler data reference. How we will handle data references is still TBD, so don't make changes
    yet! RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass of `lsst.pex.config.Config`
    which configures the task. Subclasses should also have an attribute ``_DefaultName``:
    the default name if there is no parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task because it simplifies construction
    (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of `~lsst.pipe.base.CmdLineTask`
    not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The task dictionary is shared (same object) by every task in the hierarchy.
            self._taskDict = parentTask._taskDict
            # The ``log`` argument is deliberately not consulted for subtasks.
            loggerName = parentTask.log.getName() + '.' + name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # Only prefix when the supplied log actually has a (non-empty) name.
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        # Register last, once the task is fully set up.
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata of every task in the hierarchy.

        Notes
        -----
        Each task registered in the shared task dictionary gets a fresh, empty
        `lsst.daf.base.PropertyList` as its ``metadata`` attribute.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            `lsst.afw.table` Catalog type) for this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default implementation returns
           an empty dict.

        This method may be called at any time after the Task is constructed, which means that all task
        schemas should be computed at construction time, *not* when data is actually processed. This
        reflects the philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return the same answer, regardless.

        The default implementation should always suffice. If your subtask uses schemas then override
        `Task.getSchemaCatalogs`, not this method.
        """
        # Accumulate into a fresh dict so that a dict returned (and possibly cached) by a
        # ``getSchemaCatalogs`` override is never mutated. This task is itself an entry of
        # ``_taskDict``, so its own schemas are picked up by the loop.
        schemaDict = {}
        for task in self._taskDict.values():
            schemaDict.update(task.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name. Values are metadata
            for the top-level task and all subtasks, sub-subtasks, etc..

        Notes
        -----
        The returned metadata includes timing information (if ``@timer.timeMethod`` is used)
        and any metadata set by the task. The name of each item consists of the full task name
        with ``.`` replaced by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation that a task has a subtask
        and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each subtask separated by periods.
            For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the top-level task and all subtasks,
            sub-subtasks, etc..
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following arguments are automatically
            provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or the field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of pex_config ConfigurableField
        or RegistryField.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError("%s's config does not have field %r" % (self.getFullName(), name))
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        """Context manager to log performance data for an arbitrary block of code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name: ``Start`` and ``End``.
        logLevel
            A `lsst.log` level constant.

        Examples
        --------
        Creating a timer context::

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end, even when the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another task.

        Here is an example of use::

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("a brief description of what this task does")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
        """
        return "%s.%s" % (self._fullName, name)

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``(class, (config, name, parentTask, None))``: the task is rebuilt by calling its class with
            these constructor arguments. The ``log`` argument is always passed as `None`, and
            ``metadata`` is not part of the pickled state.
        """
        return self.__class__, (self.config, self._name, self._parentTask, None)
def makeSubtask(self, name, **keyArgs)
Definition: task.py:278
def makeField(cls, doc)
Definition: task.py:332
def __reduce__(self)
Definition: task.py:376
def emptyMetadata(self)
Definition: task.py:156
def getFullMetadata(self)
Definition: task.py:213
def getAllSchemaCatalogs(self)
Definition: task.py:191
def logInfo(obj, prefix, logLevel=Log.DEBUG)
Definition: timer.py:63
def getSchemaCatalogs(self)
Definition: task.py:162
def timer(self, name, logLevel=Log.DEBUG)
Definition: task.py:304
def __init__(self, config=None, name=None, parentTask=None, log=None)
Definition: task.py:123
def getFullName(self)
Definition: task.py:238
def getTaskDict(self)
Definition: task.py:267