lsst.pipe.base  14.0
 All Classes Namespaces Files Functions Variables Pages
task.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2016 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <http://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import contextlib
24 
25 from builtins import object
26 
27 import lsstDebug
28 from lsst.pex.config import ConfigurableField
29 from lsst.log import Log
30 import lsst.daf.base as dafBase
31 from .timer import logInfo
32 
33 __all__ = ["Task", "TaskError"]
34 
35 
class TaskError(Exception):
    """!Exception for failures where a traceback would add no useful information.

    Typical situations in which to raise it:
    - processCcd is asked to run detection, but not calibration, and no calexp is found.
    - coadd finds no valid images in the specified patch.
    """
44 
45 
class Task(object):
    r"""!Base class for data processing tasks

    See \ref pipeBase_introduction "pipe_base introduction" to learn what tasks are,
    and \ref pipeTasks_writeTask "how to write a task" for more information about writing tasks.
    If the second link is broken (as it will be before the documentation is cross-linked)
    then look at the main page of pipe_tasks documentation for a link.

    Useful attributes include:
    * log: an lsst.log.Log
    * config: task-specific configuration; an instance of ConfigClass (see below)
    * metadata: an lsst.daf.base.PropertyList for collecting task-specific metadata,
        e.g. data quality and performance metrics. This is data that is only meant to be
        persisted, never to be used by the task.

    Subclasses typically have a method named "run" to perform the main data processing. Details:
    * run should process the minimum reasonable amount of data, typically a single CCD.
      Iteration, if desired, is performed by a caller of the run method. This is good design and allows
      multiprocessing without the run method having to support it directly.
    * If "run" can persist or unpersist data:
        * "run" should accept a butler data reference (or a collection of data references, if appropriate,
            e.g. coaddition).
        * There should be a way to run the task without persisting data. Typically the run method returns all
            data, even if it is persisted, and the task's config method offers a flag to disable persistence.

    \deprecated Tasks other than cmdLineTask.CmdLineTask%s should \em not accept a blob such as a butler data
    reference.  How we will handle data references is still TBD, so don't make changes yet! RHL 2014-06-27

    Subclasses must also have an attribute ConfigClass that is a subclass of lsst.pex.config.Config
    which configures the task. Subclasses should also have an attribute _DefaultName:
    the default name if there is no parent task. _DefaultName is required for subclasses of
    \ref cmdLineTask.CmdLineTask "CmdLineTask" and recommended for subclasses of Task because it simplifies
    construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of \ref cmdLineTask.CmdLineTask
    "CmdLineTask", not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        """!Create a Task

        @param[in] config  configuration for this task (an instance of self.ConfigClass,
            which is a task-specific subclass of lsst.pex.config.Config), or None. If None:
            - If parentTask specified then defaults to parentTask.config.<name>
            - If parentTask is None then defaults to self.ConfigClass()
        @param[in] name  brief name of task, or None. For a top-level task, None means
            "use self._DefaultName"; for a subtask a name is mandatory.
        @param[in] parentTask  the parent task of this subtask, if any.
            - If None (a top-level task) then name and config are both optional
              (see their defaults above).
            - If not None (a subtask) then you must specify name
        @param[in] log  log (an lsst.log.Log) whose name is used as a log name prefix,
            or None for no prefix. Ignored if parentTask specified, in which case parentTask.log's
            name is used as a prefix.
            The task's log name is `prefix + "." + name` if a prefix exists, else `name`.
            The task's log is then a child logger of parentTask.log (if parentTask specified),
            or a child logger of the log from the argument (if log is not None).

        @throw RuntimeError if parentTask is not None and name is None.
        @throw RuntimeError if parentTask is None, name is None and _DefaultName
            does not exist (or is None).
        """
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The same dict object is shared by every task in the hierarchy,
            # so registering here makes this subtask visible from all of them.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getName() + '.' + name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            # A top-level task owns a fresh registry of the tasks in its hierarchy.
            self._taskDict = dict()
            loggerName = self._fullName
            # Only prefix when the supplied log has a non-empty name (avoids a leading ".").
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """!Empty (clear) the metadata for this Task and all sub-Tasks.

        Every task registered in the shared task dict gets a brand-new, empty
        lsst.daf.base.PropertyList; the previous metadata objects are not modified.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """!Return the schemas generated by this task

        @warning Subclasses that use schemas must override this method. The default implementation
        returns an empty dict.

        @return a dict of butler dataset type: empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for this task

        This method may be called at any time after the Task is constructed, which means that
        all task schemas should be computed at construction time, __not__ when data is actually
        processed. This reflects the philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well.

        See also Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """!Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict.

        @return a dict of butler dataset type: empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
            through all subtasks

        This method may be called on any task in the hierarchy; it will return the same answer, regardless.

        The default implementation should always suffice. If your subtask uses schemas then override
        Task.getSchemaCatalogs, not this method.
        """
        # Merge into a copy: a subclass may return a dict it keeps a reference to,
        # and that dict must not be polluted with entries from other tasks.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        r"""!Get metadata for all tasks

        The returned metadata includes timing information (if \@timer.timeMethod is used)
        and any metadata set by the task. The name of each item consists of the full task name
        with "." replaced by ":", followed by "." and the name of the item, e.g.:
            topLevelTaskName:subtaskName:subsubtaskName.itemName
        using ":" in the full task name disambiguates the rare situation that a task has a subtask
        and a metadata item with the same name.

        @return metadata: an lsst.daf.base.PropertySet containing full task name: metadata
            for the top-level task and all subtasks, sub-subtasks, etc.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """!Return the task name as a hierarchical name including parent task names

        The full name consists of the name of the parent task and each subtask separated by periods.
        For example:
        - The full name of top-level task "top" is simply "top"
        - The full name of subtask "sub" of top-level task "top" is "top.sub"
        - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """!Return the name of the task

        See getFullName to get a hierarchical name including parent task names
        """
        return self._name

    def getTaskDict(self):
        """!Return a dictionary of all tasks as a shallow copy.

        @return taskDict: a dict containing full task name: task object
            for the top-level task and all subtasks, sub-subtasks, etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """!Create a subtask as a new instance self.<name>

        The subtask must be defined by self.config.<name>, an instance of pex_config ConfigurableField
        or RegistryField.

        @param name  brief name of subtask
        @param **keyArgs  extra keyword arguments used to construct the task.
            The following arguments are automatically provided and cannot be overridden:
            "config" and "parentTask".

        @throw KeyError if self.config has no field called name (or the field is None).
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError("%s's config does not have field %r" % (self.getFullName(), name))
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        r"""!Context manager to log performance data for an arbitrary block of code

        @param[in] name  name of code being timed;
            data will be logged using item name: <name>Start<item> and <name>End<item>
        @param[in] logLevel  one of the lsst.log.Log level constants

        Example of use:
        \code
        with self.timer("someCodeToTime"):
            ...code to time...
        \endcode

        See timer.logInfo for the information logged
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end marker even if the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        r"""!Make an lsst.pex.config.ConfigurableField for this task

        Provides a convenient way to specify this task is a subtask of another task.
        Here is an example of use:
        \code
        class OtherTaskConfig(lsst.pex.config.Config):
            aSubtask = ATaskClass.makeField("a brief description of what this task does")
        \endcode

        @param[in] cls  this class
        @param[in] doc  help text for the field
        @return a lsst.pex.config.ConfigurableField for this task
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """!Compute the full name of a subtask or metadata item, given its brief name

        For example: if the full name of this task is "top.sub.sub2"
        then _computeFullName("subname") returns "top.sub.sub2.subname".

        @param[in] name  brief name of subtask or metadata item
        @return the full name: the "name" argument prefixed by the full task name and a period.
        """
        return "%s.%s" % (self._fullName, name)

    def __reduce__(self):
        """!Support pickling by describing how to rebuild this task

        @return a tuple (class, constructor args): the task is reconstructed by calling
            its class with (config, name, parentTask, None), i.e. the log argument
            is passed as None on reconstruction.
        """
        return self.__class__, (self.config, self._name, self._parentTask, None)
Use to report errors for which a traceback is not useful.
Definition: task.py:36
def getSchemaCatalogs
Return the schemas generated by this task.
Definition: task.py:143
def makeField
Make an lsst.pex.config.ConfigurableField for this task.
Definition: task.py:263
Base class for data processing tasks.
Definition: task.py:46
def makeSubtask
Create a subtask as a new instance self.<name>.
Definition: task.py:223
def logInfo
Log timer information to obj.metadata and obj.log.
Definition: timer.py:55
def getName
Return the name of the task.
Definition: task.py:208
def timer
Context manager to log performance data for an arbitrary block of code.
Definition: task.py:241
def __init__
Create a Task.
Definition: task.py:84
def getTaskDict
Return a dictionary of all tasks as a shallow copy.
Definition: task.py:215
def getAllSchemaCatalogs
Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict...
Definition: task.py:162
def getFullName
Return the task name as a hierarchical name including parent task names.
Definition: task.py:197
def emptyMetadata
Empty (clear) the metadata for this Task and all sub-Tasks.
Definition: task.py:138
def getFullMetadata
Get metadata for all tasks.
Definition: task.py:179