22 from __future__
import absolute_import, division
25 from builtins
import object
28 from lsst.pex.config
import ConfigurableField
29 from lsst.log
import Log
30 import lsst.daf.base
as dafBase
31 from .timer
import logInfo
33 __all__ = [
"Task",
"TaskError"]
37 """!Use to report errors for which a traceback is not useful.
39 Examples of such errors:
40 - processCcd is asked to run detection, but not calibration, and no calexp is found.
41 - coadd finds no valid images in the specified patch.
47 """!Base class for data processing tasks
49 See \ref pipeBase_introduction "pipe_base introduction" to learn what tasks are,
50 and \ref pipeTasks_writeTask "how to write a task" for more information about writing tasks.
51 If the second link is broken (as it will be before the documentation is cross-linked)
52 then look at the main page of pipe_tasks documentation for a link.
54 Useful attributes include:
55 * log: an lsst.log.Log
56 * config: task-specific configuration; an instance of ConfigClass (see below)
57 * metadata: an lsst.daf.base.PropertyList for collecting task-specific metadata,
58 e.g. data quality and performance metrics. This is data that is only meant to be
59 persisted, never to be used by the task.
61 Subclasses typically have a method named "run" to perform the main data processing. Details:
62 * run should process the minimum reasonable amount of data, typically a single CCD.
63 Iteration, if desired, is performed by a caller of the run method. This is good design and allows
64 multiprocessing without the run method having to support it directly.
65 * If "run" can persist or unpersist data:
66 * "run" should accept a butler data reference (or a collection of data references, if appropriate).
68 * There should be a way to run the task without persisting data. Typically the run method returns all
69 data, even if it is persisted, and the task's config method offers a flag to disable persistence.
71 \deprecated Tasks other than cmdLineTask.CmdLineTask%s should \em not accept a blob such as a butler data
72 reference. How we will handle data references is still TBD, so don't make changes yet! RHL 2014-06-27
74 Subclasses must also have an attribute ConfigClass that is a subclass of lsst.pex.config.Config
75 which configures the task. Subclasses should also have an attribute _DefaultName:
76 the default name if there is no parent task. _DefaultName is required for subclasses of
77 \ref cmdLineTask.CmdLineTask "CmdLineTask" and recommended for subclasses of Task because it simplifies
78 construction (e.g. for unit tests).
80 Tasks intended to be run from the command line should be subclasses of \ref cmdLineTask.CmdLineTask
81 "CmdLineTask", not Task.
84 def __init__(self, config=None, name=None, parentTask=None, log=None):
87 @param[in] config configuration for this task (an instance of self.ConfigClass,
88 which is a task-specific subclass of lsst.pex.config.Config), or None. If None:
89 - If parentTask specified then defaults to parentTask.config.<name>
90 - If parentTask is None then defaults to self.ConfigClass()
91 @param[in] name brief name of task, or None; if None then defaults to self._DefaultName
92 @param[in] parentTask the parent task of this subtask, if any.
93 - If None (a top-level task) then you must specify config and name is ignored.
94 - If not None (a subtask) then you must specify name
95 @param[in] log log (an lsst.log.Log) whose name is used as a log name prefix,
96 or None for no prefix. Ignored if parentTask is specified, in which case parentTask.log's
97 name is used as a prefix.
98 The task's log name is `prefix + "." + name` if a prefix exists, else `name`.
99 The task's log is then a child logger of parentTask.log (if parentTask specified),
100 or a child logger of the log from the argument (if log is not None).
102 @throw RuntimeError if parentTask is None and config is None.
103 @throw RuntimeError if parentTask is not None and name is None.
104 @throw RuntimeError if name is None and _DefaultName does not exist.
109 if parentTask
is not None:
111 raise RuntimeError(
"name is required for a subtask")
113 self.
_fullName = parentTask._computeFullName(name)
115 config = getattr(parentTask.config, name)
117 loggerName = parentTask.log.getName() +
'.' + name
120 name = getattr(self,
"_DefaultName",
None)
122 raise RuntimeError(
"name is required for a task unless it has attribute _DefaultName")
123 name = self._DefaultName
127 config = self.ConfigClass()
130 if log
is not None and log.getName():
131 loggerName = log.getName() +
'.' + loggerName
133 self.
log = Log.getLogger(loggerName)
135 self.
_display = lsstDebug.Info(self.__module__).display
139 """!Empty (clear) the metadata for this Task and all sub-Tasks."""
140 for subtask
in self._taskDict.values():
141 subtask.metadata = dafBase.PropertyList()
144 """!Return the schemas generated by this task
146 @warning Subclasses that use schemas must override this method. The default implementation
147 returns an empty dict.
149 @return a dict of butler dataset type: empty catalog (an instance of the appropriate
150 lsst.afw.table Catalog type) for this task
152 This method may be called at any time after the Task is constructed, which means that
153 all task schemas should be computed at construction time, __not__ when data is actually
154 processed. This reflects the philosophy that the schema should not depend on the data.
156 Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well.
158 See also Task.getAllSchemaCatalogs
163 """!Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict.
165 @return a dict of butler dataset type: empty catalog (an instance of the appropriate
166 lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
169 This method may be called on any task in the hierarchy; it will return the same answer, regardless.
171 The default implementation should always suffice. If your subtask uses schemas then override
172 Task.getSchemaCatalogs, not this method.
175 for subtask
in self._taskDict.values():
176 schemaDict.update(subtask.getSchemaCatalogs())
180 """!Get metadata for all tasks
182 The returned metadata includes timing information (if \@timer.timeMethod is used)
183 and any metadata set by the task. The name of each item consists of the full task name
184 with "." replaced by ":", followed by "." and the name of the item, e.g.:
185 topLevelTaskName:subtaskName:subsubtaskName.itemName
186 using ":" in the full task name disambiguates the rare situation that a task has a subtask
187 and a metadata item with the same name.
189 @return metadata: an lsst.daf.base.PropertySet containing full task name: metadata
190 for the top-level task and all subtasks, sub-subtasks, etc.
192 fullMetadata = dafBase.PropertySet()
194 fullMetadata.set(fullName.replace(
".",
":"), task.metadata)
198 """!Return the task name as a hierarchical name including parent task names
200 The full name consists of the name of the parent task and each subtask separated by periods.
202 - The full name of top-level task "top" is simply "top"
203 - The full name of subtask "sub" of top-level task "top" is "top.sub"
204 - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2".
209 """!Return the name of the task
211 See getFullName to get a hierarchical name including parent task names
216 """!Return a dictionary of all tasks as a shallow copy.
218 @return taskDict: a dict containing full task name: task object
219 for the top-level task and all subtasks, sub-subtasks, etc.
221 return self._taskDict.copy()
224 """!Create a subtask as a new instance self.<name>
226 The subtask must be defined by self.config.<name>, an instance of pex_config ConfigurableField
229 @param name brief name of subtask
230 @param **keyArgs extra keyword arguments used to construct the task.
231 The following arguments are automatically provided and cannot be overridden:
232 "config" and "parentTask".
234 taskField = getattr(self.
config, name,
None)
235 if taskField
is None:
236 raise KeyError(
"%s's config does not have field %r" % (self.
getFullName(), name))
237 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
238 setattr(self, name, subtask)
240 @contextlib.contextmanager
241 def timer(self, name, logLevel=Log.DEBUG):
242 """!Context manager to log performance data for an arbitrary block of code
244 @param[in] name name of code being timed;
245 data will be logged using item name: <name>Start<item> and <name>End<item>
246 @param[in] logLevel one of the lsst.log.Log level constants
250 with self.timer("someCodeToTime"):
254 See timer.logInfo for the information logged
256 logInfo(obj=self, prefix=name +
"Start", logLevel=logLevel)
260 logInfo(obj=self, prefix=name +
"End", logLevel=logLevel)
264 """!Make an lsst.pex.config.ConfigurableField for this task
266 Provides a convenient way to specify this task is a subtask of another task.
267 Here is an example of use:
269 class OtherTaskConfig(lsst.pex.config.Config):
270 aSubtask = ATaskClass.makeField("a brief description of what this task does")
273 @param[in] cls this class
274 @param[in] doc help text for the field
275 @return a lsst.pex.config.ConfigurableField for this task
277 return ConfigurableField(doc=doc, target=cls)
279 def _computeFullName(self, name):
280 """!Compute the full name of a subtask or metadata item, given its brief name
282 For example: if the full name of this task is "top.sub.sub2"
283 then _computeFullName("subname") returns "top.sub.sub2.subname".
285 @param[in] name brief name of subtask or metadata item
286 @return the full name: the "name" argument prefixed by the full task name and a period.
Use to report errors for which a traceback is not useful.
def getSchemaCatalogs
Return the schemas generated by this task.
def makeField
Make an lsst.pex.config.ConfigurableField for this task.
Base class for data processing tasks.
def makeSubtask
Create a subtask as a new instance self.<name>.
def logInfo
Log timer information to obj.metadata and obj.log.
def getName
Return the name of the task.
def timer
Context manager to log performance data for an arbitrary block of code.
def __init__
Create a Task.
def getTaskDict
Return a dictionary of all tasks as a shallow copy.
def getAllSchemaCatalogs
Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict...
def getFullName
Return the task name as a hierarchical name including parent task names.
def emptyMetadata
Empty (clear) the metadata for this Task and all sub-Tasks.
def getFullMetadata
Get metadata for all tasks.