22 from __future__
import absolute_import, division
25 from builtins
import object
28 from lsst.pex.config
import ConfigurableField
29 from lsst.log
import Log
30 import lsst.daf.base
as dafBase
31 from .timer
import logInfo
33 __all__ = [
"Task",
"TaskError"]
37 """!Use to report errors for which a traceback is not useful. 39 Examples of such errors: 40 - processCcd is asked to run detection, but not calibration, and no calexp is found. 41 - coadd finds no valid images in the specified patch. 47 """!Base class for data processing tasks 49 See \ref pipeBase_introduction "pipe_base introduction" to learn what tasks are, 50 and \ref pipeTasks_writeTask "how to write a task" for more information about writing tasks. 51 If the second link is broken (as it will be before the documentation is cross-linked) 52 then look at the main page of pipe_tasks documentation for a link. 54 Useful attributes include: 55 * log: an lsst.log.Log 56 * config: task-specific configuration; an instance of ConfigClass (see below) 57 * metadata: an lsst.daf.base.PropertyList for collecting task-specific metadata, 58 e.g. data quality and performance metrics. This is data that is only meant to be 59 persisted, never to be used by the task. 61 Subclasses typically have a method named "run" to perform the main data processing. Details: 62 * run should process the minimum reasonable amount of data, typically a single CCD. 63 Iteration, if desired, is performed by a caller of the run method. This is good design and allows 64 multiprocessing without the run method having to support it directly. 65 * If "run" can persist or unpersist data: 66 * "run" should accept a butler data reference (or a collection of data references, if appropriate, 68 * There should be a way to run the task without persisting data. Typically the run method returns all 69 data, even if it is persisted, and the task's config method offers a flag to disable persistence. 71 \deprecated Tasks other than cmdLineTask.CmdLineTask%s should \em not accept a blob such as a butler data 72 reference. How we will handle data references is still TBD, so don't make changes yet! 
RHL 2014-06-27 74 Subclasses must also have an attribute ConfigClass that is a subclass of lsst.pex.config.Config 75 which configures the task. Subclasses should also have an attribute _DefaultName: 76 the default name if there is no parent task. _DefaultName is required for subclasses of 77 \ref cmdLineTask.CmdLineTask "CmdLineTask" and recommended for subclasses of Task because it simplifies 78 construction (e.g. for unit tests). 80 Tasks intended to be run from the command line should be subclasses of \ref cmdLineTask.CmdLineTask 81 "CmdLineTask", not Task. 84 def __init__(self, config=None, name=None, parentTask=None, log=None):
87 @param[in] config configuration for this task (an instance of self.ConfigClass, 88 which is a task-specific subclass of lsst.pex.config.Config), or None. If None: 89 - If parentTask specified then defaults to parentTask.config.<name> 90 - If parentTask is None then defaults to self.ConfigClass() 91 @param[in] name brief name of task, or None; if None then defaults to self._DefaultName 92 @param[in] parentTask the parent task of this subtask, if any. 93 - If None (a top-level task) then you must specify config and name is ignored. 94 - If not None (a subtask) then you must specify name 95 @param[in] log log (an lsst.log.Log) whose name is used as a log name prefix, 96 or None for no prefix. Ignored if parentTask specified, in which case parentTask.log's 97 name is used as a prefix. 98 The task's log name is `prefix + "." + name` if a prefix exists, else `name`. 99 The task's log is then a child logger of parentTask.log (if parentTask specified), 100 or a child logger of the log from the argument (if log is not None). 102 @throw RuntimeError if parentTask is None and config is None. 103 @throw RuntimeError if parentTask is not None and name is None. 104 @throw RuntimeError if name is None and _DefaultName does not exist. 109 if parentTask
is not None:
111 raise RuntimeError(
"name is required for a subtask")
113 self.
_fullName = parentTask._computeFullName(name)
115 config = getattr(parentTask.config, name)
117 loggerName = parentTask.log.getName() +
'.' + name
120 name = getattr(self,
"_DefaultName",
None)
122 raise RuntimeError(
"name is required for a task unless it has attribute _DefaultName")
123 name = self._DefaultName
127 config = self.ConfigClass()
130 if log
is not None and log.getName():
131 loggerName = log.getName() +
'.' + loggerName
133 self.
log = Log.getLogger(loggerName)
135 self.
_display = lsstDebug.Info(self.__module__).display
139 """!Empty (clear) the metadata for this Task and all sub-Tasks.""" 141 subtask.metadata = dafBase.PropertyList()
144 """!Return the schemas generated by this task 146 @warning Subclasses that use schemas must override this method. The default implementation 147 returns an empty dict. 149 @return a dict of butler dataset type: empty catalog (an instance of the appropriate 150 lsst.afw.table Catalog type) for this task 152 This method may be called at any time after the Task is constructed, which means that 153 all task schemas should be computed at construction time, __not__ when data is actually 154 processed. This reflects the philosophy that the schema should not depend on the data. 156 Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well. 158 See also Task.getAllSchemaCatalogs 163 """!Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict. 165 @return a dict of butler dataset type: empty catalog (an instance of the appropriate 166 lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down 169 This method may be called on any task in the hierarchy; it will return the same answer, regardless. 171 The default implementation should always suffice. If your subtask uses schemas then override 172 Task.getSchemaCatalogs, not this method. 176 schemaDict.update(subtask.getSchemaCatalogs())
180 """!Get metadata for all tasks 182 The returned metadata includes timing information (if \@timer.timeMethod is used) 183 and any metadata set by the task. The name of each item consists of the full task name 184 with "." replaced by ":", followed by "." and the name of the item, e.g.: 185 topLevelTaskName:subtaskName:subsubtaskName.itemName 186 using ":" in the full task name disambiguates the rare situation that a task has a subtask 187 and a metadata item with the same name. 189 @return metadata: an lsst.daf.base.PropertySet containing full task name: metadata 190 for the top-level task and all subtasks, sub-subtasks, etc. 192 fullMetadata = dafBase.PropertySet()
194 fullMetadata.set(fullName.replace(
".",
":"), task.metadata)
198 """!Return the task name as a hierarchical name including parent task names 200 The full name consists of the name of the parent task and each subtask separated by periods. 202 - The full name of top-level task "top" is simply "top" 203 - The full name of subtask "sub" of top-level task "top" is "top.sub" 204 - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2". 209 """!Return the name of the task 211 See getFullName to get a hierarchical name including parent task names 216 """!Return a dictionary of all tasks as a shallow copy. 218 @return taskDict: a dict containing full task name: task object 219 for the top-level task and all subtasks, sub-subtasks, etc. 224 """!Create a subtask as a new instance self.<name> 226 The subtask must be defined by self.config.<name>, an instance of pex_config ConfigurableField 229 @param name brief name of subtask 230 @param **keyArgs extra keyword arguments used to construct the task. 231 The following arguments are automatically provided and cannot be overridden: 232 "config" and "parentTask". 234 taskField = getattr(self.
config, name,
None)
235 if taskField
is None:
236 raise KeyError(
"%s's config does not have field %r" % (self.
getFullName(), name))
237 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
238 setattr(self, name, subtask)
240 @contextlib.contextmanager
241 def timer(self, name, logLevel=Log.DEBUG):
242 """!Context manager to log performance data for an arbitrary block of code 244 @param[in] name name of code being timed; 245 data will be logged using item name: <name>Start<item> and <name>End<item> 246 @param[in] logLevel one of the lsst.log.Log level constants 250 with self.timer("someCodeToTime"): 254 See timer.logInfo for the information logged 256 logInfo(obj=self, prefix=name +
"Start", logLevel=logLevel)
260 logInfo(obj=self, prefix=name +
"End", logLevel=logLevel)
264 """!Make an lsst.pex.config.ConfigurableField for this task 266 Provides a convenient way to specify this task is a subtask of another task. 267 Here is an example of use: 269 class OtherTaskConfig(lsst.pex.config.Config) 270 aSubtask = ATaskClass.makeField("a brief description of what this task does") 273 @param[in] cls this class 274 @param[in] doc help text for the field 275 @return a lsst.pex.config.ConfigurableField for this task 277 return ConfigurableField(doc=doc, target=cls)
279 def _computeFullName(self, name):
280 """!Compute the full name of a subtask or metadata item, given its brief name 282 For example: if the full name of this task is "top.sub.sub2" 283 then _computeFullName("subname") returns "top.sub.sub2.subname". 285 @param[in] name brief name of subtask or metadata item 286 @return the full name: the "name" argument prefixed by the full task name and a period. Use to report errors for which a traceback is not useful.
def makeSubtask(self, name, keyArgs)
Create a subtask as a new instance self.<name>.
def makeField(cls, doc)
Make an lsst.pex.config.ConfigurableField for this task.
def emptyMetadata(self)
Empty (clear) the metadata for this Task and all sub-Tasks.
def getFullMetadata(self)
Get metadata for all tasks.
def getAllSchemaCatalogs(self)
Call getSchemaCatalogs() on all tasks in the hierarchy, combining the results into a single dict...
Base class for data processing tasks.
def logInfo(obj, prefix, logLevel=Log.DEBUG)
Log timer information to obj.metadata and obj.log.
def getSchemaCatalogs(self)
Return the schemas generated by this task.
def timer(self, name, logLevel=Log.DEBUG)
Context manager to log performance data for an arbitrary block of code.
def getName(self)
Return the name of the task.
def __init__(self, config=None, name=None, parentTask=None, log=None)
Create a Task.
def getFullName(self)
Return the task name as a hierarchical name including parent task names.
def getTaskDict(self)
Return a dictionary of all tasks as a shallow copy.