22 from __future__
import absolute_import, division
25 from builtins
import object
28 from lsst.pex.config
import ConfigurableField
29 from lsst.log
import Log
30 import lsst.daf.base
as dafBase
31 from .timer
import logInfo
33 __all__ = [
"Task",
"TaskError"]
37 """Use to report errors for which a traceback is not useful. 41 Examples of such errors: 43 - processCcd is asked to run detection, but not calibration, and no calexp is found. 44 - coadd finds no valid images in the specified patch. 50 """Base class for data processing tasks. 52 See :ref:`task-framework-overview` to learn what tasks are, and :ref:`creating-a-task` for more 53 information about writing tasks. 57 config : `Task.ConfigClass` instance, optional 58 Configuration for this task (an instance of Task.ConfigClass, which is a task-specific subclass of 59 `lsst.pex.config.Config`), or `None`. If `None`: 61 - If parentTask is specified then defaults to parentTask.config.<name> 62 - If parentTask is None then defaults to self.ConfigClass() 64 name : `str`, optional 65 Brief name of task, or `None`; if `None` then defaults to `Task._DefaultName` 66 parentTask : `Task`-type, optional 67 The parent task of this subtask, if any. 69 - If `None` (a top-level task) then you must specify config and name is ignored. 70 - If not `None` (a subtask) then you must specify name. 71 log : `lsst.log.Log`, optional 72 Log whose name is used as a log name prefix, or `None` for no prefix. Ignored if parentTask is 73 specified, in which case ``parentTask.log``\ 's name is used as a prefix. The task's log name is 74 ``prefix + "." + name`` if a prefix exists, else ``name``. The task's log is then a child logger of 75 ``parentTask.log`` (if ``parentTask`` is specified), or a child logger of the log from the argument 76 (if ``log`` is not `None`). 81 Raised under these circumstances: 83 - If ``parentTask`` is `None` and ``config`` is `None`. 84 - If ``parentTask`` is not `None` and ``name`` is `None`. 85 - If ``name`` is `None` and ``_DefaultName`` does not exist. 89 Useful attributes include: 91 - ``log``: an lsst.log.Log 92 - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below). 
93 - ``metadata``: an `lsst.daf.base.PropertyList` for collecting task-specific metadata, 94 e.g. data quality and performance metrics. This is data that is only meant to be 95 persisted, never to be used by the task. 97 Subclasses typically have a method named ``run`` to perform the main data processing. Details: 99 - ``run`` should process the minimum reasonable amount of data, typically a single CCD. 100 Iteration, if desired, is performed by a caller of the run method. This is good design and allows 101 multiprocessing without the run method having to support it directly. 102 - If ``run`` can persist or unpersist data: 103 - ``run`` should accept a butler data reference (or a collection of data references, if appropriate, 105 - There should be a way to run the task without persisting data. Typically the run method returns all 106 data, even if it is persisted, and the task's config method offers a flag to disable persistence. 108 **Deprecated:** Tasks other than cmdLineTask.CmdLineTask%s should *not* accept a blob such as a butler 109 data reference. How we will handle data references is still TBD, so don't make changes yet! 112 Subclasses must also have an attribute ``ConfigClass`` that is a subclass of `lsst.pex.config.Config` 113 which configures the task. Subclasses should also have an attribute ``_DefaultName``: 114 the default name if there is no parent task. ``_DefaultName`` is required for subclasses of 115 `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task because it simplifies construction 116 (e.g. for unit tests). 118 Tasks intended to be run from the command line should be subclasses of `~lsst.pipe.base.CmdLineTask` 122 def __init__(self, config=None, name=None, parentTask=None, log=None):
126 if parentTask
is not None:
128 raise RuntimeError(
"name is required for a subtask")
130 self.
_fullName = parentTask._computeFullName(name)
132 config = getattr(parentTask.config, name)
134 loggerName = parentTask.log.getName() +
'.' + name
137 name = getattr(self,
"_DefaultName",
None)
139 raise RuntimeError(
"name is required for a task unless it has attribute _DefaultName")
140 name = self._DefaultName
144 config = self.ConfigClass()
147 if log
is not None and log.getName():
148 loggerName = log.getName() +
'.' + loggerName
150 self.
log = Log.getLogger(loggerName)
152 self.
_display = lsstDebug.Info(self.__module__).display
156 """Empty (clear) the metadata for this Task and all sub-Tasks. 159 subtask.metadata = dafBase.PropertyList()
162 """Get the schemas generated by this task. 166 schemaCatalogs : `dict` 167 Keys are butler dataset type, values are an empty catalog (an instance of the appropriate 168 `lsst.afw.table` Catalog type) for this task. 175 Subclasses that use schemas must override this method. The default implementation returns 178 This method may be called at any time after the Task is constructed, which means that all task 179 schemas should be computed at construction time, *not* when data is actually processed. This 180 reflects the philosophy that the schema should not depend on the data. 182 Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well. 186 Task.getAllSchemaCatalogs 191 """Get schema catalogs for all tasks in the hierarchy, combining the results into a single dict. 195 schemaCatalogs : `dict` 196 Keys are butler dataset type, values are an empty catalog (an instance of the appropriate 197 lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down 198 through all subtasks. 202 This method may be called on any task in the hierarchy; it will return the same answer, regardless. 204 The default implementation should always suffice. If your subtask uses schemas then override 205 `Task.getSchemaCatalogs`, not this method. 209 schemaDict.update(subtask.getSchemaCatalogs())
213 """Get metadata for all tasks. 217 metadata : `lsst.daf.base.PropertySet` 218 The `~lsst.daf.base.PropertySet` keys are the full task name. Values are metadata 219 for the top-level task and all subtasks, sub-subtasks, etc. 223 The returned metadata includes timing information (if ``@timer.timeMethod`` is used) 224 and any metadata set by the task. The name of each item consists of the full task name 225 with ``.`` replaced by ``:``, followed by ``.`` and the name of the item, e.g.:: 227 topLevelTaskName:subtaskName:subsubtaskName.itemName 229 Using ``:`` in the full task name disambiguates the rare situation that a task has a subtask 230 and a metadata item with the same name. 232 fullMetadata = dafBase.PropertySet()
234 fullMetadata.set(fullName.replace(
".",
":"), task.metadata)
238 """Get the task name as a hierarchical name including parent task names. 243 The full name consists of the name of the parent task and each subtask separated by periods. 246 - The full name of top-level task "top" is simply "top". 247 - The full name of subtask "sub" of top-level task "top" is "top.sub". 248 - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2". 253 """Get the name of the task. 267 """Get a dictionary of all tasks as a shallow copy. 272 Dictionary containing full task name: task object for the top-level task and all subtasks, 278 """Create a subtask as a new instance as the ``name`` attribute of this task. 283 Brief name of the subtask. 285 Extra keyword arguments used to construct the task. The following arguments are automatically 286 provided and cannot be overridden: 293 The subtask must be defined by ``Task.config.name``, an instance of pex_config ConfigurableField 296 taskField = getattr(self.
config, name,
None)
297 if taskField
is None:
298 raise KeyError(
"%s's config does not have field %r" % (self.
getFullName(), name))
299 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
300 setattr(self, name, subtask)
302 @contextlib.contextmanager
303 def timer(self, name, logLevel=Log.DEBUG):
304 """Context manager to log performance data for an arbitrary block of code. 309 Name of code being timed; data will be logged using item name: ``Start`` and ``End``. 311 A `lsst.log` level constant. 315 Creating a timer context:: 317 with self.timer("someCodeToTime"): 324 logInfo(obj=self, prefix=name +
"Start", logLevel=logLevel)
328 logInfo(obj=self, prefix=name +
"End", logLevel=logLevel)
332 """Make a `lsst.pex.config.ConfigurableField` for this task. 337 Help text for the field. 341 configurableField : `lsst.pex.config.ConfigurableField` 342 A `~ConfigurableField` for this task. 346 Provides a convenient way to specify this task is a subtask of another task. 348 Here is an example of use:: 350 class OtherTaskConfig(lsst.pex.config.Config): 351 aSubtask = ATaskClass.makeField("a brief description of what this task does") 353 return ConfigurableField(doc=doc, target=cls)
355 def _computeFullName(self, name):
356 """Compute the full name of a subtask or metadata item, given its brief name. 361 Brief name of subtask or metadata item. 366 The full name: the ``name`` argument prefixed by the full task name and a period. 370 For example: if the full name of this task is "top.sub.sub2" 371 then ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
def makeSubtask(self, name, keyArgs)
def getFullMetadata(self)
def getAllSchemaCatalogs(self)
def logInfo(obj, prefix, logLevel=Log.DEBUG)
def getSchemaCatalogs(self)
def timer(self, name, logLevel=Log.DEBUG)
def __init__(self, config=None, name=None, parentTask=None, log=None)