22 __all__ = [
"Task",
"TaskError"]
30 from .timer
import logInfo
34 """Use to report errors for which a traceback is not useful.
38 Examples of such errors:
40 - processCcd is asked to run detection, but not calibration, and no calexp
42 - coadd finds no valid images in the specified patch.
48 r"""Base class for data processing tasks.
50 See :ref:`task-framework-overview` to learn what tasks are, and
51 :ref:`creating-a-task` for more information about writing tasks.
55 config : `Task.ConfigClass` instance, optional
56 Configuration for this task (an instance of Task.ConfigClass, which
57 is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
60 - If parentTask specified then defaults to parentTask.config.<name>
61 - If parentTask is None then defaults to self.ConfigClass()
63 name : `str`, optional
64 Brief name of task, or `None`; if `None` then defaults to
66 parentTask : `Task`-type, optional
67 The parent task of this subtask, if any.
69 - If `None` (a top-level task) then you must specify config and name
71 - If not `None` (a subtask) then you must specify name.
72 log : `lsst.log.Log`, optional
73 Log whose name is used as a log name prefix, or `None` for no prefix.
74 Ignored if parentTask is specified, in which case
75 ``parentTask.log``\ 's name is used as a prefix. The task's log name is
76 ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
77 log is then a child logger of ``parentTask.log`` (if ``parentTask``
78 specified), or a child logger of the log from the argument
79 (if ``log`` is not `None`).
84 Raised under these circumstances:
86 - If ``parentTask`` is `None` and ``config`` is `None`.
87 - If ``parentTask`` is not `None` and ``name`` is `None`.
88 - If ``name`` is `None` and ``_DefaultName`` does not exist.
92 Useful attributes include:
94 - ``log``: an lsst.log.Log
95 - ``config``: task-specific configuration; an instance of ``ConfigClass``
97 - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
98 task-specific metadata, e.g. data quality and performance metrics.
99 This is data that is only meant to be persisted, never to be used by
102 Subclasses typically have a method named ``runDataRef`` to perform the
103 main data processing. Details:
105 - ``runDataRef`` should process the minimum reasonable amount of data,
106 typically a single CCD. Iteration, if desired, is performed by a caller
107 of the method. This is good design and allows multiprocessing without
108 the run method having to support it directly.
109 - If ``runDataRef`` can persist or unpersist data:
111 - ``runDataRef`` should accept a butler data reference (or a collection
112 of data references, if appropriate, e.g. coaddition).
113 - There should be a way to run the task without persisting data.
114 Typically the run method returns all data, even if it is persisted, and
115 the task's config method offers a flag to disable persistence.
117 **Deprecated:** Tasks other than cmdLineTask.CmdLineTasks should *not*
118 accept a blob such as a butler data reference. How we will handle data
119 references is still TBD, so don't make changes yet!
122 Subclasses must also have an attribute ``ConfigClass`` that is a subclass
123 of `lsst.pex.config.Config` which configures the task. Subclasses should
124 also have an attribute ``_DefaultName``: the default name if there is no
125 parent task. ``_DefaultName`` is required for subclasses of
126 `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
127 because it simplifies construction (e.g. for unit tests).
129 Tasks intended to be run from the command line should be subclasses of
130 `~lsst.pipe.base.CmdLineTask` not Task.
133 def __init__(self, config=None, name=None, parentTask=None, log=None):
137 if parentTask
is not None:
139 raise RuntimeError(
"name is required for a subtask")
141 self.
_fullName = parentTask._computeFullName(name)
143 config = getattr(parentTask.config, name)
145 loggerName = parentTask.log.getName() +
'.' + name
148 name = getattr(self,
"_DefaultName",
None)
150 raise RuntimeError(
"name is required for a task unless it has attribute _DefaultName")
151 name = self._DefaultName
155 config = self.ConfigClass()
158 if log
is not None and log.getName():
159 loggerName = log.getName() +
'.' + loggerName
161 self.
log = Log.getLogger(loggerName)
167 """Empty (clear) the metadata for this Task and all sub-Tasks.
170 subtask.metadata = dafBase.PropertyList()
173 """Get the schemas generated by this task.
177 schemaCatalogs : `dict`
178 Keys are butler dataset type, values are an empty catalog (an
179 instance of the appropriate `lsst.afw.table` Catalog type) for
187 Subclasses that use schemas must override this method. The default
188 implementation returns an empty dict.
190 This method may be called at any time after the Task is constructed,
191 which means that all task schemas should be computed at construction
192 time, *not* when data is actually processed. This reflects the
193 philosophy that the schema should not depend on the data.
195 Returning catalogs rather than just schemas allows us to save e.g.
196 slots for SourceCatalog as well.
200 Task.getAllSchemaCatalogs
205 """Get schema catalogs for all tasks in the hierarchy, combining the
206 results into a single dict.
210 schemaCatalogs : `dict`
211 Keys are butler dataset type, values are an empty catalog (an
212 instance of the appropriate `lsst.afw.table` Catalog type) for all
213 tasks in the hierarchy, from the top-level task down
214 through all subtasks.
218 This method may be called on any task in the hierarchy; it will return
219 the same answer, regardless.
221 The default implementation should always suffice. If your subtask uses
222 schemas then override `Task.getSchemaCatalogs`, not this method.
226 schemaDict.update(subtask.getSchemaCatalogs())
230 """Get metadata for all tasks.
234 metadata : `lsst.daf.base.PropertySet`
235 The `~lsst.daf.base.PropertySet` keys are the full task name.
236 Values are metadata for the top-level task and all subtasks,
241 The returned metadata includes timing information (if
242 ``@timer.timeMethod`` is used) and any metadata set by the task. The
243 name of each item consists of the full task name with ``.`` replaced
244 by ``:``, followed by ``.`` and the name of the item, e.g.::
246 topLevelTaskName:subtaskName:subsubtaskName.itemName
248 using ``:`` in the full task name disambiguates the rare situation
249 that a task has a subtask and a metadata item with the same name.
251 fullMetadata = dafBase.PropertySet()
253 fullMetadata.set(fullName.replace(
".",
":"), task.metadata)
257 """Get the task name as a hierarchical name including parent task
263 The full name consists of the name of the parent task and each
264 subtask separated by periods. For example:
266 - The full name of top-level task "top" is simply "top".
267 - The full name of subtask "sub" of top-level task "top" is
269 - The full name of subtask "sub2" of subtask "sub" of top-level
270 task "top" is "top.sub.sub2".
275 """Get the name of the task.
289 """Get a dictionary of all tasks as a shallow copy.
294 Dictionary containing full task name: task object for the top-level
295 task and all subtasks, sub-subtasks, etc.
300 """Create a subtask as a new instance as the ``name`` attribute of this
306 Brief name of the subtask.
308 Extra keyword arguments used to construct the task. The following
309 arguments are automatically provided and cannot be overridden:
316 The subtask must be defined by ``Task.config.name``, an instance of
317 `~lsst.pex.config.ConfigurableField` or
318 `~lsst.pex.config.RegistryField`.
320 taskField = getattr(self.
config, name,
None)
321 if taskField
is None:
322 raise KeyError(f
"{self.getFullName()}'s config does not have field {name!r}")
323 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
324 setattr(self, name, subtask)
326 @contextlib.contextmanager
327 def timer(self, name, logLevel=Log.DEBUG):
328 """Context manager to log performance data for an arbitrary block of
334 Name of code being timed; data will be logged using item names
335 ``<name>Start`` and ``<name>End``.
337 A `lsst.log` level constant.
341 Creating a timer context:
343 .. code-block:: python
345 with self.timer("someCodeToTime"):
352 logInfo(obj=self, prefix=name +
"Start", logLevel=logLevel)
356 logInfo(obj=self, prefix=name +
"End", logLevel=logLevel)
360 """Make a `lsst.pex.config.ConfigurableField` for this task.
365 Help text for the field.
369 configurableField : `lsst.pex.config.ConfigurableField`
370 A `~ConfigurableField` for this task.
374 Provides a convenient way to specify this task is a subtask of another
377 Here is an example of use:
379 .. code-block:: python
381 class OtherTaskConfig(lsst.pex.config.Config):
382 aSubtask = ATaskClass.makeField("brief description of task")
384 return ConfigurableField(doc=doc, target=cls)
def _computeFullName(self, name):
    """Build the fully qualified name for a subtask or metadata item.

    Parameters
    ----------
    name : `str`
        Brief name of the subtask or metadata item.

    Returns
    -------
    fullName : `str`
        ``name`` prefixed by this task's own full name (``self._fullName``)
        and a period.

    Notes
    -----
    For example, if the full name of this task is "top.sub.sub2" then
    ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
    """
    # Read the owning task's full name first, then append the brief name
    # after a single period separator.
    ownerFullName = self._fullName
    return f"{ownerFullName}.{name}"
def _unpickle_via_factory(factory, args, kwargs):
    """Rebuild an object by invoking a factory callable.

    Lets subclasses unpickle through `__reduce__` while supplying keyword
    arguments in addition to positional ones.

    Parameters
    ----------
    factory : callable
        Callable invoked to produce the object.
    args : iterable
        Positional arguments, unpacked into the ``factory`` call.
    kwargs : mapping
        Keyword arguments, unpacked into the ``factory`` call.

    Returns
    -------
    result
        Whatever ``factory(*args, **kwargs)`` returns.
    """
    rebuilt = factory(*args, **kwargs)
    return rebuilt
418 def _reduce_kwargs(self):
419 """Returns a dict of the keyword arguments that should be used
422 Subclasses with additional arguments should always call the parent
423 class method to ensure that the standard parameters are included.
428 Keyword arguments to be used when pickling.