Coverage for python/lsst/pipe/base/task.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
22__all__ = ["Task", "TaskError"]
24import contextlib
26import lsstDebug
27from lsst.pex.config import ConfigurableField
28from lsst.log import Log
29import lsst.daf.base as dafBase
30from .timer import logInfo
class TaskError(Exception):
    """Exception for task failures where a traceback adds no useful detail.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """
47class Task:
48 r"""Base class for data processing tasks.
50 See :ref:`task-framework-overview` to learn what tasks are, and
51 :ref:`creating-a-task` for more information about writing tasks.
53 Parameters
54 ----------
55 config : `Task.ConfigClass` instance, optional
56 Configuration for this task (an instance of Task.ConfigClass, which
57 is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
58 If `None`:
60 - If parentTask specified then defaults to parentTask.config.\<name>
61 - If parentTask is None then defaults to self.ConfigClass()
63 name : `str`, optional
64 Brief name of task, or `None`; if `None` then defaults to
65 `Task._DefaultName`
66 parentTask : `Task`-type, optional
67 The parent task of this subtask, if any.
69 - If `None` (a top-level task) then ``config`` defaults to
70 ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
71 - If not `None` (a subtask) then you must specify name.
72 log : `lsst.log.Log`, optional
73 Log whose name is used as a log name prefix, or `None` for no prefix.
74 Ignored if parentTask is specified, in which case
75 ``parentTask.log``\ 's name is used as a prefix. The task's log name is
76 ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
77 log is then a child logger of ``parentTask.log`` (if ``parentTask``
78 specified), or a child logger of the log from the argument
79 (if ``log`` is not `None`).
81 Raises
82 ------
83 RuntimeError
84 Raised under these circumstances:
86 - If ``parentTask`` is `None` and ``config`` is `None`.
87 - If ``parentTask`` is not `None` and ``name`` is `None`.
88 - If ``name`` is `None` and ``_DefaultName`` does not exist.
90 Notes
91 -----
92 Useful attributes include:
94 - ``log``: an lsst.log.Log
95 - ``config``: task-specific configuration; an instance of ``ConfigClass``
96 (see below).
97 - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
98 task-specific metadata, e.g. data quality and performance metrics.
99 This is data that is only meant to be persisted, never to be used by
100 the task.
102 Subclasses typically have a method named ``runDataRef`` to perform the
103 main data processing. Details:
105 - ``runDataRef`` should process the minimum reasonable amount of data,
106 typically a single CCD. Iteration, if desired, is performed by a caller
107 of the method. This is good design and allows multiprocessing without
108 the run method having to support it directly.
109 - If ``runDataRef`` can persist or unpersist data:
111 - ``runDataRef`` should accept a butler data reference (or a collection
112 of data references, if appropriate, e.g. coaddition).
113 - There should be a way to run the task without persisting data.
114 Typically the run method returns all data, even if it is persisted, and
115 the task's config method offers a flag to disable persistence.
117 **Deprecated:** Tasks other than cmdLineTask.CmdLineTasks should *not*
118 accept a blob such as a butler data reference. How we will handle data
119 references is still TBD, so don't make changes yet!
120 RHL 2014-06-27
122 Subclasses must also have an attribute ``ConfigClass`` that is a subclass
123 of `lsst.pex.config.Config` which configures the task. Subclasses should
124 also have an attribute ``_DefaultName``: the default name if there is no
125 parent task. ``_DefaultName`` is required for subclasses of
126 `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
127 because it simplifies construction (e.g. for unit tests).
129 Tasks intended to be run from the command line should be subclasses of
130 `~lsst.pipe.base.CmdLineTask` not Task.
131 """
133 def __init__(self, config=None, name=None, parentTask=None, log=None):
134 self.metadata = dafBase.PropertyList()
135 self._parentTask = parentTask
137 if parentTask is not None:
138 if name is None:
139 raise RuntimeError("name is required for a subtask")
140 self._name = name
141 self._fullName = parentTask._computeFullName(name)
142 if config is None:
143 config = getattr(parentTask.config, name)
144 self._taskDict = parentTask._taskDict
145 loggerName = parentTask.log.getName() + '.' + name
146 else:
147 if name is None:
148 name = getattr(self, "_DefaultName", None)
149 if name is None:
150 raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
151 name = self._DefaultName
152 self._name = name
153 self._fullName = self._name
154 if config is None:
155 config = self.ConfigClass()
156 self._taskDict = dict()
157 loggerName = self._fullName
158 if log is not None and log.getName():
159 loggerName = log.getName() + '.' + loggerName
161 self.log = Log.getLogger(loggerName)
162 self.config = config
163 self._display = lsstDebug.Info(self.__module__).display
164 self._taskDict[self._fullName] = self
166 def emptyMetadata(self):
167 """Empty (clear) the metadata for this Task and all sub-Tasks.
168 """
169 for subtask in self._taskDict.values():
170 subtask.metadata = dafBase.PropertyList()
172 def getSchemaCatalogs(self):
173 """Get the schemas generated by this task.
175 Returns
176 -------
177 schemaCatalogs : `dict`
178 Keys are butler dataset type, values are an empty catalog (an
179 instance of the appropriate `lsst.afw.table` Catalog type) for
180 this task.
182 Notes
183 -----
185 .. warning::
187 Subclasses that use schemas must override this method. The default
188 implementation returns an empty dict.
190 This method may be called at any time after the Task is constructed,
191 which means that all task schemas should be computed at construction
192 time, *not* when data is actually processed. This reflects the
193 philosophy that the schema should not depend on the data.
195 Returning catalogs rather than just schemas allows us to save e.g.
196 slots for SourceCatalog as well.
198 See also
199 --------
200 Task.getAllSchemaCatalogs
201 """
202 return {}
204 def getAllSchemaCatalogs(self):
205 """Get schema catalogs for all tasks in the hierarchy, combining the
206 results into a single dict.
208 Returns
209 -------
210 schemacatalogs : `dict`
211 Keys are butler dataset type, values are a empty catalog (an
212 instance of the appropriate `lsst.afw.table` Catalog type) for all
213 tasks in the hierarchy, from the top-level task down
214 through all subtasks.
216 Notes
217 -----
218 This method may be called on any task in the hierarchy; it will return
219 the same answer, regardless.
221 The default implementation should always suffice. If your subtask uses
222 schemas the override `Task.getSchemaCatalogs`, not this method.
223 """
224 schemaDict = self.getSchemaCatalogs()
225 for subtask in self._taskDict.values():
226 schemaDict.update(subtask.getSchemaCatalogs())
227 return schemaDict
229 def getFullMetadata(self):
230 """Get metadata for all tasks.
232 Returns
233 -------
234 metadata : `lsst.daf.base.PropertySet`
235 The `~lsst.daf.base.PropertySet` keys are the full task name.
236 Values are metadata for the top-level task and all subtasks,
237 sub-subtasks, etc.
239 Notes
240 -----
241 The returned metadata includes timing information (if
242 ``@timer.timeMethod`` is used) and any metadata set by the task. The
243 name of each item consists of the full task name with ``.`` replaced
244 by ``:``, followed by ``.`` and the name of the item, e.g.::
246 topLevelTaskName:subtaskName:subsubtaskName.itemName
248 using ``:`` in the full task name disambiguates the rare situation
249 that a task has a subtask and a metadata item with the same name.
250 """
251 fullMetadata = dafBase.PropertySet()
252 for fullName, task in self.getTaskDict().items():
253 fullMetadata.set(fullName.replace(".", ":"), task.metadata)
254 return fullMetadata
256 def getFullName(self):
257 """Get the task name as a hierarchical name including parent task
258 names.
260 Returns
261 -------
262 fullName : `str`
263 The full name consists of the name of the parent task and each
264 subtask separated by periods. For example:
266 - The full name of top-level task "top" is simply "top".
267 - The full name of subtask "sub" of top-level task "top" is
268 "top.sub".
269 - The full name of subtask "sub2" of subtask "sub" of top-level
270 task "top" is "top.sub.sub2".
271 """
272 return self._fullName
274 def getName(self):
275 """Get the name of the task.
277 Returns
278 -------
279 taskName : `str`
280 Name of the task.
282 See also
283 --------
284 getFullName
285 """
286 return self._name
288 def getTaskDict(self):
289 """Get a dictionary of all tasks as a shallow copy.
291 Returns
292 -------
293 taskDict : `dict`
294 Dictionary containing full task name: task object for the top-level
295 task and all subtasks, sub-subtasks, etc.
296 """
297 return self._taskDict.copy()
299 def makeSubtask(self, name, **keyArgs):
300 """Create a subtask as a new instance as the ``name`` attribute of this
301 task.
303 Parameters
304 ----------
305 name : `str`
306 Brief name of the subtask.
307 keyArgs
308 Extra keyword arguments used to construct the task. The following
309 arguments are automatically provided and cannot be overridden:
311 - "config".
312 - "parentTask".
314 Notes
315 -----
316 The subtask must be defined by ``Task.config.name``, an instance of
317 `~lsst.pex.config.ConfigurableField` or
318 `~lsst.pex.config.RegistryField`.
319 """
320 taskField = getattr(self.config, name, None)
321 if taskField is None:
322 raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
323 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
324 setattr(self, name, subtask)
326 @contextlib.contextmanager
327 def timer(self, name, logLevel=Log.DEBUG):
328 """Context manager to log performance data for an arbitrary block of
329 code.
331 Parameters
332 ----------
333 name : `str`
334 Name of code being timed; data will be logged using item name:
335 ``Start`` and ``End``.
336 logLevel
337 A `lsst.log` level constant.
339 Examples
340 --------
341 Creating a timer context:
343 .. code-block:: python
345 with self.timer("someCodeToTime"):
346 pass # code to time
348 See also
349 --------
350 timer.logInfo
351 """
352 logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
353 try:
354 yield
355 finally:
356 logInfo(obj=self, prefix=name + "End", logLevel=logLevel)
358 @classmethod
359 def makeField(cls, doc):
360 """Make a `lsst.pex.config.ConfigurableField` for this task.
362 Parameters
363 ----------
364 doc : `str`
365 Help text for the field.
367 Returns
368 -------
369 configurableField : `lsst.pex.config.ConfigurableField`
370 A `~ConfigurableField` for this task.
372 Examples
373 --------
374 Provides a convenient way to specify this task is a subtask of another
375 task.
377 Here is an example of use:
379 .. code-block:: python
381 class OtherTaskConfig(lsst.pex.config.Config):
382 aSubtask = ATaskClass.makeField("brief description of task")
383 """
384 return ConfigurableField(doc=doc, target=cls)
386 def _computeFullName(self, name):
387 """Compute the full name of a subtask or metadata item, given its brief
388 name.
390 Parameters
391 ----------
392 name : `str`
393 Brief name of subtask or metadata item.
395 Returns
396 -------
397 fullName : `str`
398 The full name: the ``name`` argument prefixed by the full task name
399 and a period.
401 Notes
402 -----
403 For example: if the full name of this task is "top.sub.sub2"
404 then ``_computeFullName("subname")`` returns
405 ``"top.sub.sub2.subname"``.
406 """
407 return f"{self._fullName}.{name}"
409 @staticmethod
410 def _unpickle_via_factory(factory, args, kwargs):
411 """Unpickle something by calling a factory
413 Allows subclasses to unpickle using `__reduce__` with keyword
414 arguments as well as positional arguments.
415 """
416 return factory(*args, **kwargs)
418 def _reduce_kwargs(self):
419 """Returns a dict of the keyword arguments that should be used
420 by `__reduce__`.
422 Subclasses with additional arguments should always call the parent
423 class method to ensure that the standard parameters are included.
425 Returns
426 -------
427 kwargs : `dict`
428 Keyword arguments to be used when pickling.
429 """
430 return dict(config=self.config, name=self._name, parentTask=self._parentTask,)
432 def __reduce__(self):
433 """Pickler.
434 """
435 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())