Coverage for python/lsst/pipe/base/task.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
# Names exported by ``from <this module> import *``.
__all__ = [
    "Task",
    "TaskError",
]
24import contextlib
26import lsstDebug
27from lsst.pex.config import ConfigurableField
28from lsst.log import Log
29import lsst.daf.base as dafBase
30from .timer import logInfo
class TaskError(Exception):
    """Exception for task failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this exception is appropriate:

    - processCcd is asked to run detection, but not calibration, and no calexp is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and :ref:`creating-a-task` for more
    information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which is a task-specific subclass of
        `lsst.pex.config.Config`), or `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to ``self.ConfigClass()`` and ``name``
          defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `lsst.log.Log`, optional
        Log whose name is used as a log name prefix, or `None` for no prefix. Ignored if parentTask is
        specified, in which case ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's log is then a child logger of
        ``parentTask.log`` (if ``parentTask`` specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and ``_DefaultName`` does not exist
          (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: an lsst.log.Log
    - ``config``: task-specific configuration; an instance of ``ConfigClass`` (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting task-specific metadata,
      e.g. data quality and performance metrics. This is data that is only meant to be
      persisted, never to be used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data, typically a single CCD.
      Iteration, if desired, is performed by a caller of the method. This is good design and allows
      multiprocessing without the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection of data references,
        if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data. Typically the run method returns all
        data, even if it is persisted, and the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask` should *not* accept a blob such as a
    butler data reference. How we will handle data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass of `lsst.pex.config.Config`
    which configures the task. Subclasses should also have an attribute ``_DefaultName``:
    the default name if there is no parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task because it simplifies construction
    (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of `~lsst.pipe.base.CmdLineTask`
    not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            # Subtask: name is mandatory; config, task registry and log prefix come from the parent.
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Share (not copy) the parent's registry so the whole hierarchy sees every task.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getName() + '.' + name
        else:
            # Top-level task: fall back to the class-level default name and a default config.
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # The ``log`` argument only contributes a name prefix, and only if that name is non-empty.
            if log is not None and log.getName():
                loggerName = log.getName() + '.' + loggerName

        self.log = Log.getLogger(loggerName)
        self.config = config
        self._display = lsstDebug.Info(self.__module__).display
        # Register this task under its full name in the registry shared by the hierarchy.
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Every task registered in the shared task dictionary gets a new, empty
        `lsst.daf.base.PropertyList`; the previous metadata objects are not modified in place.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            `lsst.afw.table` Catalog type) for this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default implementation returns
           an empty dict.

        This method may be called at any time after the Task is constructed, which means that all task
        schemas should be computed at construction time, *not* when data is actually processed. This
        reflects the philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g. slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an instance of the appropriate
            lsst.afw.table Catalog type) for all tasks in the hierarchy, from the top-level task down
            through all subtasks. The returned dict is newly created on every call.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return the same answer, regardless.

        The default implementation should always suffice. If your subtask uses schemas then override
        `Task.getSchemaCatalogs`, not this method.
        """
        # Merge into a fresh copy: an overridden getSchemaCatalogs may return a dict the subclass
        # keeps a reference to, and updating that object in place would corrupt it.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name. Values are metadata
            for the top-level task and all subtasks, sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if ``@timer.timeMethod`` is used)
        and any metadata set by the task. The name of each item consists of the full task name
        with ``.`` replaced by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation that a task has a subtask
        and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each subtask separated by periods.
            For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the top-level task and all subtasks,
            sub-subtasks, etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following arguments are automatically
            provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of pex_config ConfigurableField
        or RegistryField.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=Log.DEBUG):
        """Context manager to log performance data for an arbitrary block of code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name: ``Start`` and ``End``.
        logLevel
            A `lsst.log` level constant.

        Examples
        --------
        Creating a timer context::

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end marker even when the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another task.

        Here is an example of use::

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("a brief description of what this task does")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            The class and the positional constructor arguments ``(config, name, parentTask, log)``
            used to rebuild the task. ``log`` is always passed as `None`, so the original ``log``
            argument is not carried through pickling.
        """
        return self.__class__, (self.config, self._name, self._parentTask, None)