#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#
__all__ = ["Task", "TaskError"]

import contextlib
import logging

from lsst.pex.config import ConfigurableField
import lsst.daf.base as dafBase
from .timer import logInfo
from .task_logging import getTaskLogger

try:
    import lsstDebug
except ImportError:
    lsstDebug = None


class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
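
    A hypothetical example of raising this error when a required input is
    missing and a traceback would add nothing useful:

    .. code-block:: python

        # "calexp" here is a hypothetical input that the task needs.
        if calexp is None:
            raise TaskError("no calexp found; cannot run detection")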
48 """
49 pass


class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of ``Task.ConfigClass``,
        which is a task-specific subclass of `lsst.pex.config.Config`), or
        `None`. If `None`:

        - If ``parentTask`` is specified, defaults to
          ``parentTask.config.<name>``.
        - If ``parentTask`` is `None`, defaults to ``self.ConfigClass()``.

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then you must specify ``config``;
          ``name`` is ignored.
        - If not `None` (a subtask) then you must specify ``name``.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if ``parentTask`` is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        is specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is `None` and ``config`` is `None`.
        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``name`` is `None` and ``_DefaultName`` does not exist.

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
      task-specific metadata, e.g. data quality and performance metrics.
      This is data that is only meant to be persisted, never to be used by
      the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted,
        and the task's config offers a flag to disable persistence.

    **Deprecated:** Tasks other than cmdLineTask.CmdLineTask should *not*
    accept a blob such as a butler data reference. How we will handle data
    references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask`, not Task.
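
    Examples
    --------
    A minimal sketch of a concrete task, using hypothetical names
    (``ExampleConfig``, ``ExampleTask``, ``doSquare``) and a trivial ``run``
    method:

    .. code-block:: python

        import lsst.pex.config
        from lsst.pipe.base import Task


        class ExampleConfig(lsst.pex.config.Config):
            # Hypothetical configuration option for this example.
            doSquare = lsst.pex.config.Field(
                dtype=bool, default=True, doc="Square the input value?"
            )


        class ExampleTask(Task):
            ConfigClass = ExampleConfig
            _DefaultName = "example"

            def run(self, value):
                # Return the (optionally squared) value; no persistence here.
                return value**2 if self.config.doSquare else value


        task = ExampleTask()  # config defaults to ExampleTask.ConfigClass()
        result = task.run(3.0)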
136 """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        self._parentTask = parentTask

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
                name = self._DefaultName
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = getTaskLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        self._taskDict[self._fullName] = self

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.
        """
        for subtask in self._taskDict.values():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
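
        Examples
        --------
        A minimal sketch of an override for a task that measures sources,
        assuming a hypothetical dataset type ``"example_src"`` and a
        ``self.schema`` attribute created in the task constructor:

        .. code-block:: python

            import lsst.afw.table

            def getSchemaCatalogs(self):
                # "example_src" is a hypothetical butler dataset type; the
                # empty catalog carries the schema without any records.
                return {"example_src": lsst.afw.table.SourceCatalog(self.schema)}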
210 """
211 return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        schemaDict = self.getSchemaCatalogs()
        for subtask in self._taskDict.values():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        Using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
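
        A hypothetical example of reading one such timing item for a subtask
        named ``"detection"`` of a top-level task named ``"processCcd"``,
        assuming the subtask's ``run`` method is decorated with
        ``@timeMethod``:

        .. code-block:: python

            # Task and item names here are hypothetical.
            fullMetadata = task.getFullMetadata()
            cpuTime = fullMetadata.getScalar("processCcd:detection.runEndCpuTime")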
259 """
260 fullMetadata = dafBase.PropertySet()
261 for fullName, task in self.getTaskDict().items():
262 fullMetadata.set(fullName.replace(".", ":"), task.metadata)
263 return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: task object for the
            top-level task and all subtasks, sub-subtasks, etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance and assign it to the ``name``
        attribute of this task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
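
        Examples
        --------
        A minimal sketch (``DetectionTask`` and the other names are
        hypothetical) of a parent task whose config declares a subtask field
        via `Task.makeField` and whose constructor creates the subtask:

        .. code-block:: python

            class ParentConfig(lsst.pex.config.Config):
                # DetectionTask is a hypothetical Task subclass.
                detection = DetectionTask.makeField("detect sources")


            class ParentTask(Task):
                ConfigClass = ParentConfig
                _DefaultName = "parent"

                def __init__(self, **kwargs):
                    super().__init__(**kwargs)
                    # Constructs self.detection from self.config.detection.
                    self.makeSubtask("detection")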
328 """
329 taskField = getattr(self.config, name, None)
330 if taskField is None:
331 raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
332 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
333 setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify that this task is a subtask of
        another task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its
        brief name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task
            name and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Returns a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
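
        Examples
        --------
        A sketch of how a subclass with an extra constructor argument (a
        hypothetical ``butler`` keyword) might extend this:

        .. code-block:: python

            def _reduce_kwargs(self):
                # "butler" is a hypothetical extra constructor argument.
                kwargs = super()._reduce_kwargs()
                kwargs["butler"] = self.butler
                return kwargs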
438 """
439 return dict(config=self.config, name=self._name, parentTask=self._parentTask,)

    def __reduce__(self):
        """Pickler.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())