Coverage for python/lsst/pipe/base/task.py: 29%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
22__all__ = ["Task", "TaskError"]
24import contextlib
25import logging
26from typing import Optional
28from lsst.pex.config import ConfigurableField
29import lsst.daf.base as dafBase
30from .timer import logInfo
31from .task_logging import getTaskLogger
33import weakref
35try:
36 import lsstDebug
37except ImportError:
38 lsstDebug = None
class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this error is appropriate:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` and ``name`` are both
          optional and fall back to the defaults described above.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
      task-specific metadata, e.g. data quality and performance metrics.
      This is data that is only meant to be persisted, never to be used by
      the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted,
        and the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than subclasses of
    `~lsst.pipe.base.CmdLineTask` should *not* accept a blob such as a butler
    data reference. How we will handle data references is still TBD, so
    don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        # The parent is held weakly so that a subtask does not keep its
        # parent alive.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Every task in a hierarchy shares the same registry dict.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # ``log`` only contributes a name prefix, and only for top-level
            # tasks; an unnamed logger contributes nothing.
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = getTaskLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Registry entries are weak references; readers must tolerate
        # entries whose task has been garbage collected.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional['Task']:
        """The parent task, or `None` if there is no parent (or the weakly
        referenced parent no longer exists).
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Tasks in the hierarchy that have already been garbage collected are
        skipped.
        """
        for taskRef in self._taskDict.values():
            subtask = taskRef()
            if subtask is None:
                # Dead weak reference: nothing left to clear.
                continue
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks. This is always a new dict; the dicts
            returned by `Task.getSchemaCatalogs` are not modified.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        Tasks in the hierarchy that have already been garbage collected are
        skipped.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Copy so that a dict cached by a subclass is never polluted with the
        # entries of the other tasks.
        schemaDict = dict(self.getSchemaCatalogs())
        for taskRef in self._taskDict.values():
            subtask = taskRef()
            if subtask is None:
                continue
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.

        Tasks in the hierarchy that have already been garbage collected are
        omitted.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, taskRef in self.getTaskDict().items():
            task = taskRef()
            if task is None:
                continue
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc. Call a value to obtain the task (or `None` if it no longer
            exists).
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end even if the timed code raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable used to build the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        obj
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            The factory trampoline and its arguments: this task's class, an
            empty positional-argument list and the keyword arguments from
            `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())