Coverage for python/lsst/pipe/base/task.py: 30%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
22__all__ = ["Task", "TaskError"]
24import contextlib
25import logging
26from typing import Optional
28import lsst.utils
29import lsst.utils.logging
30from lsst.utils.timer import logInfo
31from lsst.pex.config import ConfigurableField
32import lsst.daf.base as dafBase
34import weakref
# lsstDebug is an optional dependency: when it cannot be imported the name is
# bound to None so that later code can test ``if lsstDebug:`` instead of
# guarding every use with its own try/except.
try:
    import lsstDebug
except ImportError:
    lsstDebug = None
class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and ``_DefaultName``
          does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: an `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
      task-specific metadata, e.g. data quality and performance metrics.
      This is data that is only meant to be persisted, never to be used by
      the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted, and
        the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask`\ s should
    *not* accept a blob such as a butler data reference. How we will handle
    data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        # Only a weak reference to the parent is kept; the parent holds its
        # subtasks strongly (see makeSubtask), so a strong back-reference
        # would create a reference cycle.
        self.__parentTask: Optional[weakref.ReferenceType] = (
            None if parentTask is None else weakref.ref(parentTask)
        )

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Every task in a hierarchy shares the same registry dict.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            # A logger with an empty name contributes no prefix.
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = lsst.utils.logging.getLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register weakly so the shared registry does not keep tasks alive.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional['Task']:
        """The parent task, or `None` if this task has no parent or the
        parent has already been garbage collected.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Each registered task gets a fresh `lsst.daf.base.PropertyList`.
        The registry holds weak references; a reference to a task that has
        already been garbage collected raises `AttributeError` here.
        """
        for subtask in self._taskDict.values():
            subtask().metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Work on a copy: getSchemaCatalogs may hand back a dict the subclass
        # keeps around, and merging other tasks' schemas into it would leak
        # them into every later getSchemaCatalogs call.
        schemaDict = dict(self.getSchemaCatalogs())
        for subtask in self._taskDict.values():
            schemaDict.update(subtask().getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self.getTaskDict().items():
            fullMetadata.set(fullName.replace(".", ":"), task().metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc. Call a value to obtain the task itself.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or the
            field's value is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        lsst.utils.timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end marker even when the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked as ``factory(*args, **kwargs)``.
        args : sequence
            Positional arguments for ``factory``.
        kwargs : `dict`
            Keyword arguments for ``factory``.

        Returns
        -------
        obj
            Whatever ``factory`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Returns a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return dict(config=self.config, name=self._name, parentTask=self._parentTask,)

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``(callable, args)`` pair that rebuilds the task by calling this
            class with the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())