Coverage for python/lsst/pipe/base/task.py: 30%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
22__all__ = ["Task", "TaskError"]
24import contextlib
25import logging
26from typing import Optional
28import lsst.utils
29import lsst.utils.logging
30from lsst.utils.timer import logInfo
31from lsst.pex.config import ConfigurableField
32import lsst.daf.base as dafBase
34import weakref
36try:
37 import lsstDebug
38except ImportError:
39 lsstDebug = None
class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this exception is appropriate:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` for collecting
      task-specific metadata, e.g. data quality and performance metrics.
      This is data that is only meant to be persisted, never to be used by
      the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted,
        and the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask` should
    *not* accept a blob such as a butler data reference. How we will handle
    data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    _add_module_logger_prefix = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = dafBase.PropertyList()
        # Only a weak reference to the parent is stored on the subtask.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The whole hierarchy shares a single registry of tasks.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = {}
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = lsst.utils.logging.getLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name; the registry holds weak
        # references only.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional['Task']:
        """The parent task, or `None` if there is no parent or the weak
        reference to it is no longer alive.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def _iterLiveTasks(self):
        """Iterate over the registered tasks that are still alive.

        The task registry stores weak references, so an entry may refer to a
        task that has already been garbage collected. Such entries are
        skipped rather than dereferenced.

        Yields
        ------
        fullName : `str`
            Full name under which the task is registered.
        task : `Task`
            The live task object.
        """
        # Iterate over a snapshot so callers may touch the registry safely.
        for fullName, taskRef in list(self._taskDict.items()):
            task = taskRef()
            if task is not None:
                yield fullName, task

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Registered tasks that have been garbage collected are skipped.
        """
        for _, subtask in self._iterLiveTasks():
            subtask.metadata = dafBase.PropertyList()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.

        Registered tasks that have been garbage collected are skipped.
        """
        # Copy so that a dict cached by a subclass's getSchemaCatalogs is
        # never modified by the merge below.
        schemaDict = dict(self.getSchemaCatalogs())
        for _, subtask in self._iterLiveTasks():
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet`
            The `~lsst.daf.base.PropertySet` keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.

        Registered tasks that have been garbage collected are skipped.
        """
        fullMetadata = dafBase.PropertySet()
        for fullName, task in self._iterLiveTasks():
            fullMetadata.set(fullName.replace(".", ":"), task.metadata)
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a `weakref.ref` of the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc. Call a value to obtain the task; the call returns `None` if
            the task no longer exists.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always log the end marker, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked to build the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        obj
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(config=self.config, name=self._name, parentTask=self._parentTask)

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``(callable, args)`` pair: `_unpickle_via_factory` together with
            the class, an empty positional argument list and the keyword
            arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())