Coverage for python/lsst/pipe/base/task.py: 31%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
# Public names exported by a star-import of this module.
__all__ = ["Task", "TaskError"]
24import contextlib
25import logging
26import weakref
27from typing import Optional
29import lsst.utils
30import lsst.utils.logging
31from lsst.pex.config import ConfigurableField
32from lsst.utils.timer import logInfo
34try:
35 import lsstDebug
36except ImportError:
37 lsstDebug = None
39from ._task_metadata import TaskMetadata
# These define the Python types to use for task metadata: the first is
# instantiated for each task's own ``metadata`` attribute, the second for
# the aggregate built by ``Task.getFullMetadata``. They are private
# module-level variables that can be accessed by other closely-related
# middleware code and test code.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata
class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """
63class Task:
64 r"""Base class for data processing tasks.
66 See :ref:`task-framework-overview` to learn what tasks are, and
67 :ref:`creating-a-task` for more information about writing tasks.
69 Parameters
70 ----------
71 config : `Task.ConfigClass` instance, optional
72 Configuration for this task (an instance of Task.ConfigClass, which
73 is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
74 If `None`:
76 - If parentTask specified then defaults to parentTask.config.\<name>
77 - If parentTask is None then defaults to self.ConfigClass()
79 name : `str`, optional
80 Brief name of task, or `None`; if `None` then defaults to
81 `Task._DefaultName`
82 parentTask : `Task`-type, optional
83 The parent task of this subtask, if any.
85 - If `None` (a top-level task) then ``config`` defaults to
86 ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
87 - If not `None` (a subtask) then you must specify name.
88 log : `logging.Logger` or subclass, optional
89 Log whose name is used as a log name prefix, or `None` for no prefix.
90 Ignored if ``parentTask`` is specified, in which case
91 ``parentTask.log``\ 's name is used as a prefix. The task's log name is
92 ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
93 log is then a child logger of ``parentTask.log`` (if ``parentTask``
94 specified), or a child logger of the log from the argument
95 (if ``log`` is not `None`).
97 Raises
98 ------
99 RuntimeError
100 Raised under these circumstances:
102 - If ``parentTask`` is `None` and ``config`` is `None`.
103 - If ``parentTask`` is not `None` and ``name`` is `None`.
104 - If ``name`` is `None` and ``_DefaultName`` does not exist.
106 Notes
107 -----
108 Useful attributes include:
110 - ``log``: a `logging.Logger` or subclass.
111 - ``config``: task-specific configuration; an instance of ``ConfigClass``
112 (see below).
113 - ``metadata``: a `TaskMetadata` for
114 collecting task-specific metadata, e.g. data quality and performance
115 metrics. This is data that is only meant to be persisted, never to be
116 used by the task.
118 Subclasses typically have a method named ``runDataRef`` to perform the
119 main data processing. Details:
121 - ``runDataRef`` should process the minimum reasonable amount of data,
122 typically a single CCD. Iteration, if desired, is performed by a caller
123 of the method. This is good design and allows multiprocessing without
124 the run method having to support it directly.
125 - If ``runDataRef`` can persist or unpersist data:
127 - ``runDataRef`` should accept a butler data reference (or a collection
128 of data references, if appropriate, e.g. coaddition).
129 - There should be a way to run the task without persisting data.
130 Typically the run method returns all data, even if it is persisted, and
131 the task's config method offers a flag to disable persistence.
133 **Deprecated:** Tasks other than ``cmdLineTask.CmdLineTask``\ s should *not*
134 accept a blob such as a butler data reference. How we will handle data
135 references is still TBD, so don't make changes yet!
136 RHL 2014-06-27
138 Subclasses must also have an attribute ``ConfigClass`` that is a subclass
139 of `lsst.pex.config.Config` which configures the task. Subclasses should
140 also have an attribute ``_DefaultName``: the default name if there is no
141 parent task. ``_DefaultName`` is required for subclasses of
142 `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
143 because it simplifies construction (e.g. for unit tests).
145 Tasks intended to be run from the command line should be subclasses of
146 `~lsst.pipe.base.CmdLineTask` not Task.
147 """
    # Class-level switch read by ``__init__`` when it names the logger of a
    # top-level task that was not handed a named ``log``.
    _add_module_logger_prefix = True
    """Control whether the module prefix should be prepended to default
    logger names."""
153 def __init__(self, config=None, name=None, parentTask=None, log=None):
154 self.metadata = _TASK_METADATA_TYPE()
155 self.__parentTask: Optional[weakref.ReferenceType]
156 self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)
158 if parentTask is not None:
159 if name is None:
160 raise RuntimeError("name is required for a subtask")
161 self._name = name
162 self._fullName = parentTask._computeFullName(name)
163 if config is None:
164 config = getattr(parentTask.config, name)
165 self._taskDict = parentTask._taskDict
166 loggerName = parentTask.log.getChild(name).name
167 else:
168 if name is None:
169 name = getattr(self, "_DefaultName", None)
170 if name is None:
171 raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
172 name = self._DefaultName
173 self._name = name
174 self._fullName = self._name
175 if config is None:
176 config = self.ConfigClass()
177 self._taskDict = dict()
178 loggerName = self._fullName
179 if log is not None and log.name:
180 loggerName = log.getChild(loggerName).name
181 elif self._add_module_logger_prefix:
182 # Prefix the logger name with the root module name.
183 # We want all Task loggers to have this prefix to make
184 # it easier to control them. This can be disabled by
185 # a Task setting the class property _add_module_logger_prefix
186 # to False -- in which case the logger name will not be
187 # modified.
188 module_name = self.__module__
189 module_root = module_name.split(".")[0] + "."
190 if not loggerName.startswith(module_root):
191 loggerName = module_root + loggerName
193 # Get a logger (that might be a subclass of logging.Logger).
194 self.log = lsst.utils.logging.getLogger(loggerName)
195 self.config = config
196 if lsstDebug:
197 self._display = lsstDebug.Info(self.__module__).display
198 else:
199 self._display = None
200 self._taskDict[self._fullName] = weakref.ref(self)
202 @property
203 def _parentTask(self) -> Optional["Task"]:
204 return self.__parentTask if self.__parentTask is None else self.__parentTask()
206 def emptyMetadata(self):
207 """Empty (clear) the metadata for this Task and all sub-Tasks."""
208 for subtask in self._taskDict.values():
209 subtask().metadata = _TASK_METADATA_TYPE()
211 def getSchemaCatalogs(self):
212 """Get the schemas generated by this task.
214 Returns
215 -------
216 schemaCatalogs : `dict`
217 Keys are butler dataset type, values are an empty catalog (an
218 instance of the appropriate `lsst.afw.table` Catalog type) for
219 this task.
221 Notes
222 -----
224 .. warning::
226 Subclasses that use schemas must override this method. The default
227 implementation returns an empty dict.
229 This method may be called at any time after the Task is constructed,
230 which means that all task schemas should be computed at construction
231 time, *not* when data is actually processed. This reflects the
232 philosophy that the schema should not depend on the data.
234 Returning catalogs rather than just schemas allows us to save e.g.
235 slots for SourceCatalog as well.
237 See also
238 --------
239 Task.getAllSchemaCatalogs
240 """
241 return {}
243 def getAllSchemaCatalogs(self):
244 """Get schema catalogs for all tasks in the hierarchy, combining the
245 results into a single dict.
247 Returns
248 -------
249 schemacatalogs : `dict`
250 Keys are butler dataset type, values are a empty catalog (an
251 instance of the appropriate `lsst.afw.table` Catalog type) for all
252 tasks in the hierarchy, from the top-level task down
253 through all subtasks.
255 Notes
256 -----
257 This method may be called on any task in the hierarchy; it will return
258 the same answer, regardless.
260 The default implementation should always suffice. If your subtask uses
261 schemas the override `Task.getSchemaCatalogs`, not this method.
262 """
263 schemaDict = self.getSchemaCatalogs()
264 for subtask in self._taskDict.values():
265 schemaDict.update(subtask().getSchemaCatalogs())
266 return schemaDict
268 def getFullMetadata(self):
269 """Get metadata for all tasks.
271 Returns
272 -------
273 metadata : `TaskMetadata`
274 The keys are the full task name.
275 Values are metadata for the top-level task and all subtasks,
276 sub-subtasks, etc.
278 Notes
279 -----
280 The returned metadata includes timing information (if
281 ``@timer.timeMethod`` is used) and any metadata set by the task. The
282 name of each item consists of the full task name with ``.`` replaced
283 by ``:``, followed by ``.`` and the name of the item, e.g.::
285 topLevelTaskName:subtaskName:subsubtaskName.itemName
287 using ``:`` in the full task name disambiguates the rare situation
288 that a task has a subtask and a metadata item with the same name.
289 """
290 fullMetadata = _TASK_FULL_METADATA_TYPE()
291 for fullName, task in self.getTaskDict().items():
292 fullMetadata[fullName.replace(".", ":")] = task().metadata
293 return fullMetadata
295 def getFullName(self):
296 """Get the task name as a hierarchical name including parent task
297 names.
299 Returns
300 -------
301 fullName : `str`
302 The full name consists of the name of the parent task and each
303 subtask separated by periods. For example:
305 - The full name of top-level task "top" is simply "top".
306 - The full name of subtask "sub" of top-level task "top" is
307 "top.sub".
308 - The full name of subtask "sub2" of subtask "sub" of top-level
309 task "top" is "top.sub.sub2".
310 """
311 return self._fullName
313 def getName(self):
314 """Get the name of the task.
316 Returns
317 -------
318 taskName : `str`
319 Name of the task.
321 See also
322 --------
323 getFullName
324 """
325 return self._name
327 def getTaskDict(self):
328 """Get a dictionary of all tasks as a shallow copy.
330 Returns
331 -------
332 taskDict : `dict`
333 Dictionary containing full task name: task object for the top-level
334 task and all subtasks, sub-subtasks, etc.
335 """
336 return self._taskDict.copy()
338 def makeSubtask(self, name, **keyArgs):
339 """Create a subtask as a new instance as the ``name`` attribute of this
340 task.
342 Parameters
343 ----------
344 name : `str`
345 Brief name of the subtask.
346 keyArgs
347 Extra keyword arguments used to construct the task. The following
348 arguments are automatically provided and cannot be overridden:
350 - "config".
351 - "parentTask".
353 Notes
354 -----
355 The subtask must be defined by ``Task.config.name``, an instance of
356 `~lsst.pex.config.ConfigurableField` or
357 `~lsst.pex.config.RegistryField`.
358 """
359 taskField = getattr(self.config, name, None)
360 if taskField is None:
361 raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
362 subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
363 setattr(self, name, subtask)
365 @contextlib.contextmanager
366 def timer(self, name, logLevel=logging.DEBUG):
367 """Context manager to log performance data for an arbitrary block of
368 code.
370 Parameters
371 ----------
372 name : `str`
373 Name of code being timed; data will be logged using item name:
374 ``Start`` and ``End``.
375 logLevel
376 A `logging` level constant.
378 Examples
379 --------
380 Creating a timer context:
382 .. code-block:: python
384 with self.timer("someCodeToTime"):
385 pass # code to time
387 See also
388 --------
389 timer.logInfo
390 """
391 logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
392 try:
393 yield
394 finally:
395 logInfo(obj=self, prefix=name + "End", logLevel=logLevel)
397 @classmethod
398 def makeField(cls, doc):
399 """Make a `lsst.pex.config.ConfigurableField` for this task.
401 Parameters
402 ----------
403 doc : `str`
404 Help text for the field.
406 Returns
407 -------
408 configurableField : `lsst.pex.config.ConfigurableField`
409 A `~ConfigurableField` for this task.
411 Examples
412 --------
413 Provides a convenient way to specify this task is a subtask of another
414 task.
416 Here is an example of use:
418 .. code-block:: python
420 class OtherTaskConfig(lsst.pex.config.Config):
421 aSubtask = ATaskClass.makeField("brief description of task")
422 """
423 return ConfigurableField(doc=doc, target=cls)
425 def _computeFullName(self, name):
426 """Compute the full name of a subtask or metadata item, given its brief
427 name.
429 Parameters
430 ----------
431 name : `str`
432 Brief name of subtask or metadata item.
434 Returns
435 -------
436 fullName : `str`
437 The full name: the ``name`` argument prefixed by the full task name
438 and a period.
440 Notes
441 -----
442 For example: if the full name of this task is "top.sub.sub2"
443 then ``_computeFullName("subname")`` returns
444 ``"top.sub.sub2.subname"``.
445 """
446 return f"{self._fullName}.{name}"
448 @staticmethod
449 def _unpickle_via_factory(factory, args, kwargs):
450 """Unpickle something by calling a factory
452 Allows subclasses to unpickle using `__reduce__` with keyword
453 arguments as well as positional arguments.
454 """
455 return factory(*args, **kwargs)
457 def _reduce_kwargs(self):
458 """Returns a dict of the keyword arguments that should be used
459 by `__reduce__`.
461 Subclasses with additional arguments should always call the parent
462 class method to ensure that the standard parameters are included.
464 Returns
465 -------
466 kwargs : `dict`
467 Keyword arguments to be used when pickling.
468 """
469 return dict(
470 config=self.config,
471 name=self._name,
472 parentTask=self._parentTask,
473 )
475 def __reduce__(self):
476 """Pickler."""
477 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())