Coverage for python/lsst/pipe/base/task.py: 31%
117 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-02 18:35 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-02 18:35 -0700
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
23from __future__ import annotations
25__all__ = ["Task", "TaskError"]
27import contextlib
28import logging
29import weakref
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 Callable,
34 ClassVar,
35 Dict,
36 Iterator,
37 Optional,
38 Sequence,
39 Tuple,
40 Type,
41 Union,
42)
44import lsst.utils
45import lsst.utils.logging
46from lsst.pex.config import ConfigurableField
47from lsst.utils.timer import logInfo
49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from lsst.pex.config import Config
52try:
53 import lsstDebug # type: ignore
54except ImportError:
55 lsstDebug = None
57from ._task_metadata import TaskMetadata
# Python types used for task metadata. These are private module-level
# names that can be accessed by other closely-related middleware code and
# test code.
#
# - _TASK_METADATA_TYPE is instantiated for each task's own ``metadata``
#   attribute (in ``Task.__init__`` and ``Task.emptyMetadata``).
# - _TASK_FULL_METADATA_TYPE is instantiated by ``Task.getFullMetadata`` to
#   hold the metadata of every task in the hierarchy.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata
class TaskError(Exception):
    """Exception for failures where a traceback would not be useful.

    Notes
    -----
    Typical situations reported with this error:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of ``Task.ConfigClass``,
        which is a task-specific subclass of `lsst.pex.config.Config`), or
        `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if ``parentTask`` is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task with no usable
        ``log`` (`None`, or a log with an empty name), the root package name
        of the task's module is used as the prefix unless the class sets
        ``_add_module_logger_prefix`` to `False`.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted,
        and the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask`\ s should
    *not* accept a blob such as a butler data reference. How we will handle
    data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is kept, so a subtask does not
        # keep its parent alive.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # All tasks in one hierarchy share a single registry dict.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = {}
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        # lsstDebug is an optional import; it is None when unavailable.
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task in the hierarchy-wide dict under its full name.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` for a top-level task.

        The parent is held through a weak reference, so this is also `None`
        if the parent has already been garbage collected.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw.  It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Copy before merging so that a dict owned (e.g. cached) by a
        # getSchemaCatalogs override is never modified in place.
        schemaDict = dict(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no attribute called ``name``
            (or that attribute is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the prefixes
            ``<name>Start`` and ``<name>End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always log the end marker, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : `Callable`
            Callable that builds the object.
        args : `Sequence`
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        task : `Task`
            The result of ``factory(*args, **kwargs)``.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``_unpickle_via_factory`` plus its arguments: this task's class,
            an empty list of positional arguments, and the keyword arguments
            from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())