Coverage for python/lsst/pipe/base/task.py: 32%
117 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 11:52 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-18 11:52 -0700
#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#
23from __future__ import annotations
25__all__ = ["Task", "TaskError"]
27import contextlib
28import logging
29import weakref
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 Callable,
34 ClassVar,
35 Dict,
36 Iterator,
37 Optional,
38 Sequence,
39 Tuple,
40 Type,
41 Union,
42)
44import lsst.utils
45import lsst.utils.logging
46from lsst.pex.config import ConfigurableField
47from lsst.utils.timer import logInfo
49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from lsst.pex.config import Config
52try:
53 import lsstDebug # type: ignore
54except ImportError:
55 lsstDebug = None
57from ._task_metadata import TaskMetadata
# These define the Python types to use for task metadata. They are private
# module-level variables that can be accessed by other closely-related
# middleware code and test code.
# ``_TASK_METADATA_TYPE`` is instantiated for each task's own ``metadata``;
# ``_TASK_FULL_METADATA_TYPE`` is instantiated by ``Task.getFullMetadata`` to
# hold the metadata of a whole task hierarchy.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata
class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` for a top-level task then
        defaults to `Task._DefaultName`.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # The parent is only referenced weakly; `_parentTask` dereferences it.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Every task in a hierarchy shares the same registry dict.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        # lsstDebug is None when the optional import failed at module load.
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name in the shared registry.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` if no parent reference is stored.

        The parent is stored as a weak reference, so this also evaluates to
        `None` once the referenced parent no longer exists.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw. It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See Also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks. A new dict is returned on every call.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Accumulate into a fresh dict so that a dict handed back by an
        # overridden getSchemaCatalogs (e.g. one cached on the task) is never
        # modified in place.
        schemaDict: Dict[str, Any] = {}
        schemaDict.update(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See Also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or the
            field's value is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the prefixes
            ``<name>Start`` and ``<name>End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See Also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Runs even when the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked to build the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        task : `Task`
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``_unpickle_via_factory`` plus its arguments: this instance's
            class, an empty positional-argument list, and the keyword
            arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())