Coverage for python/lsst/pipe/base/task.py: 32%
118 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-11 01:21 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-11 01:21 -0700
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <http://www.lsstcorp.org/LegalNotices/>.
21#
23from __future__ import annotations
25__all__ = ["Task", "TaskError"]
27import contextlib
28import logging
29import weakref
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 Callable,
34 ClassVar,
35 Dict,
36 Iterator,
37 Optional,
38 Sequence,
39 Tuple,
40 Type,
41 Union,
42)
44import lsst.utils
45import lsst.utils.logging
46from lsst.pex.config import ConfigurableField
47from lsst.utils.timer import logInfo
49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from lsst.pex.config import Config
52try:
53 import lsstDebug # type: ignore
54except ImportError:
55 lsstDebug = None
57from ._task_metadata import TaskMetadata
59# This defines the Python type to use for task metadata. It is a private
60# module-level variable that can be accessed by other closely-related
61# middleware code and test code.
62_TASK_METADATA_TYPE = TaskMetadata
63_TASK_FULL_METADATA_TYPE = TaskMetadata
class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations that are reported with this exception:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """
class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` may be omitted provided
          the class defines ``_DefaultName``, and ``config`` may be omitted,
          in which case ``self.ConfigClass()`` is used.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task without a usable
        ``log``, the root package name of the task's module is prepended to
        the log name when ``_add_module_logger_prefix`` is true and the name
        does not already start with it.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is kept, so a subtask never
        # keeps its parent alive.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The whole hierarchy shares one registry of tasks.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        self.config.validate()
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name in the shared registry.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` for a top-level task.

        The stored weak reference is dereferenced on each access.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw.  It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See Also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Merge into a copy: a subclass may return a dict it keeps as
        # instance state, and that must not be polluted with the catalogs of
        # every other task in the hierarchy.
        schemaDict = dict(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See Also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            the field's value is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the prefixes
            ``<name>Start`` and ``<name>End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See Also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end marker even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked as ``factory(*args, **kwargs)``.
        args : sequence
            Positional arguments for ``factory``.
        kwargs : `dict`
            Keyword arguments for ``factory``.

        Returns
        -------
        obj
            Whatever ``factory`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler.

        Rebuilds the task by calling its class with no positional arguments
        and the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())