Coverage for python/lsst/pipe/base/task.py: 36%

109 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 02:55 -0700

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23from __future__ import annotations 

24 

25__all__ = ["Task", "TaskError"] 

26 

27import contextlib 

28import logging 

29import weakref 

30from collections.abc import Callable, Iterator, Sequence 

31from typing import TYPE_CHECKING, Any, ClassVar 

32 

33import lsst.utils 

34import lsst.utils.introspection 

35import lsst.utils.logging 

36from lsst.pex.config import ConfigurableField 

37from lsst.utils.timer import logInfo 

38 

39if TYPE_CHECKING: 

40 from lsst.pex.config import Config 

41 

42try: 

43 import lsstDebug # type: ignore 

44except ImportError: 

45 lsstDebug = None 

46 

47from ._task_metadata import TaskMetadata 

48 

# This defines the Python type to use for task metadata. These are private
# module-level variables that can be accessed by other closely-related
# middleware code and test code.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata

54 

55 

class TaskError(Exception):
    """Exception for task failures where a traceback would not be useful.

    Notes
    -----
    Typical situations in which to raise it:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """

69 

70 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`. Required if ``parentTask`` is specified.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task without a usable
        ``log``, the root module name of the task class is prepended to the
        logger name unless ``_add_module_logger_prefix`` is `False`.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    The constructor must use keyword parameters for everything other than
    the ``config`` parameter which can be positional or use keyword form.

    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Config | None = None,
        *,
        name: str | None = None,
        parentTask: Task | None = None,
        log: logging.Logger | lsst.utils.logging.LsstLogAdapter | None = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # The parent is held only through a weak reference (or None for a
        # top-level task); see the ``_parentTask`` property for access.
        self.__parentTask: weakref.ReferenceType | None
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Share (not copy) the parent's registry so every task in the
            # hierarchy sees the same dictionary.
            self._taskDict: dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = {}
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        self.config.validate()
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name; only a weak reference is
        # stored in the registry.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Task | None:
        """The parent task, or `None` if this task has no parent.

        The stored weak reference is dereferenced on access, so the result
        is also `None` if the parent no longer exists.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Every task registered in the task dictionary gets a fresh, empty
        metadata object. The task dictionary is shared between a parent and
        its subtasks, so calling this on a subtask clears the metadata of
        every task registered in the same hierarchy.
        """
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See Also
        --------
        getFullName : Get the full name of the task.
        """
        return self._name

    def getTaskDict(self) -> dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the
            task object, for the top-level task and all subtasks,
            sub-subtasks, etc. Call the reference to obtain the task.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        **keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - ``config``.
            - ``name``.
            - ``parentTask``.

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the prefixes
            ``<name>Start`` and ``<name>End``.
        logLevel : `int`
            A `logging` level constant.

        Notes
        -----
        The ``End`` entry is logged even if the timed block raises.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See Also
        --------
        lsst.utils.timer.logInfo : Implementation function.
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : `~collections.abc.Callable`
            Callable invoked to build the object.
        args : `~collections.abc.Sequence`
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        task : `Task`
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[Callable[..., Task], Sequence[Any], dict[str, Any]], Task],
        tuple[type[Task], Sequence[Any], dict[str, Any]],
    ]:
        """Support pickling.

        The instance is rebuilt by calling its class with no positional
        arguments and the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

433 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())