Coverage for python/lsst/pipe/base/task.py: 31%

108 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-25 09:14 +0000

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23from __future__ import annotations 

24 

25__all__ = ["Task", "TaskError"] 

26 

27import contextlib 

28import logging 

29import weakref 

30from collections.abc import Callable, Iterator, Sequence 

31from typing import TYPE_CHECKING, Any, ClassVar 

32 

33import lsst.utils 

34import lsst.utils.logging 

35from lsst.pex.config import ConfigurableField 

36from lsst.utils.timer import logInfo 

37 

38if TYPE_CHECKING: 

39 from lsst.pex.config import Config 

40 

41try: 

42 import lsstDebug # type: ignore 

43except ImportError: 

44 lsstDebug = None 

45 

46from ._task_metadata import TaskMetadata 

47 

48# This defines the Python type to use for task metadata. It is a private

49# module-level variable that can be accessed by other closely-related

50# middleware code and test code.

51_TASK_METADATA_TYPE = TaskMetadata 

52_TASK_FULL_METADATA_TYPE = TaskMetadata 

53 

54 

class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """

68 

69 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`. Must be given explicitly for a subtask.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` and ``config`` may be
          omitted and the defaults described above are used.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task with no ``log``
        (or a ``log`` with an empty name) the root package name of the
        task's module is prepended to the log name, unless the class sets
        ``_add_module_logger_prefix`` to `False`.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None`, and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    The constructor must use keyword parameters for everything other than
    the ``config`` parameter which can be positional or use keyword form.
    The configuration is validated (``config.validate()``) during
    construction.

    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Config | None = None,
        *,
        name: str | None = None,
        parentTask: Task | None = None,
        log: logging.Logger | lsst.utils.logging.LsstLogAdapter | None = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is stored.
        self.__parentTask: weakref.ReferenceType | None
        self.__parentTask = None if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # A subtask shares its parent's task dictionary, so every task
            # in the hierarchy registers itself in the same mapping.
            self._taskDict: dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = {}
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        self.config.validate()
        # lsstDebug is an optional import; without it there is no display
        # configuration.
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name (as a weak reference).
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Task | None:
        """The parent task, or `None` if no parent was given or the weak
        reference to it no longer resolves.
        """
        parentRef = self.__parentTask
        return None if parentRef is None else parentRef()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Every task registered in the shared task dictionary has its
        ``metadata`` attribute replaced by a new, empty metadata object.
        """
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See Also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the
            task object, for the top-level task and all subtasks,
            sub-subtasks, etc. Call a value to obtain the task itself.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        **keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - ``config``.
            - ``parentTask``.

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel : `int`, optional
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See Also
        --------
        lsst.utils.timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # The end record is written even if the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : `~collections.abc.Callable`
            Callable used to construct the object.
        args : `~collections.abc.Sequence`
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        task : `Task`
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return {
            "config": self.config,
            "name": self._name,
            "parentTask": self._parentTask,
        }

    def __reduce__(
        self,
    ) -> tuple[
        Callable[[Callable[..., Task], Sequence[Any], dict[str, Any]], Task],
        tuple[type[Task], Sequence[Any], dict[str, Any]],
    ]:
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``_unpickle_via_factory`` together with the arguments it is
            called with: this task's class, an empty positional-argument
            list, and the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())