Coverage for python/lsst/pipe/base/task.py: 32%

117 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-19 12:18 -0700

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23from __future__ import annotations 

24 

25__all__ = ["Task", "TaskError"] 

26 

27import contextlib 

28import logging 

29import weakref 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Callable, 

34 ClassVar, 

35 Dict, 

36 Iterator, 

37 Optional, 

38 Sequence, 

39 Tuple, 

40 Type, 

41 Union, 

42) 

43 

44import lsst.utils 

45import lsst.utils.logging 

46from lsst.pex.config import ConfigurableField 

47from lsst.utils.timer import logInfo 

48 

49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from lsst.pex.config import Config 

51 

52try: 

53 import lsstDebug # type: ignore 

54except ImportError: 

55 lsstDebug = None 

56 

57from ._task_metadata import TaskMetadata 

58 

59# This defines the Python type to use for task metadata. It is a private 

60# class variable that can be accessed by other closely-related middleware 

61# code and test code. 

62_TASK_METADATA_TYPE = TaskMetadata 

63_TASK_FULL_METADATA_TYPE = TaskMetadata 

64 

65 

class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this exception is appropriate:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """

79 

80 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`.
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` may be omitted provided
          the class defines ``_DefaultName``, and ``config`` may be omitted
          (a default ``ConfigClass()`` is constructed).
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is stored; it is dereferenced
        # by the ``_parentTask`` property.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # Subtasks share the parent's task dictionary object, so every
            # task in a hierarchy sees the same registry.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task (weakly) under its full name.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` if no parent reference is stored.

        The stored weak reference is dereferenced on each access.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw.  It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks. The returned dict is a new object.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Merge into a copy so that a dict cached by a subclass'
        # getSchemaCatalogs is never mutated by the update calls below.
        schemaDict = dict(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end marker, even if the timed block raises.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked as ``factory(*args, **kwargs)``.
        args : sequence
            Positional arguments for ``factory``.
        kwargs : `dict`
            Keyword arguments for ``factory``.

        Returns
        -------
        result
            Whatever ``factory`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Returns a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler.

        Returns the factory-based unpickler together with this instance's
        class, an empty positional-argument list and the keyword arguments
        from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())