Coverage for python/lsst/pipe/base/task.py: 32%

118 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-20 01:51 -0700

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23from __future__ import annotations 

24 

25__all__ = ["Task", "TaskError"] 

26 

27import contextlib 

28import logging 

29import weakref 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Callable, 

34 ClassVar, 

35 Dict, 

36 Iterator, 

37 Optional, 

38 Sequence, 

39 Tuple, 

40 Type, 

41 Union, 

42) 

43 

44import lsst.utils 

45import lsst.utils.logging 

46from lsst.pex.config import ConfigurableField 

47from lsst.utils.timer import logInfo 

48 

49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from lsst.pex.config import Config 

51 

52try: 

53 import lsstDebug # type: ignore 

54except ImportError: 

55 lsstDebug = None 

56 

57from ._task_metadata import TaskMetadata 

58 

59# This defines the Python type to use for task metadata. It is a private 

60# module-level variable that can be accessed by other closely-related 

61# middleware code and test code. 

62_TASK_METADATA_TYPE = TaskMetadata 

63_TASK_FULL_METADATA_TYPE = TaskMetadata 

64 

65 

class TaskError(Exception):
    """Exception for task failures where a traceback adds no information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """

79 

80 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task without a usable
        ``log``, the root module name of the task class is prepended to the
        logger name when ``_add_module_logger_prefix`` is true and the name
        does not already start with it.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None`, and
          ``_DefaultName`` does not exist or is `None`.

    Notes
    -----
    The configuration is validated with ``config.validate()`` during
    construction.

    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Use a `lsst.pipe.base.PipelineTask` subclass to perform I/O with a
    Butler.

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.PipelineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is stored (presumably to avoid
        # a parent <-> child reference cycle).
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The registry of tasks is shared by every task in the hierarchy.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        self.config.validate()
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task under its full name in the shared registry.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` if there is no parent (or the weak
        reference to it no longer resolves).
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw.  It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.

        The returned dict is always a new object: dicts returned by
        `Task.getSchemaCatalogs` are never modified.
        """
        # Accumulate into a copy so that a dict owned by an overriding
        # getSchemaCatalogs implementation is not mutated by the updates.
        schemaDict: Dict[str, Any] = dict(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Log the end marker even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked to build the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        task
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            ``_unpickle_via_factory`` together with the arguments it is to be
            called with: this task's class, an empty list of positional
            arguments, and the keyword arguments from `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())