Coverage for python/lsst/pipe/base/task.py: 31%

117 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-02 02:23 +0000

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23from __future__ import annotations 

24 

25__all__ = ["Task", "TaskError"] 

26 

27import contextlib 

28import logging 

29import weakref 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 Callable, 

34 ClassVar, 

35 Dict, 

36 Iterator, 

37 Optional, 

38 Sequence, 

39 Tuple, 

40 Type, 

41 Union, 

42) 

43 

44import lsst.utils 

45import lsst.utils.logging 

46from lsst.pex.config import ConfigurableField 

47from lsst.utils.timer import logInfo 

48 

49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true

50 from lsst.pex.config import Config 

51 

52try: 

53 import lsstDebug # type: ignore 

54except ImportError: 

55 lsstDebug = None 

56 

57from ._task_metadata import TaskMetadata 

58 

59# This defines the Python type to use for task metadata. It is a private 

60# module-level variable that can be accessed by other closely-related 

61# middleware code and test code. 

62_TASK_METADATA_TYPE = TaskMetadata 

63_TASK_FULL_METADATA_TYPE = TaskMetadata 

64 

65 

class TaskError(Exception):
    """Exception for task failures where a traceback adds no useful detail.

    Notes
    -----
    Typical situations in which this error is appropriate:

    - processCcd is asked to run detection, but not calibration, and no
      calexp is found.
    - coadd finds no valid images in the specified patch.
    """

79 

80 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of ``Task.ConfigClass``,
        which is a task-specific subclass of `lsst.pex.config.Config`), or
        `None`. If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task without a usable
        ``log``, the root of the task's module name is prepended to the logger
        name unless ``_add_module_logger_prefix`` is `False`.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: a `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted, and
        the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask`\ s should
    *not* accept a blob such as a butler data reference. How we will handle
    data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    ConfigClass: ClassVar[Type[Config]]
    _DefaultName: ClassVar[str]

    _add_module_logger_prefix: bool = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(
        self,
        config: Optional[Config] = None,
        name: Optional[str] = None,
        parentTask: Optional[Task] = None,
        log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None,
    ):
        self.metadata = _TASK_METADATA_TYPE()
        # Only a weak reference to the parent is held, so a subtask does not
        # keep its parent alive.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # The task dictionary is shared by every task in the hierarchy.
            self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log: lsst.utils.logging.LsstLogAdapter = lsst.utils.logging.getLogger(loggerName)
        self.config: Config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task (weakly) under its full name.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional[Task]:
        """The parent task, or `None` if there is no parent (or the weak
        reference to it no longer resolves)."""
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def emptyMetadata(self) -> None:
        """Empty (clear) the metadata for this Task and all sub-Tasks."""
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            subtask.metadata = _TASK_METADATA_TYPE()

    # We use Any instead of lsst.afw.table.BaseCatalog here to avoid a
    # type-only dependency on afw.  It's unclear whether this will survive
    # Gen2 anyway, or how we might adapt it to work with non-afw catalogs
    # (e.g. Parquet).
    def getSchemaCatalogs(self) -> Dict[str, Any]:
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self) -> Dict[str, Any]:
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Copy so that the dict handed back by getSchemaCatalogs (which an
        # override may keep a reference to) is never modified in place.
        schemaDict = dict(self.getSchemaCatalogs())
        for wref in self._taskDict.values():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self) -> TaskMetadata:
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, wref in self.getTaskDict().items():
            subtask = wref()
            assert subtask is not None, "Unexpected garbage collection of subtask."
            fullMetadata[fullName.replace(".", ":")] = subtask.metadata
        return fullMetadata

    def getFullName(self) -> str:
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self) -> str:
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]:
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name: str, **keyArgs: Any) -> None:
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or the
            field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]:
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item name:
            ``Start`` and ``End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end marker, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name: str) -> str:
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(
        factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any]
    ) -> Task:
        """Unpickle something by calling a factory

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self) -> Dict[str, Any]:
        """Returns a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(
        self,
    ) -> Tuple[
        Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task],
        Tuple[Type[Task], Sequence[Any], Dict[str, Any]],
    ]:
        """Pickler."""
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

521 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())