Coverage for python/lsst/pipe/base/task.py: 33%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

110 statements  

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22__all__ = ["Task", "TaskError"] 

23 

24import contextlib 

25import logging 

26import weakref 

27from typing import Optional 

28 

29import lsst.utils 

30import lsst.utils.logging 

31from lsst.pex.config import ConfigurableField 

32from lsst.utils.timer import logInfo 

33 

34try: 

35 import lsstDebug 

36except ImportError: 

37 lsstDebug = None 

38 

39from ._task_metadata import TaskMetadata 

40 

# Task metadata can be held in more than one Python type. Historically it
# was a PropertyList; the goal is to migrate to TaskMetadata so that its
# behavior and serialization are under explicit control.  While this flag is
# True the legacy daf_base containers remain in use.
METADATA_COMPATIBILITY = True

# Start from the TaskMetadata-based types and override them below when the
# compatibility mode is enabled.
_TASK_METADATA_TYPE = TaskMetadata
_TASK_FULL_METADATA_TYPE = TaskMetadata

if METADATA_COMPATIBILITY:
    # Imported only here so that daf_base is needed solely in legacy mode.
    import lsst.daf.base as dafBase

    _TASK_METADATA_TYPE = dafBase.PropertyList
    _TASK_FULL_METADATA_TYPE = dafBase.PropertySet

55 

56 

class TaskError(Exception):
    """Exception for failures where a traceback adds no useful information.

    Notes
    -----
    Typical situations in which this is raised:

    - processCcd is asked to run detection but not calibration, and no
      calexp can be found.
    - coadd finds no valid images in the requested patch.
    """

70 

71 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any.

        - If `None` (a top-level task) then ``name`` defaults to
          ``_DefaultName`` and ``config`` defaults to ``self.ConfigClass()``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`).

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` or `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted, and
        the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than cmdLineTask.CmdLineTask should *not*
    accept a blob such as a butler data reference. How we will handle data
    references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    _add_module_logger_prefix = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = _TASK_METADATA_TYPE()
        # The parent is only held through a weak reference (or None for a
        # top-level task); see the ``_parentTask`` property for access.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # All tasks in one hierarchy share a single registry dict.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = lsst.utils.logging.getLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register weakly so the shared registry does not keep tasks alive.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional["Task"]:
        """The parent task, or `None` if there is no parent or the weak
        reference to it is no longer alive.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    @staticmethod
    def _iter_live_tasks(taskDict):
        """Iterate over the tasks in a task registry that are still alive.

        Parameters
        ----------
        taskDict : `dict`
            Mapping of full task name to a weak reference to the task.

        Yields
        ------
        fullName : `str`
            Full name of the task.
        task : `Task`
            The task itself; entries whose weak reference is dead are
            skipped.
        """
        # Iterate over a snapshot so callers may touch the registry while
        # consuming the generator.
        for fullName, ref in list(taskDict.items()):
            task = ref()
            if task is not None:
                yield fullName, task

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Tasks in the hierarchy that have already been garbage collected are
        skipped.
        """
        for _, subtask in self._iter_live_tasks(self._taskDict):
            subtask.metadata = _TASK_METADATA_TYPE()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks. This is always a new dict; the dicts
            returned by the individual `getSchemaCatalogs` calls are not
            modified.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        Tasks in the hierarchy that have already been garbage collected are
        skipped.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.
        """
        # Copy so that a dict owned (e.g. cached) by the subclass is never
        # mutated by the updates below.
        schemaDict = dict(self.getSchemaCatalogs())
        for _, subtask in self._iter_live_tasks(self._taskDict):
            schemaDict.update(subtask.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet` or `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.

        Tasks in the hierarchy that have already been garbage collected are
        skipped.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, task in self._iter_live_tasks(self.getTaskDict()):
            fullMetadata[fullName.replace(".", ":")] = task.metadata
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary containing full task name: weak reference to the task
            object, for the top-level task and all subtasks, sub-subtasks,
            etc. Call a value to obtain the task; the result is `None` if
            that task has been garbage collected.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            that field is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using the item
            name prefixes ``<name>Start`` and ``<name>End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See also
        --------
        timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Always record the end, even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable invoked to build the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        obj
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Returns a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling.
        """
        return dict(
            config=self.config,
            name=self._name,
            parentTask=self._parentTask,
        )

    def __reduce__(self):
        """Pickler.

        The task is rebuilt by calling its class with the keyword arguments
        from `_reduce_kwargs` and no positional arguments.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

485 """Pickler.""" 

486 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())