Coverage for python/lsst/pipe/base/task.py: 30%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

103 statements  

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

22__all__ = ["Task", "TaskError"] 

23 

24import contextlib 

25import logging 

26from typing import Optional 

27 

28import lsst.utils 

29import lsst.utils.logging 

30from lsst.utils.timer import logInfo 

31from lsst.pex.config import ConfigurableField 

32import lsst.daf.base as dafBase 

33 

34import weakref 

35 

36try: 

37 import lsstDebug 

38except ImportError: 

39 lsstDebug = None 

40 

41 

42class TaskError(Exception): 

43 """Use to report errors for which a traceback is not useful. 

44 

45 Notes 

46 ----- 

47 Examples of such errors: 

48 

49 - processCcd is asked to run detection, but not calibration, and no calexp 

50 is found. 

51 - coadd finds no valid images in the specified patch. 

52 """ 

53 pass 

54 

55 

56class Task: 

57 r"""Base class for data processing tasks. 

58 

59 See :ref:`task-framework-overview` to learn what tasks are, and 

60 :ref:`creating-a-task` for more information about writing tasks. 

61 

62 Parameters 

63 ---------- 

64 config : `Task.ConfigClass` instance, optional 

65 Configuration for this task (an instance of Task.ConfigClass, which 

66 is a task-specific subclass of `lsst.pex.config.Config`, or `None`. 

67 If `None`: 

68 

69 - If parentTask specified then defaults to parentTask.config.\<name> 

70 - If parentTask is None then defaults to self.ConfigClass() 

71 

72 name : `str`, optional 

73 Brief name of task, or `None`; if `None` then defaults to 

74 `Task._DefaultName` 

75 parentTask : `Task`-type, optional 

76 The parent task of this subtask, if any. 

77 

78 - If `None` (a top-level task) then you must specify config and name 

79 is ignored. 

80 - If not `None` (a subtask) then you must specify name. 

81 log : `logging.Logger` or subclass, optional 

82 Log whose name is used as a log name prefix, or `None` for no prefix. 

83 Ignored if is parentTask specified, in which case 

84 ``parentTask.log``\ 's name is used as a prefix. The task's log name is 

85 ``prefix + "." + name`` if a prefix exists, else ``name``. The task's 

86 log is then a child logger of ``parentTask.log`` (if ``parentTask`` 

87 specified), or a child logger of the log from the argument 

88 (if ``log`` is not `None`). 

89 

90 Raises 

91 ------ 

92 RuntimeError 

93 Raised under these circumstances: 

94 

95 - If ``parentTask`` is `None` and ``config`` is `None`. 

96 - If ``parentTask`` is not `None` and ``name`` is `None`. 

97 - If ``name`` is `None` and ``_DefaultName`` does not exist. 

98 

99 Notes 

100 ----- 

101 Useful attributes include: 

102 

103 - ``log``: an `logging.Logger` or subclass. 

104 - ``config``: task-specific configuration; an instance of ``ConfigClass`` 

105 (see below). 

106 - ``metadata``: an `lsst.daf.base.PropertyList` for collecting 

107 task-specific metadata, e.g. data quality and performance metrics. 

108 This is data that is only meant to be persisted, never to be used by 

109 the task. 

110 

111 Subclasses typically have a method named ``runDataRef`` to perform the 

112 main data processing. Details: 

113 

114 - ``runDataRef`` should process the minimum reasonable amount of data, 

115 typically a single CCD. Iteration, if desired, is performed by a caller 

116 of the method. This is good design and allows multiprocessing without 

117 the run method having to support it directly. 

118 - If ``runDataRef`` can persist or unpersist data: 

119 

120 - ``runDataRef`` should accept a butler data reference (or a collection 

121 of data references, if appropriate, e.g. coaddition). 

122 - There should be a way to run the task without persisting data. 

123 Typically the run method returns all data, even if it is persisted, and 

124 the task's config method offers a flag to disable persistence. 

125 

126 **Deprecated:** Tasks other than cmdLineTask.CmdLineTask%s should *not* 

127 accept a blob such as a butler data reference. How we will handle data 

128 references is still TBD, so don't make changes yet! 

129 RHL 2014-06-27 

130 

131 Subclasses must also have an attribute ``ConfigClass`` that is a subclass 

132 of `lsst.pex.config.Config` which configures the task. Subclasses should 

133 also have an attribute ``_DefaultName``: the default name if there is no 

134 parent task. ``_DefaultName`` is required for subclasses of 

135 `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task 

136 because it simplifies construction (e.g. for unit tests). 

137 

138 Tasks intended to be run from the command line should be subclasses of 

139 `~lsst.pipe.base.CmdLineTask` not Task. 

140 """ 

141 

142 _add_module_logger_prefix = True 

143 """Control whether the module prefix should be prepended to default 

144 logger names.""" 

145 

146 def __init__(self, config=None, name=None, parentTask=None, log=None): 

147 self.metadata = dafBase.PropertyList() 

148 self.__parentTask: Optional[weakref.ReferenceType] 

149 self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask) 

150 

151 if parentTask is not None: 

152 if name is None: 

153 raise RuntimeError("name is required for a subtask") 

154 self._name = name 

155 self._fullName = parentTask._computeFullName(name) 

156 if config is None: 

157 config = getattr(parentTask.config, name) 

158 self._taskDict = parentTask._taskDict 

159 loggerName = parentTask.log.getChild(name).name 

160 else: 

161 if name is None: 

162 name = getattr(self, "_DefaultName", None) 

163 if name is None: 

164 raise RuntimeError("name is required for a task unless it has attribute _DefaultName") 

165 name = self._DefaultName 

166 self._name = name 

167 self._fullName = self._name 

168 if config is None: 

169 config = self.ConfigClass() 

170 self._taskDict = dict() 

171 loggerName = self._fullName 

172 if log is not None and log.name: 

173 loggerName = log.getChild(loggerName).name 

174 elif self._add_module_logger_prefix: 

175 # Prefix the logger name with the root module name. 

176 # We want all Task loggers to have this prefix to make 

177 # it easier to control them. This can be disabled by 

178 # a Task setting the class property _add_module_logger_prefix 

179 # to False -- in which case the logger name will not be 

180 # modified. 

181 module_name = self.__module__ 

182 module_root = module_name.split(".")[0] + "." 

183 if not loggerName.startswith(module_root): 

184 loggerName = module_root + loggerName 

185 

186 # Get a logger (that might be a subclass of logging.Logger). 

187 self.log = lsst.utils.logging.getLogger(loggerName) 

188 self.config = config 

189 if lsstDebug: 

190 self._display = lsstDebug.Info(self.__module__).display 

191 else: 

192 self._display = None 

193 self._taskDict[self._fullName] = weakref.ref(self) 

194 

195 @property 

196 def _parentTask(self) -> Optional['Task']: 

197 return self.__parentTask if self.__parentTask is None else self.__parentTask() 

198 

199 def emptyMetadata(self): 

200 """Empty (clear) the metadata for this Task and all sub-Tasks. 

201 """ 

202 for subtask in self._taskDict.values(): 

203 subtask().metadata = dafBase.PropertyList() 

204 

205 def getSchemaCatalogs(self): 

206 """Get the schemas generated by this task. 

207 

208 Returns 

209 ------- 

210 schemaCatalogs : `dict` 

211 Keys are butler dataset type, values are an empty catalog (an 

212 instance of the appropriate `lsst.afw.table` Catalog type) for 

213 this task. 

214 

215 Notes 

216 ----- 

217 

218 .. warning:: 

219 

220 Subclasses that use schemas must override this method. The default 

221 implementation returns an empty dict. 

222 

223 This method may be called at any time after the Task is constructed, 

224 which means that all task schemas should be computed at construction 

225 time, *not* when data is actually processed. This reflects the 

226 philosophy that the schema should not depend on the data. 

227 

228 Returning catalogs rather than just schemas allows us to save e.g. 

229 slots for SourceCatalog as well. 

230 

231 See also 

232 -------- 

233 Task.getAllSchemaCatalogs 

234 """ 

235 return {} 

236 

237 def getAllSchemaCatalogs(self): 

238 """Get schema catalogs for all tasks in the hierarchy, combining the 

239 results into a single dict. 

240 

241 Returns 

242 ------- 

243 schemacatalogs : `dict` 

244 Keys are butler dataset type, values are a empty catalog (an 

245 instance of the appropriate `lsst.afw.table` Catalog type) for all 

246 tasks in the hierarchy, from the top-level task down 

247 through all subtasks. 

248 

249 Notes 

250 ----- 

251 This method may be called on any task in the hierarchy; it will return 

252 the same answer, regardless. 

253 

254 The default implementation should always suffice. If your subtask uses 

255 schemas the override `Task.getSchemaCatalogs`, not this method. 

256 """ 

257 schemaDict = self.getSchemaCatalogs() 

258 for subtask in self._taskDict.values(): 

259 schemaDict.update(subtask().getSchemaCatalogs()) 

260 return schemaDict 

261 

262 def getFullMetadata(self): 

263 """Get metadata for all tasks. 

264 

265 Returns 

266 ------- 

267 metadata : `lsst.daf.base.PropertySet` 

268 The `~lsst.daf.base.PropertySet` keys are the full task name. 

269 Values are metadata for the top-level task and all subtasks, 

270 sub-subtasks, etc. 

271 

272 Notes 

273 ----- 

274 The returned metadata includes timing information (if 

275 ``@timer.timeMethod`` is used) and any metadata set by the task. The 

276 name of each item consists of the full task name with ``.`` replaced 

277 by ``:``, followed by ``.`` and the name of the item, e.g.:: 

278 

279 topLevelTaskName:subtaskName:subsubtaskName.itemName 

280 

281 using ``:`` in the full task name disambiguates the rare situation 

282 that a task has a subtask and a metadata item with the same name. 

283 """ 

284 fullMetadata = dafBase.PropertySet() 

285 for fullName, task in self.getTaskDict().items(): 

286 fullMetadata.set(fullName.replace(".", ":"), task().metadata) 

287 return fullMetadata 

288 

289 def getFullName(self): 

290 """Get the task name as a hierarchical name including parent task 

291 names. 

292 

293 Returns 

294 ------- 

295 fullName : `str` 

296 The full name consists of the name of the parent task and each 

297 subtask separated by periods. For example: 

298 

299 - The full name of top-level task "top" is simply "top". 

300 - The full name of subtask "sub" of top-level task "top" is 

301 "top.sub". 

302 - The full name of subtask "sub2" of subtask "sub" of top-level 

303 task "top" is "top.sub.sub2". 

304 """ 

305 return self._fullName 

306 

307 def getName(self): 

308 """Get the name of the task. 

309 

310 Returns 

311 ------- 

312 taskName : `str` 

313 Name of the task. 

314 

315 See also 

316 -------- 

317 getFullName 

318 """ 

319 return self._name 

320 

321 def getTaskDict(self): 

322 """Get a dictionary of all tasks as a shallow copy. 

323 

324 Returns 

325 ------- 

326 taskDict : `dict` 

327 Dictionary containing full task name: task object for the top-level 

328 task and all subtasks, sub-subtasks, etc. 

329 """ 

330 return self._taskDict.copy() 

331 

332 def makeSubtask(self, name, **keyArgs): 

333 """Create a subtask as a new instance as the ``name`` attribute of this 

334 task. 

335 

336 Parameters 

337 ---------- 

338 name : `str` 

339 Brief name of the subtask. 

340 keyArgs 

341 Extra keyword arguments used to construct the task. The following 

342 arguments are automatically provided and cannot be overridden: 

343 

344 - "config". 

345 - "parentTask". 

346 

347 Notes 

348 ----- 

349 The subtask must be defined by ``Task.config.name``, an instance of 

350 `~lsst.pex.config.ConfigurableField` or 

351 `~lsst.pex.config.RegistryField`. 

352 """ 

353 taskField = getattr(self.config, name, None) 

354 if taskField is None: 

355 raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}") 

356 subtask = taskField.apply(name=name, parentTask=self, **keyArgs) 

357 setattr(self, name, subtask) 

358 

359 @contextlib.contextmanager 

360 def timer(self, name, logLevel=logging.DEBUG): 

361 """Context manager to log performance data for an arbitrary block of 

362 code. 

363 

364 Parameters 

365 ---------- 

366 name : `str` 

367 Name of code being timed; data will be logged using item name: 

368 ``Start`` and ``End``. 

369 logLevel 

370 A `logging` level constant. 

371 

372 Examples 

373 -------- 

374 Creating a timer context: 

375 

376 .. code-block:: python 

377 

378 with self.timer("someCodeToTime"): 

379 pass # code to time 

380 

381 See also 

382 -------- 

383 timer.logInfo 

384 """ 

385 logInfo(obj=self, prefix=name + "Start", logLevel=logLevel) 

386 try: 

387 yield 

388 finally: 

389 logInfo(obj=self, prefix=name + "End", logLevel=logLevel) 

390 

391 @classmethod 

392 def makeField(cls, doc): 

393 """Make a `lsst.pex.config.ConfigurableField` for this task. 

394 

395 Parameters 

396 ---------- 

397 doc : `str` 

398 Help text for the field. 

399 

400 Returns 

401 ------- 

402 configurableField : `lsst.pex.config.ConfigurableField` 

403 A `~ConfigurableField` for this task. 

404 

405 Examples 

406 -------- 

407 Provides a convenient way to specify this task is a subtask of another 

408 task. 

409 

410 Here is an example of use: 

411 

412 .. code-block:: python 

413 

414 class OtherTaskConfig(lsst.pex.config.Config): 

415 aSubtask = ATaskClass.makeField("brief description of task") 

416 """ 

417 return ConfigurableField(doc=doc, target=cls) 

418 

419 def _computeFullName(self, name): 

420 """Compute the full name of a subtask or metadata item, given its brief 

421 name. 

422 

423 Parameters 

424 ---------- 

425 name : `str` 

426 Brief name of subtask or metadata item. 

427 

428 Returns 

429 ------- 

430 fullName : `str` 

431 The full name: the ``name`` argument prefixed by the full task name 

432 and a period. 

433 

434 Notes 

435 ----- 

436 For example: if the full name of this task is "top.sub.sub2" 

437 then ``_computeFullName("subname")`` returns 

438 ``"top.sub.sub2.subname"``. 

439 """ 

440 return f"{self._fullName}.{name}" 

441 

442 @staticmethod 

443 def _unpickle_via_factory(factory, args, kwargs): 

444 """Unpickle something by calling a factory 

445 

446 Allows subclasses to unpickle using `__reduce__` with keyword 

447 arguments as well as positional arguments. 

448 """ 

449 return factory(*args, **kwargs) 

450 

451 def _reduce_kwargs(self): 

452 """Returns a dict of the keyword arguments that should be used 

453 by `__reduce__`. 

454 

455 Subclasses with additional arguments should always call the parent 

456 class method to ensure that the standard parameters are included. 

457 

458 Returns 

459 ------- 

460 kwargs : `dict` 

461 Keyword arguments to be used when pickling. 

462 """ 

463 return dict(config=self.config, name=self._name, parentTask=self._parentTask,) 

464 

465 def __reduce__(self): 

466 """Pickler. 

467 """ 

468 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())