Coverage for python/lsst/pipe/base/task.py: 33%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

110 statements  

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <http://www.lsstcorp.org/LegalNotices/>. 

21# 

# Public names exported by a star-import of this module: the task base
# class and its companion exception type.
__all__ = ["Task", "TaskError"]

23 

24import contextlib 

25import logging 

26from typing import Optional 

27 

28import lsst.utils 

29import lsst.utils.logging 

30from lsst.utils.timer import logInfo 

31from lsst.pex.config import ConfigurableField 

32import weakref 

33 

34try: 

35 import lsstDebug 

36except ImportError: 

37 lsstDebug = None 

38 

39from ._task_metadata import TaskMetadata 

40 

# Task metadata can be held in more than one Python type.  Historically it
# was a PropertyList; the goal is to move to TaskMetadata so that behaviour
# and serialization are under explicit control.  While this switch is on,
# the legacy daf_base containers remain the ones that get instantiated.
METADATA_COMPATIBILITY = True

if not METADATA_COMPATIBILITY:
    # New-style storage: one class serves both per-task and full metadata.
    _TASK_METADATA_TYPE = _TASK_FULL_METADATA_TYPE = TaskMetadata
else:
    # Legacy storage; daf_base is only imported when it is actually needed.
    import lsst.daf.base as dafBase
    _TASK_METADATA_TYPE, _TASK_FULL_METADATA_TYPE = dafBase.PropertyList, dafBase.PropertySet

54 

55 

class TaskError(Exception):
    """Use to report errors for which a traceback is not useful.

    Notes
    -----
    Examples of such errors:

    - processCcd is asked to run detection, but not calibration, and no calexp
      is found.
    - coadd finds no valid images in the specified patch.
    """

68 

69 

class Task:
    r"""Base class for data processing tasks.

    See :ref:`task-framework-overview` to learn what tasks are, and
    :ref:`creating-a-task` for more information about writing tasks.

    Parameters
    ----------
    config : `Task.ConfigClass` instance, optional
        Configuration for this task (an instance of Task.ConfigClass, which
        is a task-specific subclass of `lsst.pex.config.Config`), or `None`.
        If `None`:

        - If parentTask specified then defaults to parentTask.config.\<name>
        - If parentTask is None then defaults to self.ConfigClass()

    name : `str`, optional
        Brief name of task, or `None`; if `None` then defaults to
        `Task._DefaultName`
    parentTask : `Task`-type, optional
        The parent task of this subtask, if any. Only a weak reference to
        the parent is retained.

        - If `None` (a top-level task) then ``config`` defaults to
          ``self.ConfigClass()`` and ``name`` defaults to ``_DefaultName``.
        - If not `None` (a subtask) then you must specify name.
    log : `logging.Logger` or subclass, optional
        Log whose name is used as a log name prefix, or `None` for no prefix.
        Ignored if parentTask is specified, in which case
        ``parentTask.log``\ 's name is used as a prefix. The task's log name is
        ``prefix + "." + name`` if a prefix exists, else ``name``. The task's
        log is then a child logger of ``parentTask.log`` (if ``parentTask``
        specified), or a child logger of the log from the argument
        (if ``log`` is not `None`). For a top-level task with no usable
        ``log``, the root package name of the task's module is prepended
        to the log name unless ``_add_module_logger_prefix`` is false.

    Raises
    ------
    RuntimeError
        Raised under these circumstances:

        - If ``parentTask`` is not `None` and ``name`` is `None`.
        - If ``parentTask`` is `None`, ``name`` is `None` and
          ``_DefaultName`` does not exist (or is `None`).

    Notes
    -----
    Useful attributes include:

    - ``log``: a `logging.Logger` or subclass.
    - ``config``: task-specific configuration; an instance of ``ConfigClass``
      (see below).
    - ``metadata``: an `lsst.daf.base.PropertyList` or `TaskMetadata` for
      collecting task-specific metadata, e.g. data quality and performance
      metrics. This is data that is only meant to be persisted, never to be
      used by the task.

    Subclasses typically have a method named ``runDataRef`` to perform the
    main data processing. Details:

    - ``runDataRef`` should process the minimum reasonable amount of data,
      typically a single CCD. Iteration, if desired, is performed by a caller
      of the method. This is good design and allows multiprocessing without
      the run method having to support it directly.
    - If ``runDataRef`` can persist or unpersist data:

      - ``runDataRef`` should accept a butler data reference (or a collection
        of data references, if appropriate, e.g. coaddition).
      - There should be a way to run the task without persisting data.
        Typically the run method returns all data, even if it is persisted, and
        the task's config method offers a flag to disable persistence.

    **Deprecated:** Tasks other than `~lsst.pipe.base.CmdLineTask` subclasses
    should *not* accept a blob such as a butler data reference. How we will
    handle data references is still TBD, so don't make changes yet!
    RHL 2014-06-27

    Subclasses must also have an attribute ``ConfigClass`` that is a subclass
    of `lsst.pex.config.Config` which configures the task. Subclasses should
    also have an attribute ``_DefaultName``: the default name if there is no
    parent task. ``_DefaultName`` is required for subclasses of
    `~lsst.pipe.base.CmdLineTask` and recommended for subclasses of Task
    because it simplifies construction (e.g. for unit tests).

    Tasks intended to be run from the command line should be subclasses of
    `~lsst.pipe.base.CmdLineTask` not Task.
    """

    _add_module_logger_prefix = True
    """Control whether the module prefix should be prepended to default
    logger names."""

    def __init__(self, config=None, name=None, parentTask=None, log=None):
        self.metadata = _TASK_METADATA_TYPE()
        # The parent keeps each subtask as an attribute (see makeSubtask), so
        # the back-reference is weak to avoid a parent <-> child cycle.
        self.__parentTask: Optional[weakref.ReferenceType]
        self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask)

        if parentTask is not None:
            if name is None:
                raise RuntimeError("name is required for a subtask")
            self._name = name
            self._fullName = parentTask._computeFullName(name)
            if config is None:
                config = getattr(parentTask.config, name)
            # All tasks in one hierarchy share a single registry dict.
            self._taskDict = parentTask._taskDict
            loggerName = parentTask.log.getChild(name).name
        else:
            if name is None:
                name = getattr(self, "_DefaultName", None)
                if name is None:
                    raise RuntimeError("name is required for a task unless it has attribute _DefaultName")
            self._name = name
            self._fullName = self._name
            if config is None:
                config = self.ConfigClass()
            self._taskDict = dict()
            loggerName = self._fullName
            if log is not None and log.name:
                loggerName = log.getChild(loggerName).name
            elif self._add_module_logger_prefix:
                # Prefix the logger name with the root module name.
                # We want all Task loggers to have this prefix to make
                # it easier to control them. This can be disabled by
                # a Task setting the class property _add_module_logger_prefix
                # to False -- in which case the logger name will not be
                # modified.
                module_name = self.__module__
                module_root = module_name.split(".")[0] + "."
                if not loggerName.startswith(module_root):
                    loggerName = module_root + loggerName

        # Get a logger (that might be a subclass of logging.Logger).
        self.log = lsst.utils.logging.getLogger(loggerName)
        self.config = config
        if lsstDebug:
            self._display = lsstDebug.Info(self.__module__).display
        else:
            self._display = None
        # Register this task (weakly) under its full name.
        self._taskDict[self._fullName] = weakref.ref(self)

    @property
    def _parentTask(self) -> Optional['Task']:
        """The parent task, or `None` if this is a top-level task or the
        parent has already been garbage-collected.
        """
        return self.__parentTask if self.__parentTask is None else self.__parentTask()

    def _iterLiveTasks(self):
        """Iterate over the registered tasks that are still alive.

        Yields
        ------
        fullName : `str`
            Full name under which the task is registered.
        task : `Task`
            The task itself. Entries whose weak reference is dead (the task
            has been garbage-collected) are skipped.
        """
        for fullName, taskRef in self._taskDict.items():
            task = taskRef()
            if task is not None:
                yield fullName, task

    def emptyMetadata(self):
        """Empty (clear) the metadata for this Task and all sub-Tasks.

        Notes
        -----
        Each live task in the hierarchy receives a fresh, empty metadata
        object; tasks that have been garbage-collected are skipped.
        """
        for _, task in self._iterLiveTasks():
            task.metadata = _TASK_METADATA_TYPE()

    def getSchemaCatalogs(self):
        """Get the schemas generated by this task.

        Returns
        -------
        schemaCatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for
            this task.

        Notes
        -----

        .. warning::

           Subclasses that use schemas must override this method. The default
           implementation returns an empty dict.

        This method may be called at any time after the Task is constructed,
        which means that all task schemas should be computed at construction
        time, *not* when data is actually processed. This reflects the
        philosophy that the schema should not depend on the data.

        Returning catalogs rather than just schemas allows us to save e.g.
        slots for SourceCatalog as well.

        See Also
        --------
        Task.getAllSchemaCatalogs
        """
        return {}

    def getAllSchemaCatalogs(self):
        """Get schema catalogs for all tasks in the hierarchy, combining the
        results into a single dict.

        Returns
        -------
        schemacatalogs : `dict`
            Keys are butler dataset type, values are an empty catalog (an
            instance of the appropriate `lsst.afw.table` Catalog type) for all
            tasks in the hierarchy, from the top-level task down
            through all subtasks.

        Notes
        -----
        This method may be called on any task in the hierarchy; it will return
        the same answer, regardless.

        The default implementation should always suffice. If your subtask uses
        schemas then override `Task.getSchemaCatalogs`, not this method.

        Tasks that have been garbage-collected are skipped.
        """
        # Copy first so that a dict owned by a subclass is never mutated.
        schemaDict = dict(self.getSchemaCatalogs())
        for _, task in self._iterLiveTasks():
            schemaDict.update(task.getSchemaCatalogs())
        return schemaDict

    def getFullMetadata(self):
        """Get metadata for all tasks.

        Returns
        -------
        metadata : `lsst.daf.base.PropertySet` or `TaskMetadata`
            The keys are the full task name.
            Values are metadata for the top-level task and all subtasks,
            sub-subtasks, etc.

        Notes
        -----
        The returned metadata includes timing information (if
        ``@timer.timeMethod`` is used) and any metadata set by the task. The
        name of each item consists of the full task name with ``.`` replaced
        by ``:``, followed by ``.`` and the name of the item, e.g.::

            topLevelTaskName:subtaskName:subsubtaskName.itemName

        using ``:`` in the full task name disambiguates the rare situation
        that a task has a subtask and a metadata item with the same name.

        Tasks that have been garbage-collected are skipped.
        """
        fullMetadata = _TASK_FULL_METADATA_TYPE()
        for fullName, task in self._iterLiveTasks():
            fullMetadata[fullName.replace(".", ":")] = task.metadata
        return fullMetadata

    def getFullName(self):
        """Get the task name as a hierarchical name including parent task
        names.

        Returns
        -------
        fullName : `str`
            The full name consists of the name of the parent task and each
            subtask separated by periods. For example:

            - The full name of top-level task "top" is simply "top".
            - The full name of subtask "sub" of top-level task "top" is
              "top.sub".
            - The full name of subtask "sub2" of subtask "sub" of top-level
              task "top" is "top.sub.sub2".
        """
        return self._fullName

    def getName(self):
        """Get the name of the task.

        Returns
        -------
        taskName : `str`
            Name of the task.

        See Also
        --------
        getFullName
        """
        return self._name

    def getTaskDict(self):
        """Get a dictionary of all tasks as a shallow copy.

        Returns
        -------
        taskDict : `dict`
            Dictionary mapping full task name to a weak reference
            (`weakref.ref`) to the task object, for the top-level task and
            all subtasks, sub-subtasks, etc. Call a value to obtain the task;
            the call returns `None` if the task no longer exists.
        """
        return self._taskDict.copy()

    def makeSubtask(self, name, **keyArgs):
        """Create a subtask as a new instance as the ``name`` attribute of this
        task.

        Parameters
        ----------
        name : `str`
            Brief name of the subtask.
        keyArgs
            Extra keyword arguments used to construct the task. The following
            arguments are automatically provided and cannot be overridden:

            - "config".
            - "parentTask".

        Raises
        ------
        KeyError
            Raised if this task's config has no field called ``name`` (or
            the field's value is `None`).

        Notes
        -----
        The subtask must be defined by ``Task.config.name``, an instance of
        `~lsst.pex.config.ConfigurableField` or
        `~lsst.pex.config.RegistryField`.
        """
        taskField = getattr(self.config, name, None)
        if taskField is None:
            raise KeyError(f"{self.getFullName()}'s config does not have field {name!r}")
        subtask = taskField.apply(name=name, parentTask=self, **keyArgs)
        setattr(self, name, subtask)

    @contextlib.contextmanager
    def timer(self, name, logLevel=logging.DEBUG):
        """Context manager to log performance data for an arbitrary block of
        code.

        Parameters
        ----------
        name : `str`
            Name of code being timed; data will be logged using item names
            ``<name>Start`` and ``<name>End``.
        logLevel
            A `logging` level constant.

        Examples
        --------
        Creating a timer context:

        .. code-block:: python

            with self.timer("someCodeToTime"):
                pass  # code to time

        See Also
        --------
        lsst.utils.timer.logInfo
        """
        logInfo(obj=self, prefix=name + "Start", logLevel=logLevel)
        try:
            yield
        finally:
            # Record the end marker even if the timed block raised.
            logInfo(obj=self, prefix=name + "End", logLevel=logLevel)

    @classmethod
    def makeField(cls, doc):
        """Make a `lsst.pex.config.ConfigurableField` for this task.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~ConfigurableField` for this task.

        Examples
        --------
        Provides a convenient way to specify this task is a subtask of another
        task.

        Here is an example of use:

        .. code-block:: python

            class OtherTaskConfig(lsst.pex.config.Config):
                aSubtask = ATaskClass.makeField("brief description of task")
        """
        return ConfigurableField(doc=doc, target=cls)

    def _computeFullName(self, name):
        """Compute the full name of a subtask or metadata item, given its brief
        name.

        Parameters
        ----------
        name : `str`
            Brief name of subtask or metadata item.

        Returns
        -------
        fullName : `str`
            The full name: the ``name`` argument prefixed by the full task name
            and a period.

        Notes
        -----
        For example: if the full name of this task is "top.sub.sub2"
        then ``_computeFullName("subname")`` returns
        ``"top.sub.sub2.subname"``.
        """
        return f"{self._fullName}.{name}"

    @staticmethod
    def _unpickle_via_factory(factory, args, kwargs):
        """Unpickle something by calling a factory.

        Allows subclasses to unpickle using `__reduce__` with keyword
        arguments as well as positional arguments.

        Parameters
        ----------
        factory : callable
            Callable that builds the object.
        args : sequence
            Positional arguments passed to ``factory``.
        kwargs : `dict`
            Keyword arguments passed to ``factory``.

        Returns
        -------
        obj
            Whatever ``factory(*args, **kwargs)`` returns.
        """
        return factory(*args, **kwargs)

    def _reduce_kwargs(self):
        """Return a dict of the keyword arguments that should be used
        by `__reduce__`.

        Subclasses with additional arguments should always call the parent
        class method to ensure that the standard parameters are included.

        Returns
        -------
        kwargs : `dict`
            Keyword arguments to be used when pickling: ``config``, ``name``
            and ``parentTask``.
        """
        return dict(config=self.config, name=self._name, parentTask=self._parentTask)

    def __reduce__(self):
        """Pickler.

        Returns
        -------
        reduced : `tuple`
            The factory helper plus its arguments: this task's class, no
            positional arguments, and the keyword arguments from
            `_reduce_kwargs`.
        """
        return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())

480 """Pickler. 

481 """ 

482 return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs())