lsst.pipe.base  13.0-14-g8b3bf66+24
cmdLineTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import sys
24 import traceback
25 import functools
26 import contextlib
27 
28 from builtins import str
29 from builtins import object
30 
31 import lsst.utils
32 from lsst.base import disableImplicitThreading
33 import lsst.afw.table as afwTable
34 from .task import Task, TaskError
35 from .struct import Struct
36 from .argumentParser import ArgumentParser
37 from lsst.base import Packages
38 from lsst.log import Log
39 
40 __all__ = ["CmdLineTask", "TaskRunner", "ButlerInitializedTaskRunner"]
41 
42 
43 def _poolFunctionWrapper(function, arg):
44  """Wrapper around function to catch exceptions that don't inherit from `Exception`.
45 
46  Such exceptions aren't caught by multiprocessing, which causes the slave process to crash and you end up
47  hitting the timeout.
48  """
49  try:
50  return function(arg)
51  except Exception:
52  raise # No worries
53  except:
54  # Need to wrap the exception with something multiprocessing will recognise
55  cls, exc, tb = sys.exc_info()
56  log = Log.getDefaultLogger()
57  log.warn("Unhandled exception %s (%s):\n%s" % (cls.__name__, exc, traceback.format_exc()))
58  raise Exception("Unhandled exception: %s (%s)" % (cls.__name__, exc))
59 
60 
def _runPool(pool, timeout, function, iterable):
    """Apply ``function`` over ``iterable`` via ``pool.map_async``, bounded by ``timeout``.

    ``map_async(...).get(timeout)`` is used instead of a plain ``map`` so that a
    KeyboardInterrupt (Ctrl-C) interrupts the wait immediately; see
    http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool

    The function is additionally wrapped in `_poolFunctionWrapper` so that
    exceptions not derived from `Exception` become ones multiprocessing can
    propagate back to the parent.
    """
    wrapped = functools.partial(_poolFunctionWrapper, function)
    return pool.map_async(wrapped, iterable).get(timeout)
71 
72 
@contextlib.contextmanager
def profile(filename, log=None):
    """Context manager for profiling with cProfile.

    Parameters
    ----------
    filename : `str`
        Filename to which to write profile (profiling disabled if `None` or empty).
    log : `lsst.log.Log`, optional
        Log object for logging the profile operations.

    If profiling is enabled, the context manager returns the cProfile.Profile object (otherwise
    it returns None), which allows additional control over profiling. You can obtain this using
    the "as" clause, e.g.:

        with profile(filename) as prof:
            runYourCodeHere()

    The output cumulative profile can be printed with a command-line like::

        python -c 'import pstats; pstats.Stats("<filename>").sort_stats("cumtime").print_stats(30)'
    """
    if not filename:
        # Profiling disabled: hand control back with no profiler attached
        yield
        return
    from cProfile import Profile
    profile = Profile()
    if log is not None:
        log.info("Enabling cProfile profiling")
    profile.enable()
    try:
        yield profile
    finally:
        # An exception raised in the managed block is thrown in at the yield;
        # without this finally the profiler would stay enabled and the stats
        # would never be written when the profiled code fails.
        profile.disable()
        profile.dump_stats(filename)
        if log is not None:
            log.info("cProfile stats written to %s" % filename)
110 
111 
112 class TaskRunner(object):
113  """Run a command-line task, using `multiprocessing` if requested.
114 
115  Parameters
116  ----------
117  TaskClass : `lsst.pipe.base.Task` subclass
118  The class of the task to run.
119  parsedCmd : `argparse.Namespace`
120  The parsed command-line arguments, as returned by the task's argument parser's
121  `~lsst.pipe.base.ArgumentParser.parse_args` method.
122 
123  .. warning::
124 
125  Do not store ``parsedCmd``, as this instance is pickled (if multiprocessing) and parsedCmd may
126  contain non-picklable elements. It certainly contains more data than we need to send to each
127  instance of the task.
128  doReturnResults : `bool`, optional
129  Should run return the collected result from each invocation of the task? This is only intended for
130  unit tests and similar use. It can easily exhaust memory (if the task returns enough data and you
131  call it enough times) and it will fail when using multiprocessing if the returned data cannot be
132  pickled.
133 
134  Note that even if ``doReturnResults`` is False a struct with a single member "exitStatus" is returned,
135  with value 0 or 1 to be returned to the unix shell.
136 
137  Raises
138  ------
139  ImportError
140  If multiprocessing is requested (and the task supports it) but the multiprocessing library cannot be
141  imported.
142 
143  Notes
144  -----
145  Each command-line task (subclass of `lsst.pipe.base.CmdLineTask`) has a task runner. By default it is this
146  class, but some tasks require a subclass. See the manual :ref:`creating-a-command-line-task` for more
147  information. See `CmdLineTask.parseAndRun` to see how a task runner is used.
148 
149  You may use this task runner for your command-line task if your task has a run method that takes exactly
150  one argument: a butler data reference. Otherwise you must provide a task-specific subclass of this runner
151  for your task's ``RunnerClass`` that overrides `TaskRunner.getTargetList` and possibly
152  `TaskRunner.__call__`. See `TaskRunner.getTargetList` for details.
153 
154  This design matches the common pattern for command-line tasks: the run method takes a single data
155  reference, of some suitable name. Additional arguments are rare, and if present, require a subclass of
156  `TaskRunner` that calls these additional arguments by name.
157 
158  Instances of this class must be picklable in order to be compatible with multiprocessing. If
159  multiprocessing is requested (``parsedCmd.numProcesses > 1``) then `run` calls `prepareForMultiProcessing`
160  to jettison optional non-picklable elements. If your task runner is not compatible with multiprocessing
161  then indicate this in your task by setting class variable ``canMultiprocess=False``.
162 
163  Due to a `python bug`__, handling a `KeyboardInterrupt` properly `requires specifying a timeout`__. This
164  timeout (in sec) can be specified as the ``timeout`` element in the output from
165  `~lsst.pipe.base.ArgumentParser` (the ``parsedCmd``), if available, otherwise we use `TaskRunner.TIMEOUT`.
166 
167  .. __: http://bugs.python.org/issue8296
168  .. __: http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
169  """
170 
171  TIMEOUT = 3600*24*30
172  """Default timeout (seconds) for multiprocessing."""
173 
174  def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
175  self.TaskClass = TaskClass
176  self.doReturnResults = bool(doReturnResults)
177  self.config = parsedCmd.config
178  self.log = parsedCmd.log
179  self.doRaise = bool(parsedCmd.doraise)
180  self.clobberConfig = bool(parsedCmd.clobberConfig)
181  self.doBackup = not bool(parsedCmd.noBackupConfig)
182  self.numProcesses = int(getattr(parsedCmd, 'processes', 1))
183 
184  self.timeout = getattr(parsedCmd, 'timeout', None)
185  if self.timeout is None or self.timeout <= 0:
186  self.timeout = self.TIMEOUT
187 
188  if self.numProcesses > 1:
189  if not TaskClass.canMultiprocess:
190  self.log.warn("This task does not support multiprocessing; using one process")
191  self.numProcesses = 1
192 
def prepareForMultiProcessing(self):
    """Prepare this instance for multiprocessing

    Optional non-picklable elements are removed.

    This is only called if the task is run under multiprocessing.
    """
    # NOTE(review): the ``def`` line for this method was lost in this listing
    # and has been restored; the class docstring documents that ``run`` calls
    # ``prepareForMultiProcessing`` to jettison non-picklable elements.
    # The log is not picklable; ``__call__`` recreates a default logger in the
    # subprocess when it finds ``self.log`` is None.
    self.log = None
def run(self, parsedCmd):
    """Run the task on all targets.

    Parameters
    ----------
    parsedCmd : `argparse.Namespace`
        Parsed command `argparse.Namespace`.

    Returns
    -------
    resultList : `list`
        A list of results returned by `TaskRunner.__call__`, or an empty list if `TaskRunner.__call__`
        is not called (e.g. if `TaskRunner.precall` returns `False`). See `TaskRunner.__call__`
        for details.

    Notes
    -----
    The task is run under multiprocessing if `TaskRunner.numProcesses` is more than 1; otherwise
    processing is serial.
    """
    resultList = []
    if self.numProcesses > 1:
        disableImplicitThreading()  # To prevent thread contention
        import multiprocessing
        # Jettison optional non-picklable elements (e.g. the log) before the
        # runner is pickled for the slave processes; this call was dropped
        # from this listing but is required per the class documentation.
        self.prepareForMultiProcessing()
        pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
        mapFunc = functools.partial(_runPool, pool, self.timeout)
    else:
        pool = None
        mapFunc = map

    if self.precall(parsedCmd):
        profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
        log = parsedCmd.log
        targetList = self.getTargetList(parsedCmd)
        if len(targetList) > 0:
            with profile(profileName, log):
                # Run the task using self.__call__
                resultList = list(mapFunc(self, targetList))
        else:
            log.warn("Not running the task because there is no data to process; "
                     "you may preview data using \"--show data\"")

    if pool is not None:
        pool.close()
        pool.join()

    return resultList
250 
251  @staticmethod
252  def getTargetList(parsedCmd, **kwargs):
253  """Get a list of (dataRef, kwargs) for `TaskRunner.__call__`.
254 
255  Parameters
256  ----------
257  parsedCmd : `argparse.Namespace`
258  The parsed command object returned by `lsst.pipe.base.argumentParser.ArgumentParser.parse_args`.
259  kwargs
260  Any additional keyword arguments. In the default `TaskRunner` this is an empty dict, but having
261  it simplifies overriding `TaskRunner` for tasks whose run method takes additional arguments
262  (see case (1) below).
263 
264  Notes
265  -----
266  The default implementation of `TaskRunner.getTargetList` and `TaskRunner.__call__` works for any
267  command-line task whose run method takes exactly one argument: a data reference. Otherwise you
268  must provide a variant of TaskRunner that overrides `TaskRunner.getTargetList` and possibly
269  `TaskRunner.__call__`. There are two cases.
270 
271  **Case 1**
272 
273  If your command-line task has a ``run`` method that takes one data reference followed by additional
274  arguments, then you need only override `TaskRunner.getTargetList` to return the additional arguments
275  as an argument dict. To make this easier, your overridden version of `~TaskRunner.getTargetList` may
276  call `TaskRunner.getTargetList` with the extra arguments as keyword arguments. For example, the
277  following adds an argument dict containing a single key: "calExpList", whose value is the list of data
278  IDs for the calexp ID argument::
279 
280  def getTargetList(parsedCmd):
281  return TaskRunner.getTargetList(
282  parsedCmd,
283  calExpList=parsedCmd.calexp.idList
284  )
285 
286  It is equivalent to this slightly longer version::
287 
288  @staticmethod
289  def getTargetList(parsedCmd):
290  argDict = dict(calExpList=parsedCmd.calexp.idList)
291  return [(dataId, argDict) for dataId in parsedCmd.id.idList]
292 
293  **Case 2**
294 
295  If your task does not meet condition (1) then you must override both TaskRunner.getTargetList and
296  `TaskRunner.__call__`. You may do this however you see fit, so long as `TaskRunner.getTargetList`
297  returns a list, each of whose elements is sent to `TaskRunner.__call__`, which runs your task.
298  """
299  return [(ref, kwargs) for ref in parsedCmd.id.refList]
300 
def makeTask(self, parsedCmd=None, args=None):
    """Create a Task instance.

    Parameters
    ----------
    parsedCmd
        Parsed command-line options (used for extra task args by some task runners).
    args
        Args tuple passed to `TaskRunner.__call__` (used for extra task arguments by some task runners).

    Notes
    -----
    ``makeTask`` can be called with either the ``parsedCmd`` argument or ``args`` argument set to None,
    but it must construct identical Task instances in either case.

    Subclasses may ignore this method entirely if they reimplement both `TaskRunner.precall` and
    `TaskRunner.__call__`.
    """
    # The base runner needs neither parsedCmd nor args: the stored config and
    # log are sufficient to build an identical task either way.
    return self.TaskClass(config=self.config, log=self.log)
320 
321  def _precallImpl(self, task, parsedCmd):
322  """The main work of `precall`.
323 
324  We write package versions, schemas and configs, or compare these to existing files on disk if present.
325  """
326  if not parsedCmd.noVersions:
327  task.writePackageVersions(parsedCmd.butler, clobber=parsedCmd.clobberVersions)
328  task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
329  task.writeSchemas(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
330 
def precall(self, parsedCmd):
    """Hook for code that should run exactly once, before multiprocessing.

    Returns
    -------
    ok : `bool`
        True if `TaskRunner.__call__` should subsequently be called.

    Notes
    -----
    .. warning::

        Implementations must take care to ensure that no unpicklable
        attributes are added to the TaskRunner itself, for compatibility
        with multiprocessing.

    The default implementation writes package versions, schemas and configs, or compares them to
    existing files on disk if present.
    """
    task = self.makeTask(parsedCmd=parsedCmd)

    if self.doRaise:
        # Let any failure propagate to the caller untouched
        self._precallImpl(task, parsedCmd)
        return True
    try:
        self._precallImpl(task, parsedCmd)
    except Exception as e:
        task.log.fatal("Failed in task initialization: %s", e)
        # TaskErrors are self-explanatory; only print a traceback for anything else
        if not isinstance(e, TaskError):
            traceback.print_exc(file=sys.stderr)
        return False
    return True
360 
def __call__(self, args):
    """Run the Task on a single target.

    Parameters
    ----------
    args
        Arguments for Task.run()

    Returns
    -------
    struct : `lsst.pipe.base.Struct`
        Contains these fields if ``doReturnResults`` is `True`:

        - ``dataRef``: the provided data reference.
        - ``metadata``: task metadata after execution of run.
        - ``result``: result returned by task run, or `None` if the task fails.

        If ``doReturnResults`` is `False` the struct contains:

        - ``exitStatus``.

    Notes
    -----
    This default implementation assumes that the ``args`` is a tuple
    containing a data reference and a dict of keyword arguments.

    .. warning::

        If you override this method and wish to return something when ``doReturnResults`` is `False`,
        then it must be picklable to support multiprocessing and it should be small enough that pickling
        and unpickling do not add excessive overhead.
    """
    dataRef, kwargs = args
    if self.log is None:
        # Running in a multiprocessing slave: the log was jettisoned before
        # pickling, so recreate a default one here.
        self.log = Log.getDefaultLogger()
    # Tag subsequent log messages with the data ID(s) being processed
    if hasattr(dataRef, "dataId"):
        self.log.MDC("LABEL", str(dataRef.dataId))
    elif isinstance(dataRef, (list, tuple)):
        self.log.MDC("LABEL", str([ref.dataId for ref in dataRef if hasattr(ref, "dataId")]))
    task = self.makeTask(args=args)
    result = None  # in case the task fails
    exitStatus = 0  # exit status for the shell
    if self.doRaise:
        result = task.run(dataRef, **kwargs)
    else:
        try:
            result = task.run(dataRef, **kwargs)
        except Exception as e:
            exitStatus = 1  # n.b. The shell exit value is the number of dataRefs returning
            # non-zero, so the actual value used here is lost

            # don't use a try block as we need to preserve the original exception
            if hasattr(dataRef, "dataId"):
                task.log.fatal("Failed on dataId=%s: %s", dataRef.dataId, e)
            elif isinstance(dataRef, (list, tuple)):
                task.log.fatal("Failed on dataId=[%s]: %s",
                               ", ".join(str(ref.dataId) for ref in dataRef), e)
            else:
                task.log.fatal("Failed on dataRef=%s: %s", dataRef, e)

            # TaskErrors are self-explanatory; print tracebacks for anything else
            if not isinstance(e, TaskError):
                traceback.print_exc(file=sys.stderr)
    # Persist the task metadata whether or not the run succeeded
    task.writeMetadata(dataRef)

    # remove MDC so it does not show up outside of task context
    self.log.MDCRemove("LABEL")

    if self.doReturnResults:
        return Struct(
            exitStatus=exitStatus,
            dataRef=dataRef,
            metadata=task.metadata,
            result=result,
        )
    else:
        return Struct(
            exitStatus=exitStatus,
        )
439 
440 
442  """A TaskRunner for `CmdLineTask`\ s that require a ``butler`` keyword argument to be passed to
443  their constructor.
444  """
445 
def makeTask(self, parsedCmd=None, args=None):
    """A variant of the base version that passes a butler argument to the task's constructor.

    Parameters
    ----------
    parsedCmd : `argparse.Namespace`
        Parsed command-line options, as returned by the `~lsst.pipe.base.ArgumentParser`; if specified
        then ``args`` is ignored.
    args
        Other arguments; if ``parsedCmd`` is `None` then this must be specified.

    Raises
    ------
    RuntimeError
        Raised if ``parsedCmd`` and ``args`` are both `None`.
    """
    if parsedCmd is None and args is None:
        raise RuntimeError("parsedCmd or args must be specified")
    if parsedCmd is not None:
        butler = parsedCmd.butler
    else:
        # args is the (dataRef, kwargs) tuple from __call__; recover the
        # butler from the data reference
        dataRef, _kwargs = args
        butler = dataRef.butlerSubset.butler
    return self.TaskClass(config=self.config, log=self.log, butler=butler)
470 
471 
473  """Base class for command-line tasks: tasks that may be executed from the command-line.
474 
475  Notes
476  -----
477  See :ref:`task-framework-overview` to learn what tasks are and :ref:`creating-a-command-line-task` for
478  more information about writing command-line tasks.
479 
480  Subclasses must specify the following class variables:
481 
482  - ``ConfigClass``: configuration class for your task (a subclass of `lsst.pex.config.Config`, or if your
483  task needs no configuration, then `lsst.pex.config.Config` itself).
484  - ``_DefaultName``: default name used for this task (a str).
485 
486  Subclasses may also specify the following class variables:
487 
488  - ``RunnerClass``: a task runner class. The default is ``TaskRunner``, which works for any task
489  with a run method that takes exactly one argument: a data reference. If your task does
490  not meet this requirement then you must supply a variant of ``TaskRunner``; see ``TaskRunner``
491  for more information.
492  - ``canMultiprocess``: the default is `True`; set `False` if your task does not support multiprocessing.
493 
494  Subclasses must specify a method named ``run``:
495 
496  - By default ``run`` accepts a single butler data reference, but you can specify an alternate task runner
497  (subclass of ``TaskRunner``) as the value of class variable ``RunnerClass`` if your run method needs
498  something else.
499  - ``run`` is expected to return its data in a `lsst.pipe.base.Struct`. This provides safety for evolution
500  of the task since new values may be added without harming existing code.
501  - The data returned by ``run`` must be picklable if your task is to support multiprocessing.
502  """
503  RunnerClass = TaskRunner
504  canMultiprocess = True
505 
@classmethod
def applyOverrides(cls, config):
    """Hook allowing a task to change its config *after* camera-specific overrides
    are loaded, but before any command-line overrides are applied.

    Parameters
    ----------
    config : instance of task's ``ConfigClass``
        Task configuration.

    Notes
    -----
    Camera-specific override files may retarget subtasks, wiping out changes
    made in ``ConfigClass.setDefaults``; this hook lets the task re-assert such
    changes. See LSST Trac ticket #2282 for more discussion.

    .. warning::

        Only `CmdLineTask.parseAndRun` invokes this hook; configs constructed
        any other way will not receive these overrides.
    """
    # Intentionally a no-op in the base class; subclasses override as needed.
    pass
527 
@classmethod
def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False):
    """Parse an argument list and run the command.

    Parameters
    ----------
    args : `list`, optional
        List of command-line arguments; if `None` use `sys.argv`.
    config : `lsst.pex.config.Config`-type, optional
        Config for task. If `None` use `Task.ConfigClass`.
    log : `lsst.log.Log`-type, optional
        Log. If `None` use the default log.
    doReturnResults : `bool`, optional
        If `True`, return the results of this task. Default is `False`. This is only intended for
        unit tests and similar use. It can easily exhaust memory (if the task returns enough data and you
        call it enough times) and it will fail when using multiprocessing if the returned data cannot be
        pickled.

    Returns
    -------
    struct : `lsst.pipe.base.Struct`
        Fields are:

        - ``argumentParser``: the argument parser.
        - ``parsedCmd``: the parsed command returned by the argument parser's
          `lsst.pipe.base.ArgumentParser.parse_args` method.
        - ``taskRunner``: the task runner used to run the task (an instance of `Task.RunnerClass`).
        - ``resultList``: results returned by the task runner's ``run`` method, one entry per invocation.
          This will typically be a list of `None` unless ``doReturnResults`` is `True`;
          see `Task.RunnerClass` (`TaskRunner` by default) for more information.

    Notes
    -----
    Calling this method with no arguments specified is the standard way to run a command-line task
    from the command-line. For an example see ``pipe_tasks`` ``bin/makeSkyMap.py`` or almost any other
    file in that directory.

    If one or more of the dataIds fails then this routine will exit (with a status giving the
    number of failed dataIds) rather than returning this struct; this behaviour can be
    overridden by specifying the ``--noExit`` command-line option.
    """
    if args is None:
        # Invoked from the shell: record the full command line for the log
        commandAsStr = " ".join(sys.argv)
        args = sys.argv[1:]
    else:
        # Invoked programmatically: record the caller and the argument list instead
        commandAsStr = "{}{}".format(lsst.utils.get_caller_name(skip=1), tuple(args))

    argumentParser = cls._makeArgumentParser()
    if config is None:
        config = cls.ConfigClass()
    parsedCmd = argumentParser.parse_args(config=config, args=args, log=log, override=cls.applyOverrides)
    # print this message after parsing the command so the log is fully configured
    parsedCmd.log.info("Running: %s", commandAsStr)

    taskRunner = cls.RunnerClass(TaskClass=cls, parsedCmd=parsedCmd, doReturnResults=doReturnResults)
    resultList = taskRunner.run(parsedCmd)

    try:
        # Count targets with non-zero exit status; results without an
        # exitStatus attribute make this raise, which we treat as success
        nFailed = sum(((res.exitStatus != 0) for res in resultList))
    except Exception as e:
        parsedCmd.log.warn("Unable to retrieve exit status (%s); assuming success", e)
        nFailed = 0

    if nFailed > 0:
        if parsedCmd.noExit:
            parsedCmd.log.warn("%d dataRefs failed; not exiting as --noExit was set", nFailed)
        else:
            # Exit status is the number of failed dataRefs
            sys.exit(nFailed)

    return Struct(
        argumentParser=argumentParser,
        parsedCmd=parsedCmd,
        taskRunner=taskRunner,
        resultList=resultList,
    )
603 
@classmethod
def _makeArgumentParser(cls):
    """Create and return an argument parser.

    Returns
    -------
    parser : `lsst.pipe.base.ArgumentParser`
        The argument parser for this task.

    Notes
    -----
    By default this returns an `~lsst.pipe.base.ArgumentParser` with one ID argument named `--id` of
    dataset type ``raw``.

    Your task subclass may need to override this method to change the dataset type or data ref level,
    or to add additional data ID arguments. If you add additional data ID arguments or your task's
    run method takes more than a single data reference then you will also have to provide a
    task-specific task runner (see TaskRunner for more information).
    """
    argParser = ArgumentParser(name=cls._DefaultName)
    argParser.add_id_argument(name="--id", datasetType="raw",
                              help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")
    return argParser
627 
def writeConfig(self, butler, clobber=False, doBackup=True):
    """Write the configuration used for processing the data, or check that an existing
    one is equal to the new one if present.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler used to write the config. The config is written to dataset type
        `CmdLineTask._getConfigName`.
    clobber : `bool`, optional
        A boolean flag that controls what happens if a config already has been saved:
        - `True`: overwrite or rename the existing config, depending on ``doBackup``.
        - `False`: raise `TaskError` if this config does not match the existing config.
    doBackup : bool, optional
        Set to `True` to backup the config files if clobbering.

    Raises
    ------
    TaskError
        Raised if ``clobber`` is `False` and the new config does not match the one on disk.
    """
    configName = self._getConfigName()
    if configName is None:
        # This task elects not to persist its config
        return
    if clobber:
        butler.put(self.config, configName, doBackup=doBackup)
    elif butler.datasetExists(configName):
        # this may be subject to a race condition; see #2789
        try:
            oldConfig = butler.get(configName, immediate=True)
        except Exception as exc:
            # Re-raise with the same type but a more helpful message
            raise type(exc)("Unable to read stored config file %s (%s); consider using --clobber-config" %
                            (configName, exc))

        def logConfigMismatch(msg):
            # Invoked by Config.compare for each mismatching field
            self.log.fatal("Comparing configuration: %s", msg)

        if not self.config.compare(oldConfig, shortcut=False, output=logConfigMismatch):
            raise TaskError(
                ("Config does not match existing task config %r on disk; tasks configurations " +
                 "must be consistent within the same output repo (override with --clobber-config)") %
                (configName,))
    else:
        butler.put(self.config, configName)
667 
def writeSchemas(self, butler, clobber=False, doBackup=True):
    """Write the schemas returned by `lsst.pipe.base.Task.getAllSchemaCatalogs`.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler used to write the schema. Each schema is written to the dataset type specified as
        the key in the dict returned by `~lsst.pipe.base.Task.getAllSchemaCatalogs`.
    clobber : `bool`, optional
        A boolean flag that controls what happens if a schema already has been saved:
        - `True`: overwrite or rename the existing schema, depending on ``doBackup``.
        - `False`: raise `TaskError` if this schema does not match the existing schema.
    doBackup : `bool`, optional
        Set to `True` to backup the schema files if clobbering.

    Notes
    -----
    If ``clobber`` is `False` and an existing schema does not match a current schema,
    then some schemas may have been saved successfully and others may not, and there is no easy way to
    tell which is which.
    """
    for dataset, catalog in self.getAllSchemaCatalogs().items():
        # Schemas are persisted alongside their dataset under "<dataset>_schema"
        schemaDataset = dataset + "_schema"
        if clobber:
            butler.put(catalog, schemaDataset, doBackup=doBackup)
        elif butler.datasetExists(schemaDataset):
            oldSchema = butler.get(schemaDataset, immediate=True).getSchema()
            # Require exact equality, not mere compatibility
            if not oldSchema.compare(catalog.getSchema(), afwTable.Schema.IDENTICAL):
                raise TaskError(
                    ("New schema does not match schema %r on disk; schemas must be " +
                     " consistent within the same output repo (override with --clobber-config)") %
                    (dataset,))
        else:
            butler.put(catalog, schemaDataset)
702 
def writeMetadata(self, dataRef):
    """Write the metadata produced from processing the data.

    Parameters
    ----------
    dataRef
        Butler data reference used to write the metadata.
        The metadata is written to dataset type `CmdLineTask._getMetadataName`.

    Notes
    -----
    Any failure is logged as a warning rather than raised, so a metadata
    persistence problem never aborts the processing itself.
    """
    try:
        datasetName = self._getMetadataName()
        if datasetName is None:
            # This task elects not to persist metadata
            return
        dataRef.put(self.getFullMetadata(), datasetName)
    except Exception as e:
        self.log.warn("Could not persist metadata for dataId=%s: %s", dataRef.dataId, e)
718 
def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages"):
    """Compare and write package versions.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        Data butler used to read/write the package versions.
    clobber : `bool`, optional
        A boolean flag that controls what happens if versions already have been saved:
        - `True`: overwrite or rename the existing version info, depending on ``doBackup``.
        - `False`: raise `TaskError` if this version info does not match the existing.
    doBackup : `bool`, optional
        If `True` and clobbering, old package version files are backed up.
    dataset : `str`, optional
        Name of dataset to read/write.

    Raises
    ------
    TaskError
        Raised if there is a version mismatch with current and persisted lists of package versions.

    Notes
    -----
    Note that this operation is subject to a race condition.
    """
    packages = Packages.fromSystem()

    if clobber:
        return butler.put(packages, dataset, doBackup=doBackup)
    if not butler.datasetExists(dataset):
        # Nothing persisted yet; just record the current versions
        return butler.put(packages, dataset)

    try:
        old = butler.get(dataset, immediate=True)
    except Exception as exc:
        # Re-raise with the same type but a more helpful message
        raise type(exc)("Unable to read stored version dataset %s (%s); "
                        "consider using --clobber-versions or --no-versions" %
                        (dataset, exc))
    # Note that because we can only detect python modules that have been imported, the stored
    # list of products may be more or less complete than what we have now. What's important is
    # that the products that are in common have the same version.
    diff = packages.difference(old)
    if diff:
        raise TaskError(
            "Version mismatch (" +
            "; ".join("%s: %s vs %s" % (pkg, diff[pkg][1], diff[pkg][0]) for pkg in diff) +
            "); consider using --clobber-versions or --no-versions")
    # Update the old set of packages in case we have more packages that haven't been persisted.
    extra = packages.extra(old)
    if extra:
        old.update(packages)
        butler.put(old, dataset, doBackup=doBackup)
771 
772  def _getConfigName(self):
773  """Get the name of the config dataset type, or `None` if config is not to be persisted.
774 
775  Notes
776  -----
777  The name may depend on the config; that is why this is not a class method.
778  """
779  return self._DefaultName + "_config"
780 
781  def _getMetadataName(self):
782  """Get the name of the metadata dataset type, or `None` if metadata is not to be persisted.
783 
784  Notes
785  -----
786  The name may depend on the config; that is why this is not a class method.
787  """
788  return self._DefaultName + "_metadata"
def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False)
Definition: cmdLineTask.py:529
def _precallImpl(self, task, parsedCmd)
Definition: cmdLineTask.py:321
def getFullMetadata(self)
Definition: task.py:212
def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages")
Definition: cmdLineTask.py:719
def getAllSchemaCatalogs(self)
Definition: task.py:190
def writeSchemas(self, butler, clobber=False, doBackup=True)
Definition: cmdLineTask.py:668
def makeTask(self, parsedCmd=None, args=None)
Definition: cmdLineTask.py:301
def __init__(self, TaskClass, parsedCmd, doReturnResults=False)
Definition: cmdLineTask.py:174
def profile(filename, log=None)
Definition: cmdLineTask.py:74
def makeTask(self, parsedCmd=None, args=None)
Definition: cmdLineTask.py:446
def getTargetList(parsedCmd, **kwargs)
Definition: cmdLineTask.py:252
def writeConfig(self, butler, clobber=False, doBackup=True)
Definition: cmdLineTask.py:628