lsst.pipe.base  15.0-5-g73aaf0b
cmdLineTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import sys
24 import traceback
25 import functools
26 import contextlib
27 
28 from builtins import str
29 from builtins import object
30 
31 import lsst.utils
32 from lsst.base import disableImplicitThreading
33 import lsst.afw.table as afwTable
34 from .task import Task, TaskError
35 from .struct import Struct
36 from .argumentParser import ArgumentParser
37 from lsst.base import Packages
38 from lsst.log import Log
39 
40 __all__ = ["CmdLineTask", "TaskRunner", "ButlerInitializedTaskRunner"]
41 
42 
43 def _runPool(pool, timeout, function, iterable):
44  """Wrapper around ``pool.map_async``, to handle timeout
45 
46  This is required so as to trigger an immediate interrupt on the KeyboardInterrupt (Ctrl-C); see
47  http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
48 
49  Further wraps the function in ``_poolFunctionWrapper`` to catch exceptions
50  that don't inherit from `Exception`.
51  """
52  return pool.map_async(function, iterable).get(timeout)
53 
54 
@contextlib.contextmanager
def profile(filename, log=None):
    """Context manager for profiling with cProfile.

    Parameters
    ----------
    filename : `str`
        Filename to which to write profile (profiling disabled if `None` or empty).
    log : `lsst.log.Log`, optional
        Log object for logging the profile operations.

    Notes
    -----
    If profiling is enabled, the context manager yields the `cProfile.Profile`
    object (otherwise it yields `None`), which allows additional control over
    profiling. You can obtain this using the "as" clause, e.g.::

        with profile(filename) as prof:
            runYourCodeHere()

    The output cumulative profile can be printed with a command-line like::

        python -c 'import pstats; pstats.Stats("<filename>").sort_stats("cumtime").print_stats(30)'
    """
    if not filename:
        # Profiling disabled: yield a placeholder and do nothing else.
        yield
        return
    from cProfile import Profile

    prof = Profile()
    if log is not None:
        log.info("Enabling cProfile profiling")
    prof.enable()
    yield prof
    prof.disable()
    prof.dump_stats(filename)
    if log is not None:
        log.info("cProfile stats written to %s" % filename)
92 
93 
class TaskRunner(object):
    """Run a command-line task, using `multiprocessing` if requested.

    Parameters
    ----------
    TaskClass : `lsst.pipe.base.Task` subclass
        The class of the task to run.
    parsedCmd : `argparse.Namespace`
        The parsed command-line arguments, as returned by the task's argument parser's
        `~lsst.pipe.base.ArgumentParser.parse_args` method.

        .. warning::

           Do not store ``parsedCmd``, as this instance is pickled (if multiprocessing) and parsedCmd
           may contain non-picklable elements. It certainly contains more data than we need to send to
           each instance of the task.
    doReturnResults : `bool`, optional
        Should run return the collected result from each invocation of the task? This is only intended
        for unit tests and similar use. It can easily exhaust memory (if the task returns enough data
        and you call it enough times) and it will fail when using multiprocessing if the returned data
        cannot be pickled.

        Note that even if ``doReturnResults`` is False a struct with a single member "exitStatus" is
        returned, with value 0 or 1 to be returned to the unix shell.

    Raises
    ------
    ImportError
        If multiprocessing is requested (and the task supports it) but the multiprocessing library
        cannot be imported.

    Notes
    -----
    Each command-line task (subclass of `lsst.pipe.base.CmdLineTask`) has a task runner. By default it
    is this class, but some tasks require a subclass. See the manual
    :ref:`creating-a-command-line-task` for more information. See `CmdLineTask.parseAndRun` to see how
    a task runner is used.

    You may use this task runner for your command-line task if your task has a run method that takes
    exactly one argument: a butler data reference. Otherwise you must provide a task-specific subclass
    of this runner for your task's ``RunnerClass`` that overrides `TaskRunner.getTargetList` and
    possibly `TaskRunner.__call__`. See `TaskRunner.getTargetList` for details.

    Instances of this class must be picklable in order to be compatible with multiprocessing. If
    multiprocessing is requested (``parsedCmd.numProcesses > 1``) then `run` calls
    `prepareForMultiProcessing` to jettison optional non-picklable elements. If your task runner is not
    compatible with multiprocessing then indicate this in your task by setting class variable
    ``canMultiprocess=False``.

    Due to a `python bug`__, handling a `KeyboardInterrupt` properly `requires specifying a timeout`__.
    This timeout (in sec) can be specified as the ``timeout`` element in the output from
    `~lsst.pipe.base.ArgumentParser` (the ``parsedCmd``), if available, otherwise we use
    `TaskRunner.TIMEOUT`.

    By default, we disable "implicit" threading -- ie, as provided by underlying numerical libraries
    such as MKL or BLAS. This is designed to avoid thread contention both when a single command line
    task spawns multiple processes and when multiple users are running on a shared system. Users can
    override this behaviour by setting the ``LSST_ALLOW_IMPLICIT_THREADS`` environment variable.

    .. __: http://bugs.python.org/issue8296
    .. __: http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
    """

    TIMEOUT = 3600*24*30
    """Default timeout (seconds) for multiprocessing."""

    def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
        self.TaskClass = TaskClass
        self.doReturnResults = bool(doReturnResults)
        self.config = parsedCmd.config
        self.log = parsedCmd.log
        self.doRaise = bool(parsedCmd.doraise)
        self.clobberConfig = bool(parsedCmd.clobberConfig)
        self.doBackup = not bool(parsedCmd.noBackupConfig)
        self.numProcesses = int(getattr(parsedCmd, 'processes', 1))

        self.timeout = getattr(parsedCmd, 'timeout', None)
        if self.timeout is None or self.timeout <= 0:
            self.timeout = self.TIMEOUT

        if self.numProcesses > 1:
            if not TaskClass.canMultiprocess:
                self.log.warn("This task does not support multiprocessing; using one process")
                self.numProcesses = 1

    # NOTE(review): this method's ``def`` line was lost in extraction of this
    # listing; it is restored here to match its (still-present) docstring and body.
    def prepareForMultiProcessing(self):
        """Prepare this instance for multiprocessing

        Optional non-picklable elements are removed.

        This is only called if the task is run under multiprocessing.
        """
        self.log = None

    def run(self, parsedCmd):
        """Run the task on all targets.

        Parameters
        ----------
        parsedCmd : `argparse.Namespace`
            Parsed command `argparse.Namespace`.

        Returns
        -------
        resultList : `list`
            A list of results returned by `TaskRunner.__call__`, or an empty list if
            `TaskRunner.__call__` is not called (e.g. if `TaskRunner.precall` returns `False`). See
            `TaskRunner.__call__` for details.

        Notes
        -----
        The task is run under multiprocessing if `TaskRunner.numProcesses` is more than 1; otherwise
        processing is serial.
        """
        resultList = []
        disableImplicitThreading()  # To prevent thread contention
        if self.numProcesses > 1:
            import multiprocessing
            # Restored line (lost in extraction): jettison non-picklable state
            # before this instance is pickled into the worker processes.
            self.prepareForMultiProcessing()
            pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
            mapFunc = functools.partial(_runPool, pool, self.timeout)
        else:
            pool = None
            mapFunc = map

        if self.precall(parsedCmd):
            profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
            log = parsedCmd.log
            targetList = self.getTargetList(parsedCmd)
            if len(targetList) > 0:
                with profile(profileName, log):
                    # Run the task using self.__call__
                    resultList = list(mapFunc(self, targetList))
            else:
                log.warn("Not running the task because there is no data to process; "
                         "you may preview data using \"--show data\"")

        if pool is not None:
            pool.close()
            pool.join()

        return resultList

    @staticmethod
    def getTargetList(parsedCmd, **kwargs):
        """Get a list of (dataRef, kwargs) for `TaskRunner.__call__`.

        Parameters
        ----------
        parsedCmd : `argparse.Namespace`
            The parsed command object returned by
            `lsst.pipe.base.argumentParser.ArgumentParser.parse_args`.
        kwargs
            Any additional keyword arguments. In the default `TaskRunner` this is an empty dict, but
            having it simplifies overriding `TaskRunner` for tasks whose run method takes additional
            arguments (see case (1) below).

        Notes
        -----
        The default implementation of `TaskRunner.getTargetList` and `TaskRunner.__call__` works for
        any command-line task whose run method takes exactly one argument: a data reference. Otherwise
        you must provide a variant of TaskRunner that overrides `TaskRunner.getTargetList` and possibly
        `TaskRunner.__call__`. There are two cases.

        **Case 1**

        If your command-line task has a ``run`` method that takes one data reference followed by
        additional arguments, then you need only override `TaskRunner.getTargetList` to return the
        additional arguments as an argument dict. To make this easier, your overridden version of
        `~TaskRunner.getTargetList` may call `TaskRunner.getTargetList` with the extra arguments as
        keyword arguments. For example, the following adds an argument dict containing a single key:
        "calExpList", whose value is the list of data IDs for the calexp ID argument::

            def getTargetList(parsedCmd):
                return TaskRunner.getTargetList(
                    parsedCmd,
                    calExpList=parsedCmd.calexp.idList
                )

        It is equivalent to this slightly longer version::

            @staticmethod
            def getTargetList(parsedCmd):
                argDict = dict(calExpList=parsedCmd.calexp.idList)
                return [(dataId, argDict) for dataId in parsedCmd.id.idList]

        **Case 2**

        If your task does not meet condition (1) then you must override both
        TaskRunner.getTargetList and `TaskRunner.__call__`. You may do this however you see fit, so
        long as `TaskRunner.getTargetList` returns a list, each of whose elements is sent to
        `TaskRunner.__call__`, which runs your task.
        """
        return [(ref, kwargs) for ref in parsedCmd.id.refList]

    def makeTask(self, parsedCmd=None, args=None):
        """Create a Task instance.

        Parameters
        ----------
        parsedCmd
            Parsed command-line options (used for extra task args by some task runners).
        args
            Args tuple passed to `TaskRunner.__call__` (used for extra task arguments by some task
            runners).

        Notes
        -----
        ``makeTask`` can be called with either the ``parsedCmd`` argument or ``args`` argument set to
        None, but it must construct identical Task instances in either case.

        Subclasses may ignore this method entirely if they reimplement both `TaskRunner.precall` and
        `TaskRunner.__call__`.
        """
        return self.TaskClass(config=self.config, log=self.log)

    def _precallImpl(self, task, parsedCmd):
        """The main work of `precall`.

        We write package versions, schemas and configs, or compare these to existing files on disk if
        present.
        """
        if not parsedCmd.noVersions:
            task.writePackageVersions(parsedCmd.butler, clobber=parsedCmd.clobberVersions)
        task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
        task.writeSchemas(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)

    def precall(self, parsedCmd):
        """Hook for code that should run exactly once, before multiprocessing.

        Notes
        -----
        Must return True if `TaskRunner.__call__` should subsequently be called.

        .. warning::

           Implementations must take care to ensure that no unpicklable attributes are added to the
           TaskRunner itself, for compatibility with multiprocessing.

        The default implementation writes package versions, schemas and configs, or compares them to
        existing files on disk if present.
        """
        task = self.makeTask(parsedCmd=parsedCmd)

        if self.doRaise:
            self._precallImpl(task, parsedCmd)
        else:
            try:
                self._precallImpl(task, parsedCmd)
            except Exception as e:
                task.log.fatal("Failed in task initialization: %s", e)
                if not isinstance(e, TaskError):
                    traceback.print_exc(file=sys.stderr)
                return False
        return True

    def __call__(self, args):
        """Run the Task on a single target.

        Parameters
        ----------
        args
            Arguments for Task.run()

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            Contains these fields if ``doReturnResults`` is `True`:

            - ``dataRef``: the provided data reference.
            - ``metadata``: task metadata after execution of run.
            - ``result``: result returned by task run, or `None` if the task fails.
            - ``exitStatus``: 0 if the task completed successfully, 1 otherwise.

            If ``doReturnResults`` is `False` the struct contains:

            - ``exitStatus``: 0 if the task completed successfully, 1 otherwise.

        Notes
        -----
        This default implementation assumes that the ``args`` is a tuple containing a data reference
        and a dict of keyword arguments.

        .. warning::

           If you override this method and wish to return something when ``doReturnResults`` is
           `False`, then it must be picklable to support multiprocessing and it should be small enough
           that pickling and unpickling do not add excessive overhead.
        """
        dataRef, kwargs = args
        if self.log is None:
            # The log was jettisoned by prepareForMultiProcessing; recreate it in the worker.
            self.log = Log.getDefaultLogger()
        if hasattr(dataRef, "dataId"):
            self.log.MDC("LABEL", str(dataRef.dataId))
        elif isinstance(dataRef, (list, tuple)):
            self.log.MDC("LABEL", str([ref.dataId for ref in dataRef if hasattr(ref, "dataId")]))
        task = self.makeTask(args=args)
        result = None  # in case the task fails
        exitStatus = 0  # exit status for the shell
        if self.doRaise:
            result = task.run(dataRef, **kwargs)
        else:
            try:
                result = task.run(dataRef, **kwargs)
            except Exception as e:
                # The shell exit value will be the number of dataRefs returning
                # non-zero, so the actual value used here is lost.
                exitStatus = 1

                # don't use a try block as we need to preserve the original exception
                eName = type(e).__name__
                if hasattr(dataRef, "dataId"):
                    task.log.fatal("Failed on dataId=%s: %s: %s", dataRef.dataId, eName, e)
                elif isinstance(dataRef, (list, tuple)):
                    task.log.fatal("Failed on dataIds=[%s]: %s: %s",
                                   ", ".join(str(ref.dataId) for ref in dataRef), eName, e)
                else:
                    task.log.fatal("Failed on dataRef=%s: %s: %s", dataRef, eName, e)

                if not isinstance(e, TaskError):
                    traceback.print_exc(file=sys.stderr)

        # Ensure all errors have been logged and aren't hanging around in a buffer
        sys.stdout.flush()
        sys.stderr.flush()

        task.writeMetadata(dataRef)

        # remove MDC so it does not show up outside of task context
        self.log.MDCRemove("LABEL")

        if self.doReturnResults:
            return Struct(
                exitStatus=exitStatus,
                dataRef=dataRef,
                metadata=task.metadata,
                result=result,
            )
        else:
            return Struct(
                exitStatus=exitStatus,
            )
434 
435 
437  """A `TaskRunner` for `CmdLineTask`\ s that require a ``butler`` keyword argument to be passed to
438  their constructor.
439  """
440 
441  def makeTask(self, parsedCmd=None, args=None):
442  """A variant of the base version that passes a butler argument to the task's constructor.
443 
444  Parameters
445  ----------
446  parsedCmd : `argparse.Namespace`
447  Parsed command-line options, as returned by the `~lsst.pipe.base.ArgumentParser`; if specified
448  then args is ignored.
449  args
450  Other arguments; if ``parsedCmd`` is `None` then this must be specified.
451 
452  Raises
453  ------
454  RuntimeError
455  Raised if ``parsedCmd`` and ``args`` are both `None`.
456  """
457  if parsedCmd is not None:
458  butler = parsedCmd.butler
459  elif args is not None:
460  dataRef, kwargs = args
461  butler = dataRef.butlerSubset.butler
462  else:
463  raise RuntimeError("parsedCmd or args must be specified")
464  return self.TaskClass(config=self.config, log=self.log, butler=butler)
465 
466 
468  """Base class for command-line tasks: tasks that may be executed from the command-line.
469 
470  Notes
471  -----
472  See :ref:`task-framework-overview` to learn what tasks are and :ref:`creating-a-command-line-task` for
473  more information about writing command-line tasks.
474 
475  Subclasses must specify the following class variables:
476 
477  - ``ConfigClass``: configuration class for your task (a subclass of `lsst.pex.config.Config`, or if your
478  task needs no configuration, then `lsst.pex.config.Config` itself).
479  - ``_DefaultName``: default name used for this task (a str).
480 
481  Subclasses may also specify the following class variables:
482 
483  - ``RunnerClass``: a task runner class. The default is ``TaskRunner``, which works for any task
484  with a run method that takes exactly one argument: a data reference. If your task does
485  not meet this requirement then you must supply a variant of ``TaskRunner``; see ``TaskRunner``
486  for more information.
487  - ``canMultiprocess``: the default is `True`; set `False` if your task does not support multiprocessing.
488 
489  Subclasses must specify a method named ``run``:
490 
491  - By default ``run`` accepts a single butler data reference, but you can specify an alternate task runner
492  (subclass of ``TaskRunner``) as the value of class variable ``RunnerClass`` if your run method needs
493  something else.
494  - ``run`` is expected to return its data in a `lsst.pipe.base.Struct`. This provides safety for evolution
495  of the task since new values may be added without harming existing code.
496  - The data returned by ``run`` must be picklable if your task is to support multiprocessing.
497  """
498  RunnerClass = TaskRunner
499  canMultiprocess = True
500 
501  @classmethod
502  def applyOverrides(cls, config):
503  """A hook to allow a task to change the values of its config *after* the camera-specific
504  overrides are loaded but before any command-line overrides are applied.
505 
506  Parameters
507  ----------
508  config : instance of task's ``ConfigClass``
509  Task configuration.
510 
511  Notes
512  -----
513  This is necessary in some cases because the camera-specific overrides may retarget subtasks,
514  wiping out changes made in ConfigClass.setDefaults. See LSST Trac ticket #2282 for more discussion.
515 
516  .. warning::
517 
518  This is called by CmdLineTask.parseAndRun; other ways of constructing a config will not apply
519  these overrides.
520  """
521  pass
522 
523  @classmethod
524  def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False):
525  """Parse an argument list and run the command.
526 
527  Parameters
528  ----------
529  args : `list`, optional
530  List of command-line arguments; if `None` use `sys.argv`.
531  config : `lsst.pex.config.Config`-type, optional
532  Config for task. If `None` use `Task.ConfigClass`.
533  log : `lsst.log.Log`-type, optional
534  Log. If `None` use the default log.
535  doReturnResults : `bool`, optional
536  If `True`, return the results of this task. Default is `False`. This is only intended for
537  unit tests and similar use. It can easily exhaust memory (if the task returns enough data and you
538  call it enough times) and it will fail when using multiprocessing if the returned data cannot be
539  pickled.
540 
541  Returns
542  -------
543  struct : `lsst.pipe.base.Struct`
544  Fields are:
545 
546  - ``argumentParser``: the argument parser.
547  - ``parsedCmd``: the parsed command returned by the argument parser's
548  `lsst.pipe.base.ArgumentParser.parse_args` method.
549  - ``taskRunner``: the task runner used to run the task (an instance of `Task.RunnerClass`).
550  - ``resultList``: results returned by the task runner's ``run`` method, one entry per invocation.
551  This will typically be a list of `None` unless ``doReturnResults`` is `True`;
552  see `Task.RunnerClass` (`TaskRunner` by default) for more information.
553 
554  Notes
555  -----
556  Calling this method with no arguments specified is the standard way to run a command-line task
557  from the command-line. For an example see ``pipe_tasks`` ``bin/makeSkyMap.py`` or almost any other
558  file in that directory.
559 
560  If one or more of the dataIds fails then this routine will exit (with a status giving the
561  number of failed dataIds) rather than returning this struct; this behaviour can be
562  overridden by specifying the ``--noExit`` command-line option.
563  """
564  if args is None:
565  commandAsStr = " ".join(sys.argv)
566  args = sys.argv[1:]
567  else:
568  commandAsStr = "{}{}".format(lsst.utils.get_caller_name(skip=1), tuple(args))
569 
570  argumentParser = cls._makeArgumentParser()
571  if config is None:
572  config = cls.ConfigClass()
573  parsedCmd = argumentParser.parse_args(config=config, args=args, log=log, override=cls.applyOverrides)
574  # print this message after parsing the command so the log is fully configured
575  parsedCmd.log.info("Running: %s", commandAsStr)
576 
577  taskRunner = cls.RunnerClass(TaskClass=cls, parsedCmd=parsedCmd, doReturnResults=doReturnResults)
578  resultList = taskRunner.run(parsedCmd)
579 
580  try:
581  nFailed = sum(((res.exitStatus != 0) for res in resultList))
582  except (TypeError, AttributeError) as e:
583  # NOTE: TypeError if resultList is None, AttributeError if it doesn't have exitStatus.
584  parsedCmd.log.warn("Unable to retrieve exit status (%s); assuming success", e)
585  nFailed = 0
586 
587  if nFailed > 0:
588  if parsedCmd.noExit:
589  parsedCmd.log.error("%d dataRefs failed; not exiting as --noExit was set", nFailed)
590  else:
591  sys.exit(nFailed)
592 
593  return Struct(
594  argumentParser=argumentParser,
595  parsedCmd=parsedCmd,
596  taskRunner=taskRunner,
597  resultList=resultList,
598  )
599 
600  @classmethod
601  def _makeArgumentParser(cls):
602  """Create and return an argument parser.
603 
604  Returns
605  -------
606  parser : `lsst.pipe.base.ArgumentParser`
607  The argument parser for this task.
608 
609  Notes
610  -----
611  By default this returns an `~lsst.pipe.base.ArgumentParser` with one ID argument named `--id` of
612  dataset type ``raw``.
613 
614  Your task subclass may need to override this method to change the dataset type or data ref level,
615  or to add additional data ID arguments. If you add additional data ID arguments or your task's
616  run method takes more than a single data reference then you will also have to provide a task-specific
617  task runner (see TaskRunner for more information).
618  """
619  parser = ArgumentParser(name=cls._DefaultName)
620  parser.add_id_argument(name="--id", datasetType="raw",
621  help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")
622  return parser
623 
624  def writeConfig(self, butler, clobber=False, doBackup=True):
625  """Write the configuration used for processing the data, or check that an existing
626  one is equal to the new one if present.
627 
628  Parameters
629  ----------
630  butler : `lsst.daf.persistence.Butler`
631  Data butler used to write the config. The config is written to dataset type
632  `CmdLineTask._getConfigName`.
633  clobber : `bool`, optional
634  A boolean flag that controls what happens if a config already has been saved:
635  - `True`: overwrite or rename the existing config, depending on ``doBackup``.
636  - `False`: raise `TaskError` if this config does not match the existing config.
637  doBackup : bool, optional
638  Set to `True` to backup the config files if clobbering.
639  """
640  configName = self._getConfigName()
641  if configName is None:
642  return
643  if clobber:
644  butler.put(self.config, configName, doBackup=doBackup)
645  elif butler.datasetExists(configName, write=True):
646  # this may be subject to a race condition; see #2789
647  try:
648  oldConfig = butler.get(configName, immediate=True)
649  except Exception as exc:
650  raise type(exc)("Unable to read stored config file %s (%s); consider using --clobber-config" %
651  (configName, exc))
652 
653  def logConfigMismatch(msg):
654  self.log.fatal("Comparing configuration: %s", msg)
655 
656  if not self.config.compare(oldConfig, shortcut=False, output=logConfigMismatch):
657  raise TaskError(
658  ("Config does not match existing task config %r on disk; tasks configurations " +
659  "must be consistent within the same output repo (override with --clobber-config)") %
660  (configName,))
661  else:
662  butler.put(self.config, configName)
663 
664  def writeSchemas(self, butler, clobber=False, doBackup=True):
665  """Write the schemas returned by `lsst.pipe.base.Task.getAllSchemaCatalogs`.
666 
667  Parameters
668  ----------
669  butler : `lsst.daf.persistence.Butler`
670  Data butler used to write the schema. Each schema is written to the dataset type specified as the
671  key in the dict returned by `~lsst.pipe.base.Task.getAllSchemaCatalogs`.
672  clobber : `bool`, optional
673  A boolean flag that controls what happens if a schema already has been saved:
674  - `True`: overwrite or rename the existing schema, depending on ``doBackup``.
675  - `False`: raise `TaskError` if this schema does not match the existing schema.
676  doBackup : `bool`, optional
677  Set to `True` to backup the schema files if clobbering.
678 
679  Notes
680  -----
681  If ``clobber`` is `False` and an existing schema does not match a current schema,
682  then some schemas may have been saved successfully and others may not, and there is no easy way to
683  tell which is which.
684  """
685  for dataset, catalog in self.getAllSchemaCatalogs().items():
686  schemaDataset = dataset + "_schema"
687  if clobber:
688  butler.put(catalog, schemaDataset, doBackup=doBackup)
689  elif butler.datasetExists(schemaDataset, write=True):
690  oldSchema = butler.get(schemaDataset, immediate=True).getSchema()
691  if not oldSchema.compare(catalog.getSchema(), afwTable.Schema.IDENTICAL):
692  raise TaskError(
693  ("New schema does not match schema %r on disk; schemas must be " +
694  " consistent within the same output repo (override with --clobber-config)") %
695  (dataset,))
696  else:
697  butler.put(catalog, schemaDataset)
698 
699  def writeMetadata(self, dataRef):
700  """Write the metadata produced from processing the data.
701 
702  Parameters
703  ----------
704  dataRef
705  Butler data reference used to write the metadata.
706  The metadata is written to dataset type `CmdLineTask._getMetadataName`.
707  """
708  try:
709  metadataName = self._getMetadataName()
710  if metadataName is not None:
711  dataRef.put(self.getFullMetadata(), metadataName)
712  except Exception as e:
713  self.log.warn("Could not persist metadata for dataId=%s: %s", dataRef.dataId, e)
714 
715  def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages"):
716  """Compare and write package versions.
717 
718  Parameters
719  ----------
720  butler : `lsst.daf.persistence.Butler`
721  Data butler used to read/write the package versions.
722  clobber : `bool`, optional
723  A boolean flag that controls what happens if versions already have been saved:
724  - `True`: overwrite or rename the existing version info, depending on ``doBackup``.
725  - `False`: raise `TaskError` if this version info does not match the existing.
726  doBackup : `bool`, optional
727  If `True` and clobbering, old package version files are backed up.
728  dataset : `str`, optional
729  Name of dataset to read/write.
730 
731  Raises
732  ------
733  TaskError
734  Raised if there is a version mismatch with current and persisted lists of package versions.
735 
736  Notes
737  -----
738  Note that this operation is subject to a race condition.
739  """
740  packages = Packages.fromSystem()
741 
742  if clobber:
743  return butler.put(packages, dataset, doBackup=doBackup)
744  if not butler.datasetExists(dataset, write=True):
745  return butler.put(packages, dataset)
746 
747  try:
748  old = butler.get(dataset, immediate=True)
749  except Exception as exc:
750  raise type(exc)("Unable to read stored version dataset %s (%s); "
751  "consider using --clobber-versions or --no-versions" %
752  (dataset, exc))
753  # Note that because we can only detect python modules that have been imported, the stored
754  # list of products may be more or less complete than what we have now. What's important is
755  # that the products that are in common have the same version.
756  diff = packages.difference(old)
757  if diff:
758  raise TaskError(
759  "Version mismatch (" +
760  "; ".join("%s: %s vs %s" % (pkg, diff[pkg][1], diff[pkg][0]) for pkg in diff) +
761  "); consider using --clobber-versions or --no-versions")
762  # Update the old set of packages in case we have more packages that haven't been persisted.
763  extra = packages.extra(old)
764  if extra:
765  old.update(packages)
766  butler.put(old, dataset, doBackup=doBackup)
767 
768  def _getConfigName(self):
769  """Get the name of the config dataset type, or `None` if config is not to be persisted.
770 
771  Notes
772  -----
773  The name may depend on the config; that is why this is not a class method.
774  """
775  return self._DefaultName + "_config"
776 
777  def _getMetadataName(self):
778  """Get the name of the metadata dataset type, or `None` if metadata is not to be persisted.
779 
780  Notes
781  -----
782  The name may depend on the config; that is why this is not a class method.
783  """
784  return self._DefaultName + "_metadata"
def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False)
Definition: cmdLineTask.py:524
def _precallImpl(self, task, parsedCmd)
Definition: cmdLineTask.py:308
def getFullMetadata(self)
Definition: task.py:213
def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages")
Definition: cmdLineTask.py:715
def getAllSchemaCatalogs(self)
Definition: task.py:191
def writeSchemas(self, butler, clobber=False, doBackup=True)
Definition: cmdLineTask.py:664
def makeTask(self, parsedCmd=None, args=None)
Definition: cmdLineTask.py:288
def __init__(self, TaskClass, parsedCmd, doReturnResults=False)
Definition: cmdLineTask.py:161
def profile(filename, log=None)
Definition: cmdLineTask.py:56
def makeTask(self, parsedCmd=None, args=None)
Definition: cmdLineTask.py:441
def getTargetList(parsedCmd, kwargs)
Definition: cmdLineTask.py:239
def writeConfig(self, butler, clobber=False, doBackup=True)
Definition: cmdLineTask.py:624