lsst.pipe.base  13.0-5-g0e05785
 All Classes Namespaces Files Functions Variables Pages
cmdLineTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 # Copyright 2008-2015 AURA/LSST.
4 #
5 # This product includes software developed by the
6 # LSST Project (http://www.lsst.org/).
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the LSST License Statement and
19 # the GNU General Public License along with this program. If not,
20 # see <https://www.lsstcorp.org/LegalNotices/>.
21 #
22 from __future__ import absolute_import, division
23 import sys
24 import traceback
25 import functools
26 import contextlib
27 
28 from builtins import str
29 from builtins import object
30 
31 import lsst.utils
32 from lsst.base import disableImplicitThreading
33 import lsst.afw.table as afwTable
34 from .task import Task, TaskError
35 from .struct import Struct
36 from .argumentParser import ArgumentParser
37 from lsst.base import Packages
38 from lsst.log import Log
39 
40 __all__ = ["CmdLineTask", "TaskRunner", "ButlerInitializedTaskRunner"]
41 
42 
def _poolFunctionWrapper(function, arg):
    """Wrapper around function to catch exceptions that don't inherit from Exception

    Such exceptions aren't caught by multiprocessing, which causes the slave
    process to crash and you end up hitting the timeout.

    @param function callable to invoke in the worker process
    @param arg the single argument to pass to `function`
    @return whatever `function(arg)` returns
    """
    try:
        return function(arg)
    except Exception:
        raise  # No worries: Exception subclasses are handled by multiprocessing itself
    except:
        # Bare except is deliberate: it catches exceptions that do NOT derive from
        # Exception, which would otherwise crash the worker silently.
        # Need to wrap the exception with something multiprocessing will recognise
        cls, exc, tb = sys.exc_info()
        log = Log.getDefaultLogger()
        log.warn("Unhandled exception %s (%s):\n%s" % (cls.__name__, exc, traceback.format_exc()))
        raise Exception("Unhandled exception: %s (%s)" % (cls.__name__, exc))
59 
60 
def _runPool(pool, timeout, function, iterable):
    """Wrapper around pool.map_async, to handle timeout

    This is required so as to trigger an immediate interrupt on the KeyboardInterrupt (Ctrl-C); see
    http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool

    Further wraps the function in _poolFunctionWrapper to catch exceptions
    that don't inherit from Exception.

    @param pool multiprocessing pool on which to map
    @param timeout time (sec) after which AsyncResult.get gives up waiting
    @param function callable to apply to each element of `iterable`
    @param iterable elements to process
    @return list of results from the pool, in input order
    """
    return pool.map_async(functools.partial(_poolFunctionWrapper, function), iterable).get(timeout)
71 
72 
@contextlib.contextmanager
def profile(filename, log=None):
    """!Context manager for profiling with cProfile

    @param filename filename to which to write profile (profiling disabled if None or empty)
    @param log log object for logging the profile operations

    If profiling is enabled, the context manager returns the cProfile.Profile object (otherwise
    it returns None), which allows additional control over profiling. You can obtain this using
    the "as" clause, e.g.:

    with profile(filename) as prof:
        runYourCodeHere()

    The output cumulative profile can be printed with a command-line like:

    python -c 'import pstats; pstats.Stats("<filename>").sort_stats("cumtime").print_stats(30)'
    """
    if not filename:
        # Nothing to do
        yield
        return
    from cProfile import Profile
    # Use a local name distinct from this function's name to avoid shadowing it
    prof = Profile()
    if log is not None:
        log.info("Enabling cProfile profiling")
    prof.enable()
    try:
        yield prof
    finally:
        # Previously there was no try/finally, so an exception raised by the
        # managed body left the profiler enabled and never wrote the stats file.
        prof.disable()
        prof.dump_stats(filename)
        if log is not None:
            log.info("cProfile stats written to %s" % filename)
105 
106 
class TaskRunner(object):
    """!Run a command-line task, using multiprocessing if requested.

    Each command-line task (subclass of CmdLineTask) has a task runner. By default it is
    this class, but some tasks require a subclass. See the manual "how to write a command-line task"
    in the pipe_tasks documentation for more information.
    See CmdLineTask.parseAndRun to see how a task runner is used.

    You may use this task runner for your command-line task if your task has a run method
    that takes exactly one argument: a butler data reference. Otherwise you must
    provide a task-specific subclass of this runner for your task's `RunnerClass`
    that overrides TaskRunner.getTargetList and possibly TaskRunner.\_\_call\_\_.
    See TaskRunner.getTargetList for details.

    This design matches the common pattern for command-line tasks: the run method takes a single
    data reference, of some suitable name. Additional arguments are rare, and if present, require
    a subclass of TaskRunner that calls these additional arguments by name.

    Instances of this class must be picklable in order to be compatible with multiprocessing.
    If multiprocessing is requested (parsedCmd.numProcesses > 1) then run() calls prepareForMultiProcessing
    to jettison optional non-picklable elements. If your task runner is not compatible with multiprocessing
    then indicate this in your task by setting class variable canMultiprocess=False.

    Due to a python bug [1], handling a KeyboardInterrupt properly requires specifying a timeout [2]. This
    timeout (in sec) can be specified as the "timeout" element in the output from ArgumentParser
    (the "parsedCmd"), if available, otherwise we use TaskRunner.TIMEOUT.

    [1] http://bugs.python.org/issue8296
    [2] http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool)
    """
    TIMEOUT = 9999  # Default timeout (sec) for multiprocessing
138 
    def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
        """!Construct a TaskRunner

        @warning Do not store parsedCmd, as this instance is pickled (if multiprocessing) and parsedCmd may
        contain non-picklable elements. It certainly contains more data than we need to send to each
        instance of the task.

        @param TaskClass The class of the task to run
        @param parsedCmd The parsed command-line arguments, as returned by the task's argument parser's
        parse_args method.
        @param doReturnResults Should run return the collected result from each invocation of the task?
        This is only intended for unit tests and similar use.
        It can easily exhaust memory (if the task returns enough data and you call it enough times)
        and it will fail when using multiprocessing if the returned data cannot be pickled.

        @throws ImportError if multiprocessing requested (and the task supports it)
        but the multiprocessing library cannot be imported.
        """
        self.TaskClass = TaskClass
        self.doReturnResults = bool(doReturnResults)
        self.config = parsedCmd.config
        self.log = parsedCmd.log
        self.doRaise = bool(parsedCmd.doraise)
        self.clobberConfig = bool(parsedCmd.clobberConfig)
        # noBackupConfig is inverted here: doBackup means "keep a backup copy"
        self.doBackup = not bool(parsedCmd.noBackupConfig)
        # 'processes' and 'timeout' are optional parser attributes, hence getattr
        self.numProcesses = int(getattr(parsedCmd, 'processes', 1))

        self.timeout = getattr(parsedCmd, 'timeout', None)
        if self.timeout is None or self.timeout <= 0:
            self.timeout = self.TIMEOUT  # fall back to the class-level default

        if self.numProcesses > 1:
            if not TaskClass.canMultiprocess:
                self.log.warn("This task does not support multiprocessing; using one process")
                self.numProcesses = 1
174 
176  """!Prepare this instance for multiprocessing by removing optional non-picklable elements.
177 
178  This is only called if the task is run under multiprocessing.
179  """
180  self.log = None
181 
    def run(self, parsedCmd):
        """!Run the task on all targets.

        The task is run under multiprocessing if numProcesses > 1; otherwise processing is serial.

        @param parsedCmd the parsed command returned by the argument parser's parse_args method

        @return a list of results returned by TaskRunner.\_\_call\_\_, or an empty list if
        TaskRunner.\_\_call\_\_ is not called (e.g. if TaskRunner.precall returns `False`).
        See TaskRunner.\_\_call\_\_ for details.
        """
        resultList = []
        if self.numProcesses > 1:
            disableImplicitThreading()  # To prevent thread contention
            import multiprocessing
            # Jettison non-picklable attributes before this instance is pickled
            # to the worker processes (see the class docstring).
            self.prepareForMultiProcessing()
            pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
            mapFunc = functools.partial(_runPool, pool, self.timeout)
        else:
            pool = None
            mapFunc = map  # serial processing; list() below forces evaluation

        if self.precall(parsedCmd):
            profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
            log = parsedCmd.log
            targetList = self.getTargetList(parsedCmd)
            if len(targetList) > 0:
                with profile(profileName, log):
                    # Run the task using self.__call__
                    resultList = list(mapFunc(self, targetList))
            else:
                log.warn("Not running the task because there is no data to process; "
                         "you may preview data using \"--show data\"")

        if pool is not None:
            pool.close()
            pool.join()

        return resultList
219 
220  @staticmethod
221  def getTargetList(parsedCmd, **kwargs):
222  """!Return a list of (dataRef, kwargs) to be used as arguments for TaskRunner.\_\_call\_\_.
223 
224  @param parsedCmd the parsed command object (an argparse.Namespace) returned by
225  \ref argumentParser.ArgumentParser.parse_args "ArgumentParser.parse_args".
226  @param **kwargs any additional keyword arguments. In the default TaskRunner
227  this is an empty dict, but having it simplifies overriding TaskRunner for tasks
228  whose run method takes additional arguments (see case (1) below).
229 
230  The default implementation of TaskRunner.getTargetList and TaskRunner.\_\_call\_\_ works for any
231  command-line task whose run method takes exactly one argument: a data reference.
232  Otherwise you must provide a variant of TaskRunner that overrides TaskRunner.getTargetList
233  and possibly TaskRunner.\_\_call\_\_. There are two cases:
234 
235  (1) If your command-line task has a `run` method that takes one data reference followed by additional
236  arguments, then you need only override TaskRunner.getTargetList to return the additional arguments as
237  an argument dict. To make this easier, your overridden version of getTargetList may call
238  TaskRunner.getTargetList with the extra arguments as keyword arguments. For example,
239  the following adds an argument dict containing a single key: "calExpList", whose value is the list
240  of data IDs for the calexp ID argument:
241 
242  \code
243  \@staticmethod
244  def getTargetList(parsedCmd):
245  return TaskRunner.getTargetList(parsedCmd, calExpList=parsedCmd.calexp.idList)
246  \endcode
247 
248  It is equivalent to this slightly longer version:
249 
250  \code
251  \@staticmethod
252  def getTargetList(parsedCmd):
253  argDict = dict(calExpList=parsedCmd.calexp.idList)
254  return [(dataId, argDict) for dataId in parsedCmd.id.idList]
255  \endcode
256 
257  (2) If your task does not meet condition (1) then you must override both TaskRunner.getTargetList
258  and TaskRunner.\_\_call\_\_. You may do this however you see fit, so long as TaskRunner.getTargetList
259  returns a list, each of whose elements is sent to TaskRunner.\_\_call\_\_, which runs your task.
260  """
261  return [(ref, kwargs) for ref in parsedCmd.id.refList]
262 
263  def makeTask(self, parsedCmd=None, args=None):
264  """!Create a Task instance
265 
266  @param[in] parsedCmd parsed command-line options (used for extra task args by some task runners)
267  @param[in] args args tuple passed to TaskRunner.\_\_call\_\_ (used for extra task arguments
268  by some task runners)
269 
270  makeTask() can be called with either the 'parsedCmd' argument or 'args' argument set to None,
271  but it must construct identical Task instances in either case.
272 
273  Subclasses may ignore this method entirely if they reimplement both TaskRunner.precall and
274  TaskRunner.\_\_call\_\_
275  """
276  return self.TaskClass(config=self.config, log=self.log)
277 
278  def _precallImpl(self, task, parsedCmd):
279  """The main work of 'precall'
280 
281  We write package versions, schemas and configs, or compare these to existing
282  files on disk if present.
283  """
284  if not parsedCmd.noVersions:
285  task.writePackageVersions(parsedCmd.butler, clobber=parsedCmd.clobberVersions)
286  task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
287  task.writeSchemas(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
288 
289  def precall(self, parsedCmd):
290  """!Hook for code that should run exactly once, before multiprocessing is invoked.
291 
292  Must return True if TaskRunner.\_\_call\_\_ should subsequently be called.
293 
294  @warning Implementations must take care to ensure that no unpicklable attributes are added to
295  the TaskRunner itself, for compatibility with multiprocessing.
296 
297  The default implementation writes package versions, schemas and configs, or compares
298  them to existing files on disk if present.
299  """
300  task = self.makeTask(parsedCmd=parsedCmd)
301 
302  if self.doRaise:
303  self._precallImpl(task, parsedCmd)
304  else:
305  try:
306  self._precallImpl(task, parsedCmd)
307  except Exception as e:
308  task.log.fatal("Failed in task initialization: %s", e)
309  if not isinstance(e, TaskError):
310  traceback.print_exc(file=sys.stderr)
311  return False
312  return True
313 
    def __call__(self, args):
        """!Run the Task on a single target.

        This default implementation assumes that the 'args' is a tuple
        containing a data reference and a dict of keyword arguments.

        @warning if you override this method and wish to return something when
        doReturnResults is false, then it must be picklable to support
        multiprocessing and it should be small enough that pickling and
        unpickling do not add excessive overhead.

        @param args Arguments for Task.run()

        @return:
        - None if doReturnResults false
        - A pipe_base Struct containing these fields if doReturnResults true:
            - dataRef: the provided data reference
            - metadata: task metadata after execution of run
            - result: result returned by task run, or None if the task fails
        """
        dataRef, kwargs = args
        if self.log is None:
            # prepareForMultiProcessing set log to None before pickling;
            # recreate a default logger in this worker process.
            self.log = Log.getDefaultLogger()
        if hasattr(dataRef, "dataId"):
            # Tag subsequent log messages with the data ID being processed
            self.log.MDC("LABEL", str(dataRef.dataId))
        elif isinstance(dataRef, (list, tuple)):
            self.log.MDC("LABEL", str([ref.dataId for ref in dataRef if hasattr(ref, "dataId")]))
        task = self.makeTask(args=args)
        result = None  # in case the task fails
        if self.doRaise:
            result = task.run(dataRef, **kwargs)
        else:
            try:
                result = task.run(dataRef, **kwargs)
            except Exception as e:
                # The original exception is preserved (logged, not re-raised),
                # so processing of other targets can continue.
                if hasattr(dataRef, "dataId"):
                    task.log.fatal("Failed on dataId=%s: %s", dataRef.dataId, e)
                elif isinstance(dataRef, (list, tuple)):
                    task.log.fatal("Failed on dataId=[%s]: %s",
                                   ", ".join(str(ref.dataId) for ref in dataRef), e)
                else:
                    task.log.fatal("Failed on dataRef=%s: %s", dataRef, e)

                # TaskError carries a user-facing message; only dump a traceback
                # for unexpected exception types.
                if not isinstance(e, TaskError):
                    traceback.print_exc(file=sys.stderr)
        # Metadata is persisted whether or not the task succeeded.
        task.writeMetadata(dataRef)

        if self.doReturnResults:
            return Struct(
                dataRef=dataRef,
                metadata=task.metadata,
                result=result,
            )
368 
369 
371  """!A TaskRunner for CmdLineTasks that require a 'butler' keyword argument to be passed to
372  their constructor.
373  """
374 
375  def makeTask(self, parsedCmd=None, args=None):
376  """!A variant of the base version that passes a butler argument to the task's constructor
377 
378  @param[in] parsedCmd parsed command-line options, as returned by the argument parser;
379  if specified then args is ignored
380  @param[in] args other arguments; if parsedCmd is None then this must be specified
381 
382  @throw RuntimeError if parsedCmd and args are both None
383  """
384  if parsedCmd is not None:
385  butler = parsedCmd.butler
386  elif args is not None:
387  dataRef, kwargs = args
388  butler = dataRef.butlerSubset.butler
389  else:
390  raise RuntimeError("parsedCmd or args must be specified")
391  return self.TaskClass(config=self.config, log=self.log, butler=butler)
392 
393 
class CmdLineTask(Task):
    """!Base class for command-line tasks: tasks that may be executed from the command line

    See \ref pipeBase_introduction "pipe_base introduction" to learn what tasks are,
    and \ref pipeTasks_writeCmdLineTask "how to write a command-line task" for more information
    about writing command-line tasks.
    If the second link is broken (as it will be before the documentation is cross-linked)
    then look at the main page of pipe_tasks documentation for a link.

    Subclasses must specify the following class variables:
    * ConfigClass: configuration class for your task (a subclass of \ref lsst.pex.config.config.Config
      "lsst.pex.config.Config", or if your task needs no configuration, then
      \ref lsst.pex.config.config.Config "lsst.pex.config.Config" itself)
    * _DefaultName: default name used for this task (a str)

    Subclasses may also specify the following class variables:
    * RunnerClass: a task runner class. The default is TaskRunner, which works for any task
      with a run method that takes exactly one argument: a data reference. If your task does
      not meet this requirement then you must supply a variant of TaskRunner; see TaskRunner
      for more information.
    * canMultiprocess: the default is True; set False if your task does not support multiprocessing.

    Subclasses must specify a method named "run":
    - By default `run` accepts a single butler data reference, but you can specify an alternate task runner
      (subclass of TaskRunner) as the value of class variable `RunnerClass` if your run method needs
      something else.
    - `run` is expected to return its data in a Struct. This provides safety for evolution of the task
      since new values may be added without harming existing code.
    - The data returned by `run` must be picklable if your task is to support multiprocessing.
    """
    RunnerClass = TaskRunner  # default task runner; override if run() takes more than one data reference
    canMultiprocess = True  # set False if your task does not support multiprocessing
426 
    @classmethod
    def applyOverrides(cls, config):
        """!A hook to allow a task to change the values of its config *after* the camera-specific
        overrides are loaded but before any command-line overrides are applied.

        This is necessary in some cases because the camera-specific overrides may retarget subtasks,
        wiping out changes made in ConfigClass.setDefaults. See LSST Trac ticket #2282 for more discussion.

        @warning This is called by CmdLineTask.parseAndRun; other ways of constructing a config
        will not apply these overrides.

        @param[in] cls the class object
        @param[in] config task configuration (an instance of cls.ConfigClass)
        """
        # Intentionally a no-op in the base class; subclasses override as needed.
        pass
442 
    @classmethod
    def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False):
        """!Parse an argument list and run the command

        Calling this method with no arguments specified is the standard way to run a command-line task
        from the command line. For an example see pipe_tasks `bin/makeSkyMap.py` or almost any other
        file in that directory.

        @param cls the class object
        @param args list of command-line arguments; if `None` use sys.argv
        @param config config for task (instance of pex_config Config); if `None` use cls.ConfigClass()
        @param log log (instance of lsst.log.Log); if `None` use the default log
        @param doReturnResults Return the collected results from each invocation of the task?
        This is only intended for unit tests and similar use.
        It can easily exhaust memory (if the task returns enough data and you call it enough times)
        and it will fail when using multiprocessing if the returned data cannot be pickled.

        @return a Struct containing:
        - argumentParser: the argument parser
        - parsedCmd: the parsed command returned by the argument parser's parse_args method
        - taskRunner: the task runner used to run the task (an instance of cls.RunnerClass)
        - resultList: results returned by the task runner's run method, one entry per invocation.
          This will typically be a list of `None` unless doReturnResults is `True`;
          see cls.RunnerClass (TaskRunner by default) for more information.
        """
        if args is None:
            commandAsStr = " ".join(sys.argv)
            args = sys.argv[1:]
        else:
            # Called programmatically: reconstruct a command-like string for the log
            commandAsStr = "{}{}".format(lsst.utils.get_caller_name(skip=1), tuple(args))

        argumentParser = cls._makeArgumentParser()
        if config is None:
            config = cls.ConfigClass()
        parsedCmd = argumentParser.parse_args(config=config, args=args, log=log, override=cls.applyOverrides)
        # print this message after parsing the command so the log is fully configured
        parsedCmd.log.info("Running: %s", commandAsStr)

        taskRunner = cls.RunnerClass(TaskClass=cls, parsedCmd=parsedCmd, doReturnResults=doReturnResults)
        resultList = taskRunner.run(parsedCmd)
        return Struct(
            argumentParser=argumentParser,
            parsedCmd=parsedCmd,
            taskRunner=taskRunner,
            resultList=resultList,
        )
489 
    @classmethod
    def _makeArgumentParser(cls):
        """!Create and return an argument parser

        @param[in] cls the class object
        @return the argument parser for this task.

        By default this returns an ArgumentParser with one ID argument named `--id` of dataset type "raw".

        Your task subclass may need to override this method to change the dataset type or data ref level,
        or to add additional data ID arguments. If you add additional data ID arguments or your task's
        run method takes more than a single data reference then you will also have to provide a task-specific
        task runner (see TaskRunner for more information).
        """
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument(name="--id", datasetType="raw",
                               help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")
        return parser
508 
    def writeConfig(self, butler, clobber=False, doBackup=True):
        """!Write the configuration used for processing the data, or check that an existing
        one is equal to the new one if present.

        @param[in] butler data butler used to write the config.
        The config is written to dataset type self._getConfigName()
        @param[in] clobber a boolean flag that controls what happens if a config already has been saved:
        - True: overwrite or rename the existing config, depending on `doBackup`
        - False: raise TaskError if this config does not match the existing config
        @param[in] doBackup if clobbering, should we backup the old files?
        """
        configName = self._getConfigName()
        if configName is None:
            # Task has opted out of config persistence
            return
        if clobber:
            butler.put(self.config, configName, doBackup=doBackup)
        elif butler.datasetExists(configName):
            # this may be subject to a race condition; see #2789
            try:
                oldConfig = butler.get(configName, immediate=True)
            except Exception as exc:
                # Re-raise as the same exception type with a more helpful message
                raise type(exc)("Unable to read stored config file %s (%s); consider using --clobber-config" %
                                (configName, exc))

            def logConfigMismatch(msg):
                # Callback passed as `output` to Config.compare below
                self.log.fatal("Comparing configuration: %s", msg)

            if not self.config.compare(oldConfig, shortcut=False, output=logConfigMismatch):
                raise TaskError(
                    ("Config does not match existing task config %r on disk; tasks configurations " +
                     "must be consistent within the same output repo (override with --clobber-config)") %
                    (configName,))
        else:
            butler.put(self.config, configName)
543 
544  def writeSchemas(self, butler, clobber=False, doBackup=True):
545  """!Write the schemas returned by \ref task.Task.getAllSchemaCatalogs "getAllSchemaCatalogs"
546 
547  @param[in] butler data butler used to write the schema.
548  Each schema is written to the dataset type specified as the key in the dict returned by
549  \ref task.Task.getAllSchemaCatalogs "getAllSchemaCatalogs".
550  @param[in] clobber a boolean flag that controls what happens if a schema already has been saved:
551  - True: overwrite or rename the existing schema, depending on `doBackup`
552  - False: raise TaskError if this schema does not match the existing schema
553  @param[in] doBackup if clobbering, should we backup the old files?
554 
555  @warning if clobber is False and an existing schema does not match a current schema,
556  then some schemas may have been saved successfully and others may not, and there is no easy way to
557  tell which is which.
558  """
559  for dataset, catalog in self.getAllSchemaCatalogs().items():
560  schemaDataset = dataset + "_schema"
561  if clobber:
562  butler.put(catalog, schemaDataset, doBackup=doBackup)
563  elif butler.datasetExists(schemaDataset):
564  oldSchema = butler.get(schemaDataset, immediate=True).getSchema()
565  if not oldSchema.compare(catalog.getSchema(), afwTable.Schema.IDENTICAL):
566  raise TaskError(
567  ("New schema does not match schema %r on disk; schemas must be " +
568  " consistent within the same output repo (override with --clobber-config)") %
569  (dataset,))
570  else:
571  butler.put(catalog, schemaDataset)
572 
    def writeMetadata(self, dataRef):
        """!Write the metadata produced from processing the data

        @param[in] dataRef butler data reference used to write the metadata.
        The metadata is written to dataset type self._getMetadataName()
        """
        try:
            metadataName = self._getMetadataName()
            if metadataName is not None:
                dataRef.put(self.getFullMetadata(), metadataName)
        except Exception as e:
            # Metadata persistence is deliberately best-effort: a failure here
            # must not mask the result of the task itself, so log and continue.
            self.log.warn("Could not persist metadata for dataId=%s: %s", dataRef.dataId, e)
585 
    def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages"):
        """!Compare and write package versions

        We retrieve the persisted list of packages and compare with what we're currently using.
        We raise TaskError if there's a version mismatch.

        Note that this operation is subject to a race condition.

        @param[in] butler data butler used to read/write the package versions
        @param[in] clobber a boolean flag that controls what happens if versions already have been saved:
        - True: overwrite or rename the existing version info, depending on `doBackup`
        - False: raise TaskError if this version info does not match the existing
        @param[in] doBackup if clobbering, should we backup the old files?
        @param[in] dataset name of dataset to read/write

        @throws TaskError if clobber is False and the versions in common do not match
        """
        packages = Packages.fromSystem()

        if clobber:
            return butler.put(packages, dataset, doBackup=doBackup)
        if not butler.datasetExists(dataset):
            # Nothing persisted yet: just write what we have
            return butler.put(packages, dataset)

        try:
            old = butler.get(dataset, immediate=True)
        except Exception as exc:
            # Re-raise as the same exception type with a more helpful message
            raise type(exc)("Unable to read stored version dataset %s (%s); "
                            "consider using --clobber-versions or --no-versions" %
                            (dataset, exc))
        # Note that because we can only detect python modules that have been imported, the stored
        # list of products may be more or less complete than what we have now. What's important is
        # that the products that are in common have the same version.
        diff = packages.difference(old)
        if diff:
            raise TaskError(
                "Version mismatch (" +
                "; ".join("%s: %s vs %s" % (pkg, diff[pkg][1], diff[pkg][0]) for pkg in diff) +
                "); consider using --clobber-versions or --no-versions")
        # Update the old set of packages in case we have more packages that haven't been persisted.
        extra = packages.extra(old)
        if extra:
            old.update(packages)
            butler.put(old, dataset, doBackup=doBackup)
628 
629  def _getConfigName(self):
630  """!Return the name of the config dataset type, or None if config is not to be persisted
631 
632  @note The name may depend on the config; that is why this is not a class method.
633  """
634  return self._DefaultName + "_config"
635 
636  def _getMetadataName(self):
637  """!Return the name of the metadata dataset type, or None if metadata is not to be persisted
638 
639  @note The name may depend on the config; that is why this is not a class method.
640  """
641  return self._DefaultName + "_metadata"
def precall
Hook for code that should run exactly once, before multiprocessing is invoked.
Definition: cmdLineTask.py:289
def __init__
Construct a TaskRunner.
Definition: cmdLineTask.py:139
def makeTask
Create a Task instance.
Definition: cmdLineTask.py:263
def writePackageVersions
Compare and write package versions.
Definition: cmdLineTask.py:586
def makeTask
A variant of the base version that passes a butler argument to the task's constructor.
Definition: cmdLineTask.py:375
def run
Run the task on all targets.
Definition: cmdLineTask.py:182
def __call__
Run the Task on a single target.
Definition: cmdLineTask.py:314
def writeConfig
Write the configuration used for processing the data, or check that an existing one is equal to the n...
Definition: cmdLineTask.py:509
def _getConfigName
Return the name of the config dataset type, or None if config is not to be persisted.
Definition: cmdLineTask.py:629
def applyOverrides
A hook to allow a task to change the values of its config after the camera-specific overrides are loa...
Definition: cmdLineTask.py:428
def parseAndRun
Parse an argument list and run the command.
Definition: cmdLineTask.py:444
def _getMetadataName
Return the name of the metadata dataset type, or None if metadata is not to be persisted.
Definition: cmdLineTask.py:636
def writeMetadata
Write the metadata produced from processing the data.
Definition: cmdLineTask.py:573
def prepareForMultiProcessing
Prepare this instance for multiprocessing by removing optional non-picklable elements.
Definition: cmdLineTask.py:175
def getTargetList
Return a list of (dataRef, kwargs) to be used as arguments for TaskRunner.
Definition: cmdLineTask.py:221
A TaskRunner for CmdLineTasks that require a 'butler' keyword argument to be passed to their construc...
Definition: cmdLineTask.py:370
Run a command-line task, using multiprocessing if requested.
Definition: cmdLineTask.py:107
def profile
Context manager for profiling with cProfile.
Definition: cmdLineTask.py:74
def writeSchemas
Write the schemas returned by getAllSchemaCatalogs.
Definition: cmdLineTask.py:544
Base class for command-line tasks: tasks that may be executed from the command line.
Definition: cmdLineTask.py:394