cmdLineTask.py (lsst.pipe.base 13.0-9-g1c7d9c5+5)
#
# LSST Data Management System
# Copyright 2008-2015 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
from __future__ import absolute_import, division
import sys
import traceback
import functools
import contextlib

from builtins import str
from builtins import object

import lsst.utils
from lsst.base import disableImplicitThreading
import lsst.afw.table as afwTable
from .task import Task, TaskError
from .struct import Struct
from .argumentParser import ArgumentParser
from lsst.base import Packages
from lsst.log import Log

__all__ = ["CmdLineTask", "TaskRunner", "ButlerInitializedTaskRunner"]

def _poolFunctionWrapper(function, arg):
    """Wrapper around function to catch exceptions that don't inherit from Exception

    Such exceptions aren't caught by multiprocessing, which causes the slave
    process to crash; the parent then waits until the timeout expires.
    """
    try:
        return function(arg)
    except Exception:
        raise  # No worries
    except:
        # Need to wrap the exception with something multiprocessing will recognise
        cls, exc, tb = sys.exc_info()
        log = Log.getDefaultLogger()
        log.warn("Unhandled exception %s (%s):\n%s" % (cls.__name__, exc, traceback.format_exc()))
        raise Exception("Unhandled exception: %s (%s)" % (cls.__name__, exc))

def _runPool(pool, timeout, function, iterable):
    """Wrapper around pool.map_async, to handle timeout

    This is required so as to trigger an immediate interrupt on the KeyboardInterrupt (Ctrl-C); see
    http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool

    Further wraps the function in _poolFunctionWrapper to catch exceptions
    that don't inherit from Exception.
    """
    return pool.map_async(functools.partial(_poolFunctionWrapper, function), iterable).get(timeout)

@contextlib.contextmanager
def profile(filename, log=None):
    """!Context manager for profiling with cProfile

    @param filename     filename to which to write profile (profiling disabled if None or empty)
    @param log          log object for logging the profile operations

    If profiling is enabled, the context manager returns the cProfile.Profile object (otherwise
    it returns None), which allows additional control over profiling. You can obtain this using
    the "as" clause, e.g.:

        with profile(filename) as prof:
            runYourCodeHere()

    The output cumulative profile can be printed with a command-line like:

        python -c 'import pstats; pstats.Stats("<filename>").sort_stats("cumtime").print_stats(30)'
    """
    if not filename:
        # Nothing to do
        yield
        return
    from cProfile import Profile
    profile = Profile()
    if log is not None:
        log.info("Enabling cProfile profiling")
    profile.enable()
    yield profile
    profile.disable()
    profile.dump_stats(filename)
    if log is not None:
        log.info("cProfile stats written to %s" % filename)

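# Illustrative sketch (not part of the original module): the dumped stats can
# also be inspected programmatically with the standard-library pstats module,
# assuming a profile was written to a hypothetical file "profile.dat":
#
#     import pstats
#     stats = pstats.Stats("profile.dat")
#     stats.sort_stats("cumtime").print_stats(30)  # top 30 entries by cumulative time
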
class TaskRunner(object):
    """Run a command-line task, using multiprocessing if requested.

    Each command-line task (subclass of CmdLineTask) has a task runner. By
    default it is this class, but some tasks require a subclass. See the
    manual "how to write a command-line task" in the pipe_tasks documentation
    for more information. See CmdLineTask.parseAndRun to see how a task runner
    is used.

    You may use this task runner for your command-line task if your task has
    a run method that takes exactly one argument: a butler data reference.
    Otherwise you must provide a task-specific subclass of this runner for
    your task's `RunnerClass` that overrides TaskRunner.getTargetList and
    possibly TaskRunner.\_\_call\_\_. See TaskRunner.getTargetList for
    details.

    This design matches the common pattern for command-line tasks: the run
    method takes a single data reference, of some suitable name. Additional
    arguments are rare, and if present, require a subclass of TaskRunner
    that passes these additional arguments by name.

    Instances of this class must be picklable in order to be compatible with
    multiprocessing. If multiprocessing is requested
    (parsedCmd.numProcesses > 1) then run() calls prepareForMultiProcessing
    to jettison optional non-picklable elements. If your task runner is not
    compatible with multiprocessing then indicate this in your task by setting
    class variable canMultiprocess=False.

    Due to a Python bug [1], handling a KeyboardInterrupt properly requires
    specifying a timeout [2]. This timeout (in sec) can be specified as the
    "timeout" element in the output from ArgumentParser (the "parsedCmd"), if
    available, otherwise we use TaskRunner.TIMEOUT.

    [1] http://bugs.python.org/issue8296
    [2] http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
    """
    TIMEOUT = 3600*24*30  # Default timeout (sec) for multiprocessing

    def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
        """!Construct a TaskRunner

        @warning Do not store parsedCmd, as this instance is pickled (if
        multiprocessing) and parsedCmd may contain non-picklable elements.
        It certainly contains more data than we need to send to each
        instance of the task.

        @param TaskClass    The class of the task to run
        @param parsedCmd    The parsed command-line arguments, as returned by
            the task's argument parser's parse_args method.
        @param doReturnResults    Should run return the collected result from
            each invocation of the task? This is only intended for unit tests
            and similar use. It can easily exhaust memory (if the task
            returns enough data and you call it enough times) and it will
            fail when using multiprocessing if the returned data cannot be
            pickled.

        @throws ImportError if multiprocessing is requested (and the task
            supports it) but the multiprocessing library cannot be imported.
        """
        self.TaskClass = TaskClass
        self.doReturnResults = bool(doReturnResults)
        self.config = parsedCmd.config
        self.log = parsedCmd.log
        self.doRaise = bool(parsedCmd.doraise)
        self.clobberConfig = bool(parsedCmd.clobberConfig)
        self.doBackup = not bool(parsedCmd.noBackupConfig)
        self.numProcesses = int(getattr(parsedCmd, 'processes', 1))

        self.timeout = getattr(parsedCmd, 'timeout', None)
        if self.timeout is None or self.timeout <= 0:
            self.timeout = self.TIMEOUT

        if self.numProcesses > 1:
            if not TaskClass.canMultiprocess:
                self.log.warn("This task does not support multiprocessing; using one process")
                self.numProcesses = 1

    def prepareForMultiProcessing(self):
        """Prepare this instance for multiprocessing

        Optional non-picklable elements are removed.

        This is only called if the task is run under multiprocessing.
        """
        self.log = None

    def run(self, parsedCmd):
        """!Run the task on all targets.

        The task is run under multiprocessing if numProcesses > 1; otherwise
        processing is serial.

        @return a list of results returned by TaskRunner.\_\_call\_\_, or an
            empty list if TaskRunner.\_\_call\_\_ is not called (e.g. if
            TaskRunner.precall returns `False`). See TaskRunner.\_\_call\_\_
            for details.
        """
        resultList = []
        if self.numProcesses > 1:
            disableImplicitThreading()  # To prevent thread contention
            import multiprocessing
            self.prepareForMultiProcessing()
            pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
            mapFunc = functools.partial(_runPool, pool, self.timeout)
        else:
            pool = None
            mapFunc = map

        if self.precall(parsedCmd):
            profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
            log = parsedCmd.log
            targetList = self.getTargetList(parsedCmd)
            if len(targetList) > 0:
                with profile(profileName, log):
                    # Run the task using self.__call__
                    resultList = list(mapFunc(self, targetList))
            else:
                log.warn("Not running the task because there is no data to process; "
                         "you may preview data using \"--show data\"")

        if pool is not None:
            pool.close()
            pool.join()

        return resultList

    @staticmethod
    def getTargetList(parsedCmd, **kwargs):
        """!Return a list of (dataRef, kwargs) for TaskRunner.\_\_call\_\_.

        @param parsedCmd    the parsed command object (an argparse.Namespace)
            returned by \ref argumentParser.ArgumentParser.parse_args
            "ArgumentParser.parse_args".
        @param **kwargs     any additional keyword arguments. In the default
            TaskRunner this is an empty dict, but having it simplifies
            overriding TaskRunner for tasks whose run method takes additional
            arguments (see case (1) below).

        The default implementation of TaskRunner.getTargetList and
        TaskRunner.\_\_call\_\_ works for any command-line task whose run
        method takes exactly one argument: a data reference. Otherwise you
        must provide a variant of TaskRunner that overrides
        TaskRunner.getTargetList and possibly TaskRunner.\_\_call\_\_.
        There are two cases:

        (1) If your command-line task has a `run` method that takes one data
        reference followed by additional arguments, then you need only
        override TaskRunner.getTargetList to return the additional arguments
        as an argument dict. To make this easier, your overridden version of
        getTargetList may call TaskRunner.getTargetList with the extra
        arguments as keyword arguments. For example, the following adds an
        argument dict containing a single key: "calExpList", whose value is
        the list of data IDs for the calexp ID argument:

        \code
        \@staticmethod
        def getTargetList(parsedCmd):
            return TaskRunner.getTargetList(
                parsedCmd,
                calExpList=parsedCmd.calexp.idList
            )
        \endcode

        It is equivalent to this slightly longer version:

        \code
        \@staticmethod
        def getTargetList(parsedCmd):
            argDict = dict(calExpList=parsedCmd.calexp.idList)
            return [(dataRef, argDict) for dataRef in parsedCmd.id.refList]
        \endcode

        (2) If your task does not meet condition (1) then you must override
        both TaskRunner.getTargetList and TaskRunner.\_\_call\_\_. You may do
        this however you see fit, so long as TaskRunner.getTargetList
        returns a list, each of whose elements is sent to
        TaskRunner.\_\_call\_\_, which runs your task.
        """
        return [(ref, kwargs) for ref in parsedCmd.id.refList]

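    # Illustrative sketch (not part of the original module) of case (2) above:
    # a runner for a task whose run method takes a tract number rather than a
    # data reference; all names here are hypothetical. getTargetList may return
    # any list, and each element is passed unchanged to __call__:
    #
    #     class TractTaskRunner(TaskRunner):
    #         @staticmethod
    #         def getTargetList(parsedCmd):
    #             # one target per distinct tract in the selected data references
    #             return sorted(set(ref.dataId["tract"] for ref in parsedCmd.id.refList))
    #
    #         def __call__(self, tract):
    #             # minimal version: ignores doReturnResults and error handling
    #             task = self.makeTask()
    #             return task.run(tract)
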
    def makeTask(self, parsedCmd=None, args=None):
        """!Create a Task instance

        @param[in] parsedCmd    parsed command-line options (used for extra
            task args by some task runners)
        @param[in] args         args tuple passed to TaskRunner.\_\_call\_\_
            (used for extra task arguments by some task runners)

        makeTask() can be called with either the 'parsedCmd' argument or
        'args' argument set to None, but it must construct identical Task
        instances in either case.

        Subclasses may ignore this method entirely if they reimplement
        both TaskRunner.precall and TaskRunner.\_\_call\_\_.
        """
        return self.TaskClass(config=self.config, log=self.log)

    def _precallImpl(self, task, parsedCmd):
        """The main work of 'precall'

        We write package versions, schemas and configs, or compare these to
        existing files on disk if present.
        """
        if not parsedCmd.noVersions:
            task.writePackageVersions(parsedCmd.butler, clobber=parsedCmd.clobberVersions)
        task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)
        task.writeSchemas(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)

    def precall(self, parsedCmd):
        """Hook for code that should run exactly once, before multiprocessing

        Must return True if TaskRunner.\_\_call\_\_ should subsequently be
        called.

        @warning Implementations must take care to ensure that no unpicklable
        attributes are added to the TaskRunner itself, for compatibility
        with multiprocessing.

        The default implementation writes package versions, schemas and
        configs, or compares them to existing files on disk if present.
        """
        task = self.makeTask(parsedCmd=parsedCmd)

        if self.doRaise:
            self._precallImpl(task, parsedCmd)
        else:
            try:
                self._precallImpl(task, parsedCmd)
            except Exception as e:
                task.log.fatal("Failed in task initialization: %s", e)
                if not isinstance(e, TaskError):
                    traceback.print_exc(file=sys.stderr)
                return False
        return True

    def __call__(self, args):
        """!Run the Task on a single target.

        This default implementation assumes that 'args' is a tuple
        containing a data reference and a dict of keyword arguments.

        @warning If you override this method and wish to return something
        when doReturnResults is false, then it must be picklable to support
        multiprocessing and it should be small enough that pickling and
        unpickling do not add excessive overhead.

        @param args     Arguments for Task.run()

        @return:
        - None if doReturnResults false
        - A pipe_base Struct containing these fields if doReturnResults true:
            - dataRef: the provided data reference
            - metadata: task metadata after execution of run
            - result: result returned by task run, or None if the task fails
        """
        dataRef, kwargs = args
        if self.log is None:
            self.log = Log.getDefaultLogger()
        if hasattr(dataRef, "dataId"):
            self.log.MDC("LABEL", str(dataRef.dataId))
        elif isinstance(dataRef, (list, tuple)):
            self.log.MDC("LABEL", str([ref.dataId for ref in dataRef if hasattr(ref, "dataId")]))
        task = self.makeTask(args=args)
        result = None  # in case the task fails
        if self.doRaise:
            result = task.run(dataRef, **kwargs)
        else:
            try:
                result = task.run(dataRef, **kwargs)
            except Exception as e:
                # don't use a try block as we need to preserve the original exception
                if hasattr(dataRef, "dataId"):
                    task.log.fatal("Failed on dataId=%s: %s", dataRef.dataId, e)
                elif isinstance(dataRef, (list, tuple)):
                    task.log.fatal("Failed on dataId=[%s]: %s",
                                   ", ".join(str(ref.dataId) for ref in dataRef), e)
                else:
                    task.log.fatal("Failed on dataRef=%s: %s", dataRef, e)

                if not isinstance(e, TaskError):
                    traceback.print_exc(file=sys.stderr)
        task.writeMetadata(dataRef)

        # remove MDC so it does not show up outside of task context
        self.log.MDCRemove("LABEL")

        if self.doReturnResults:
            return Struct(
                dataRef=dataRef,
                metadata=task.metadata,
                result=result,
            )

class ButlerInitializedTaskRunner(TaskRunner):
    """!A TaskRunner for CmdLineTasks that require a 'butler' keyword argument to be passed to
    their constructor.
    """

    def makeTask(self, parsedCmd=None, args=None):
        """!A variant of the base version that passes a butler argument to the task's constructor

        @param[in] parsedCmd    parsed command-line options, as returned by the argument parser;
            if specified then args is ignored
        @param[in] args         other arguments; if parsedCmd is None then this must be specified

        @throw RuntimeError if parsedCmd and args are both None
        """
        if parsedCmd is not None:
            butler = parsedCmd.butler
        elif args is not None:
            dataRef, kwargs = args
            butler = dataRef.butlerSubset.butler
        else:
            raise RuntimeError("parsedCmd or args must be specified")
        return self.TaskClass(config=self.config, log=self.log, butler=butler)

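# Illustrative sketch (not part of the original module): a command-line task
# that needs a butler at construction time selects this runner via its
# RunnerClass class variable; the task and config names are hypothetical:
#
#     class MyButlerTask(CmdLineTask):
#         ConfigClass = MyButlerConfig
#         RunnerClass = ButlerInitializedTaskRunner
#         _DefaultName = "myButlerTask"
#
#         def __init__(self, butler=None, **kwargs):
#             # the extra 'butler' argument is supplied by ButlerInitializedTaskRunner.makeTask
#             CmdLineTask.__init__(self, **kwargs)
#             self.butler = butler
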
class CmdLineTask(Task):
    """!Base class for command-line tasks: tasks that may be executed from the command line

    See \ref pipeBase_introduction "pipe_base introduction" to learn what tasks are,
    and \ref pipeTasks_writeCmdLineTask "how to write a command-line task" for more information
    about writing command-line tasks.
    If the second link is broken (as it will be before the documentation is cross-linked)
    then look at the main page of pipe_tasks documentation for a link.

    Subclasses must specify the following class variables:
    * ConfigClass: configuration class for your task (a subclass of \ref lsst.pex.config.config.Config
        "lsst.pex.config.Config", or if your task needs no configuration, then
        \ref lsst.pex.config.config.Config "lsst.pex.config.Config" itself)
    * _DefaultName: default name used for this task (a str)

    Subclasses may also specify the following class variables:
    * RunnerClass: a task runner class. The default is TaskRunner, which works for any task
        with a run method that takes exactly one argument: a data reference. If your task does
        not meet this requirement then you must supply a variant of TaskRunner; see TaskRunner
        for more information.
    * canMultiprocess: the default is True; set False if your task does not support multiprocessing.

    Subclasses must specify a method named "run":
    - By default `run` accepts a single butler data reference, but you can specify an alternate task runner
        (subclass of TaskRunner) as the value of class variable `RunnerClass` if your run method needs
        something else.
    - `run` is expected to return its data in a Struct. This provides safety for evolution of the task
        since new values may be added without harming existing code.
    - The data returned by `run` must be picklable if your task is to support multiprocessing.
    """
    RunnerClass = TaskRunner
    canMultiprocess = True

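    # Illustrative sketch (not part of the original module): a minimal
    # command-line task satisfying the class-variable and `run` requirements
    # described above; the config field and task names are hypothetical, and
    # `import lsst.pex.config as pexConfig` is assumed:
    #
    #     class ExampleConfig(pexConfig.Config):
    #         doPrint = pexConfig.Field(dtype=bool, default=True, doc="log the dataId?")
    #
    #     class ExampleTask(CmdLineTask):
    #         ConfigClass = ExampleConfig
    #         _DefaultName = "example"
    #
    #         def run(self, dataRef):
    #             if self.config.doPrint:
    #                 self.log.info("Processing %s", dataRef.dataId)
    #             return Struct(dataId=dataRef.dataId)
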
    @classmethod
    def applyOverrides(cls, config):
        """!A hook to allow a task to change the values of its config *after* the camera-specific
        overrides are loaded but before any command-line overrides are applied.

        This is necessary in some cases because the camera-specific overrides may retarget subtasks,
        wiping out changes made in ConfigClass.setDefaults. See LSST Trac ticket #2282 for more discussion.

        @warning This is called by CmdLineTask.parseAndRun; other ways of constructing a config
        will not apply these overrides.

        @param[in] cls      the class object
        @param[in] config   task configuration (an instance of cls.ConfigClass)
        """
        pass

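    # Illustrative sketch (not part of the original module): a subclass can pin
    # a config value here so that camera-specific overrides cannot change it
    # (command-line overrides are still applied afterwards); the field name is
    # hypothetical:
    #
    #     @classmethod
    #     def applyOverrides(cls, config):
    #         config.doWriteSources = False
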
    @classmethod
    def parseAndRun(cls, args=None, config=None, log=None, doReturnResults=False):
        """!Parse an argument list and run the command

        Calling this method with no arguments specified is the standard way to run a command-line task
        from the command line. For an example see pipe_tasks `bin/makeSkyMap.py` or almost any other
        file in that directory.

        @param cls      the class object
        @param args     list of command-line arguments; if `None` use sys.argv
        @param config   config for task (instance of pex_config Config); if `None` use cls.ConfigClass()
        @param log      log (instance of lsst.log.Log); if `None` use the default log
        @param doReturnResults    Return the collected results from each invocation of the task?
            This is only intended for unit tests and similar use.
            It can easily exhaust memory (if the task returns enough data and you call it enough times)
            and it will fail when using multiprocessing if the returned data cannot be pickled.

        @return a Struct containing:
        - argumentParser: the argument parser
        - parsedCmd: the parsed command returned by the argument parser's parse_args method
        - taskRunner: the task runner used to run the task (an instance of cls.RunnerClass)
        - resultList: results returned by the task runner's run method, one entry per invocation.
            This will typically be a list of `None` unless doReturnResults is `True`;
            see cls.RunnerClass (TaskRunner by default) for more information.
        """
        if args is None:
            commandAsStr = " ".join(sys.argv)
            args = sys.argv[1:]
        else:
            commandAsStr = "{}{}".format(lsst.utils.get_caller_name(skip=1), tuple(args))

        argumentParser = cls._makeArgumentParser()
        if config is None:
            config = cls.ConfigClass()
        parsedCmd = argumentParser.parse_args(config=config, args=args, log=log, override=cls.applyOverrides)
        # print this message after parsing the command so the log is fully configured
        parsedCmd.log.info("Running: %s", commandAsStr)

        taskRunner = cls.RunnerClass(TaskClass=cls, parsedCmd=parsedCmd, doReturnResults=doReturnResults)
        resultList = taskRunner.run(parsedCmd)
        return Struct(
            argumentParser=argumentParser,
            parsedCmd=parsedCmd,
            taskRunner=taskRunner,
            resultList=resultList,
        )

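    # Illustrative sketch (not part of the original module): the conventional
    # executable script for a hypothetical ExampleTask is simply:
    #
    #     #!/usr/bin/env python
    #     from lsst.example.exampleTask import ExampleTask
    #     ExampleTask.parseAndRun()
    #
    # For unit tests, results can be collected in-process by passing an explicit
    # argument list (repository path plus data IDs) and doReturnResults=True:
    #
    #     results = ExampleTask.parseAndRun(args=[repoPath, "--id", "visit=12345"],
    #                                       doReturnResults=True)
    #     print(results.resultList[0].result)
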
    @classmethod
    def _makeArgumentParser(cls):
        """!Create and return an argument parser

        @param[in] cls      the class object
        @return the argument parser for this task.

        By default this returns an ArgumentParser with one ID argument named `--id` of dataset type "raw".

        Your task subclass may need to override this method to change the dataset type or data ref level,
        or to add additional data ID arguments. If you add additional data ID arguments or your task's
        run method takes more than a single data reference then you will also have to provide a task-specific
        task runner (see TaskRunner for more information).
        """
        parser = ArgumentParser(name=cls._DefaultName)
        parser.add_id_argument(name="--id", datasetType="raw",
                               help="data IDs, e.g. --id visit=12345 ccd=1,2^0,3")
        return parser

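    # Illustrative sketch (not part of the original module): a subclass that
    # processes calibrated exposures instead of raw data might override this
    # method to change the dataset type of the ID argument:
    #
    #     @classmethod
    #     def _makeArgumentParser(cls):
    #         parser = ArgumentParser(name=cls._DefaultName)
    #         parser.add_id_argument(name="--id", datasetType="calexp",
    #                                help="data IDs, e.g. --id visit=12345 ccd=1,2")
    #         return parser
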
    def writeConfig(self, butler, clobber=False, doBackup=True):
        """!Write the configuration used for processing the data, or check that an existing
        one is equal to the new one if present.

        @param[in] butler   data butler used to write the config.
            The config is written to dataset type self._getConfigName()
        @param[in] clobber  a boolean flag that controls what happens if a config already has been saved:
            - True: overwrite or rename the existing config, depending on `doBackup`
            - False: raise TaskError if this config does not match the existing config
        @param[in] doBackup if clobbering, should we backup the old files?
        """
        configName = self._getConfigName()
        if configName is None:
            return
        if clobber:
            butler.put(self.config, configName, doBackup=doBackup)
        elif butler.datasetExists(configName):
            # this may be subject to a race condition; see #2789
            try:
                oldConfig = butler.get(configName, immediate=True)
            except Exception as exc:
                raise type(exc)("Unable to read stored config file %s (%s); consider using --clobber-config" %
                                (configName, exc))

            def logConfigMismatch(msg):
                self.log.fatal("Comparing configuration: %s", msg)

            if not self.config.compare(oldConfig, shortcut=False, output=logConfigMismatch):
                raise TaskError(
                    ("Config does not match existing task config %r on disk; task configurations " +
                     "must be consistent within the same output repo (override with --clobber-config)") %
                    (configName,))
        else:
            butler.put(self.config, configName)

    def writeSchemas(self, butler, clobber=False, doBackup=True):
        """!Write the schemas returned by \ref task.Task.getAllSchemaCatalogs "getAllSchemaCatalogs"

        @param[in] butler   data butler used to write the schema.
            Each schema is written to the dataset type specified as the key in the dict returned by
            \ref task.Task.getAllSchemaCatalogs "getAllSchemaCatalogs".
        @param[in] clobber  a boolean flag that controls what happens if a schema already has been saved:
            - True: overwrite or rename the existing schema, depending on `doBackup`
            - False: raise TaskError if this schema does not match the existing schema
        @param[in] doBackup if clobbering, should we backup the old files?

        @warning if clobber is False and an existing schema does not match a current schema,
        then some schemas may have been saved successfully and others may not, and there is no easy way to
        tell which is which.
        """
        for dataset, catalog in self.getAllSchemaCatalogs().items():
            schemaDataset = dataset + "_schema"
            if clobber:
                butler.put(catalog, schemaDataset, doBackup=doBackup)
            elif butler.datasetExists(schemaDataset):
                oldSchema = butler.get(schemaDataset, immediate=True).getSchema()
                if not oldSchema.compare(catalog.getSchema(), afwTable.Schema.IDENTICAL):
                    raise TaskError(
                        ("New schema does not match schema %r on disk; schemas must be " +
                         "consistent within the same output repo (override with --clobber-config)") %
                        (dataset,))
            else:
                butler.put(catalog, schemaDataset)

    def writeMetadata(self, dataRef):
        """!Write the metadata produced from processing the data

        @param[in] dataRef  butler data reference used to write the metadata.
            The metadata is written to dataset type self._getMetadataName()
        """
        try:
            metadataName = self._getMetadataName()
            if metadataName is not None:
                dataRef.put(self.getFullMetadata(), metadataName)
        except Exception as e:
            self.log.warn("Could not persist metadata for dataId=%s: %s", dataRef.dataId, e)

    def writePackageVersions(self, butler, clobber=False, doBackup=True, dataset="packages"):
        """!Compare and write package versions

        We retrieve the persisted list of packages and compare with what we're currently using.
        We raise TaskError if there's a version mismatch.

        Note that this operation is subject to a race condition.

        @param[in] butler   data butler used to read/write the package versions
        @param[in] clobber  a boolean flag that controls what happens if versions already have been saved:
            - True: overwrite or rename the existing version info, depending on `doBackup`
            - False: raise TaskError if this version info does not match the existing version info
        @param[in] doBackup if clobbering, should we backup the old files?
        @param[in] dataset  name of dataset to read/write
        """
        packages = Packages.fromSystem()

        if clobber:
            return butler.put(packages, dataset, doBackup=doBackup)
        if not butler.datasetExists(dataset):
            return butler.put(packages, dataset)

        try:
            old = butler.get(dataset, immediate=True)
        except Exception as exc:
            raise type(exc)("Unable to read stored version dataset %s (%s); "
                            "consider using --clobber-versions or --no-versions" %
                            (dataset, exc))
        # Note that because we can only detect python modules that have been imported, the stored
        # list of products may be more or less complete than what we have now. What's important is
        # that the products that are in common have the same version.
        diff = packages.difference(old)
        if diff:
            raise TaskError(
                "Version mismatch (" +
                "; ".join("%s: %s vs %s" % (pkg, diff[pkg][1], diff[pkg][0]) for pkg in diff) +
                "); consider using --clobber-versions or --no-versions")
        # Update the old set of packages in case we have more packages that haven't been persisted.
        extra = packages.extra(old)
        if extra:
            old.update(packages)
            butler.put(old, dataset, doBackup=doBackup)

    def _getConfigName(self):
        """!Return the name of the config dataset type, or None if config is not to be persisted

        @note The name may depend on the config; that is why this is not a class method.
        """
        return self._DefaultName + "_config"

    def _getMetadataName(self):
        """!Return the name of the metadata dataset type, or None if metadata is not to be persisted

        @note The name may depend on the config; that is why this is not a class method.
        """
        return self._DefaultName + "_metadata"
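
    # Illustrative sketch (not part of the original module): a subclass that
    # should not persist its config or metadata can override these hooks to
    # return None, which makes writeConfig and writeMetadata no-ops:
    #
    #     def _getConfigName(self):
    #         return None
    #
    #     def _getMetadataName(self):
    #         return None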