Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15% (374 statements)
coverage.py v7.2.7, created at 2023-08-09 09:37 +0000
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining CmdLineFwk class and related methods.
23"""
25from __future__ import annotations
27__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace
from typing import TYPE_CHECKING

import astropy.units as u
from astropy.table import Table

# DatastoreRecordData is instantiated at runtime in preExecInitQBB, so it is
# imported here rather than only under TYPE_CHECKING.
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatastoreCacheManager,
    DatastoreRecordData,
    QuantumBackedButler,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

if TYPE_CHECKING:
    from lsst.daf.butler import (
        Config,
        DatasetType,
        DimensionUniverse,
        LimitedButler,
        Quantum,
        Registry,
    )
    from lsst.pipe.base import TaskDef, TaskFactory

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is okay to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
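
    # Illustrative sketch (hypothetical values): ``args`` normally comes from
    # the pipetask argument parser, but a hand-built ``SimpleNamespace`` with
    # the attributes documented in the class docstring behaves the same way.
    #
    #     args = SimpleNamespace(
    #         butler_config="/repo/main",
    #         input=("HSC/raw/all", "HSC/calib"),
    #         output="u/someone/analysis",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #     )
    #     factory = _ButlerFactory(registry, args, writeable=False)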

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1], strict=False):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
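
    # Example usage (sketch, hypothetical repo path and query): the returned
    # butler already has its collection search path set from ``args``.
    #
    #     butler = _ButlerFactory.makeReadButler(args)
    #     refs = list(butler.registry.queryDatasets("calexp", where="visit = 903334"))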

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if one is defined, or `None` otherwise.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable.  This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only
            needed if ``args.replace_run`` is `True` and
            ``args.prune_replaced`` is "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all datasets; the
                    # collection must be removed from its chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
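
    # Note on chain ordering (values hypothetical): because new runs are
    # pushed to the front of the output CHAINED collection, two invocations
    # with ``--output u/someone/analysis`` would leave a chain like
    #
    #     u/someone/analysis = [u/someone/analysis/20230809T093700Z,
    #                           u/someone/analysis/20230801T120000Z,
    #                           <flattened input collections>]
    #
    # which is why ``check`` compares the given inputs against the *end* of
    # an existing chain.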

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Callable factory for making QuantumBackedButler (QBB) instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized
        `~lsst.daf.butler.QuantumBackedButler` for the given quantum.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
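
# Illustrative sketch: a _QBBFactory instance is passed to
# SingleQuantumExecutor as its ``limited_butler_factory`` (see runGraphQBB
# below) and is then called once per quantum:
#
#     qbb_factory = _QBBFactory(args.butler_config, qgraph.universe, dataset_types)
#     limited_butler = qbb_factory(quantum_node.quantum)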

# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management, such as dumping the configuration or the execution
    chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing (30 days).

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # The action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
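
    # Example (hypothetical task and config field): each pipeline action
    # mirrors a command-line option, so
    # ``-t lsst.pipe.tasks.calibrate.CalibrateTask:calibrate`` followed by
    # ``-c calibrate:doAstrometry=False`` is applied by the loop above as
    #
    #     pipeline.addTask("lsst.pipe.tasks.calibrate.CalibrateTask", "calibrate")
    #     pipeline.addConfigOverride("calibrate", "doAstrometry", "False")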

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; may be empty or `None` if the graph is read from a
            file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed graph, or `None` if the resulting graph is empty.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for its side effects,
                # mainly parsing all the args into collection names,
                # creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the input collections for
            # the execution butler if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph
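
    # Typical flow (sketch): this is roughly what ``pipetask qgraph`` does
    # under the hood, with ``args`` parsed from the command line.
    #
    #     fwk = CmdLineFwk()
    #     pipeline = fwk.makePipeline(args)
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     if qgraph is None:
    #         print("No quanta to run.")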

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
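
    # Example (sketch): options like ``--cores-per-quantum 4`` and
    # ``--memory-per-quantum 2GB`` would make this equivalent to
    #
    #     ExecutionResources(num_cores=4, max_mem="2GB", default_mem_units=u.MB)
    #
    # where a bare number for ``max_mem`` would be read in the default units
    # (megabytes here).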

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined, a new instance is made
            using command line options.
        """
        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make a butler instance.  The QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing the columns Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
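
    # The formatted table logged by _summarize_qgraph might look like this
    # for a two-task graph (numbers hypothetical):
    #
    #     Quanta   Tasks
    #     ------ ---------
    #         10       isr
    #         10 calibrate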

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if ``args.graph_fixup`` is
            not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
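
    # A value given via ``--graph-fixup`` must resolve to an
    # ExecutionGraphFixup subclass or a no-argument factory returning one.
    # Minimal sketch, with a hypothetical module path:
    #
    #     # mypackage/fixups.py
    #     from lsst.ctrl.mpexec.executionGraphFixup import ExecutionGraphFixup
    #
    #     class MyFixup(ExecutionGraphFixup):
    #         def fixupQuanta(self, graph):
    #             # Adjust inter-quantum dependencies here, then return graph.
    #             return graph
    #
    # selected on the command line as ``--graph-fixup mypackage.fixups.MyFixup``.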

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load the quantum graph.  We do not really need individual quanta
        # here, but we need datastore records for initInputs, and those are
        # only available from quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from the quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make a butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load the quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make a special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))
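
# Illustrative end-to-end sketch of the quantum-backed-butler (QBB) flow as
# driven by the command line (``task_factory`` and ``args`` hypothetical):
#
#     fwk = CmdLineFwk()
#     fwk.preExecInitQBB(task_factory, args)  # write InitOutputs, configs, packages
#     fwk.runGraphQBB(task_factory, args)     # execute quanta without a registry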