Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15%
385 statements
coverage.py v7.3.2, created at 2023-10-18 09:41 +0000
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods."""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should already
            exist but will be removed from the output chained collection and
            replaced with a new one.

        ``prune_replaced``
            Whether and how to prune the replaced run; one of `None`,
            ``"unstore"``, or ``"purge"`` (requires ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and ``output_run`` such that the
            ``output`` collection has output run collections first (i.e. those
            that start with the same prefix), then the new inputs, then any
            original inputs not included in the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and the user has asked for a rebase then
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None
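
    # Illustrative sketch (not part of the original module): the contiguous
    # subsequence test used above, applied to plain tuples. With a chain of
    # ("u/user/out/run2", "u/user/out/run1", "HSC/defaults") and inputs of
    # ("HSC/defaults",), the window at n=2 matches, so the configuration is
    # consistent and no error message is produced.
    @staticmethod
    def _example_contiguous_subsequence(chain: tuple[str, ...], inputs: tuple[str, ...]) -> bool:
        for n in reversed(range(1 + len(chain) - len(inputs))):
            if inputs == chain[n : n + len(inputs)]:
                return True
        return False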

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN` collection
            if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # run must be removed from its parent chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making `~lsst.daf.butler.QuantumBackedButler`
    (QBB) instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
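

# Illustrative usage sketch (not part of the original module): _QBBFactory is
# passed to SingleQuantumExecutor as its ``limited_butler_factory`` so each
# quantum gets its own QuantumBackedButler. The graph and config are assumed
# to have been loaded elsewhere.
def _example_qbb_factory(qgraph: QuantumGraph, butler_config: Config) -> list[LimitedButler]:
    factory = _QBBFactory(
        butler_config=butler_config,
        dimensions=qgraph.universe,
        dataset_types={dstype.name: dstype for dstype in qgraph.registryDatasetTypes()},
    )
    # One limited butler per quantum, as the executor would request them.
    return [factory(node.quantum) for node in qgraph]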


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides additional methods
    for task management, like dumping the configuration or the execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # The action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
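
    # Illustrative sketch (not part of the original module): pipeline actions
    # as makePipeline consumes them, mirroring options such as ``-t`` and
    # ``-c`` on the command line. The task class and label are examples only.
    @staticmethod
    def _example_make_pipeline() -> Pipeline:
        actions = [
            SimpleNamespace(action="new_task", label="isr", value="lsst.ip.isr.IsrTask"),
            SimpleNamespace(action="config", label="isr", value="doDark=False"),
        ]
        args = SimpleNamespace(
            pipeline=None,  # start from an empty "anonymous" pipeline
            pipeline_actions=actions,
            save_pipeline=None,
            pipeline_dot=None,
        )
        return CmdLineFwk().makePipeline(args)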

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, mainly parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
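
    # Illustrative sketch (not part of the original module): how the helper
    # above interprets command-line values. A bare number given for
    # --memory-per-quantum is interpreted in the default units of megabytes.
    @staticmethod
    def _example_execution_resources() -> ExecutionResources:
        args = SimpleNamespace(cores_per_quantum=4, memory_per_quantum="2048")
        # Equivalent to four cores and a 2048 MB per-quantum memory limit.
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )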

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run the `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make a butler instance. The QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task for
        a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing the columns ``Quanta`` and ``Tasks``.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
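
    # Illustrative sketch (not part of the original module): the shape of the
    # summary table built above, with hypothetical task labels and counts.
    @staticmethod
    def _example_task_table() -> Table:
        table = Table(dict(Quanta=[12, 4], Tasks=["isr", "characterizeImage"]))
        # "\n".join(table.pformat_all()) renders rows like:
        #   Quanta       Tasks
        #   ------ -----------------
        #       12               isr
        #        4 characterizeImage
        return table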

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            The instantiated fixup object, or `None` if ``args.graph_fixup``
            is not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load the quantum graph. We do not really need individual Quanta
        # here, but we need datastore records for initInputs, and those are
        # only available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from the quantum
        # graph; we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make a butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load the quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make a special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))
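

# Illustrative end-to-end sketch (not part of the original module): the
# typical quantum-backed-butler sequence, mirroring the corresponding
# ``pipetask`` subcommands. All argument values are hypothetical.
def _example_qbb_run(task_factory: TaskFactory) -> None:
    args = SimpleNamespace(
        butler_config="/repo/main/butler.yaml",
        qgraph="pipeline.qgraph",
        qgraph_id=None,
        qgraph_node_id=(),
        config_search_path=None,
        enableLsstDebug=False,
        fail_fast=False,
        processes=1,
        start_method=None,
        timeout=None,
        pdb=None,
        profile=None,
        summary=None,
        cores_per_quantum=1,
        memory_per_quantum=None,
    )
    fwk = CmdLineFwk()
    fwk.preExecInitQBB(task_factory, args)  # write init-outputs, configs, versions
    fwk.runGraphQBB(task_factory, args)  # then execute the quanta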