Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15% of 390 statements (coverage.py v7.4.0, created at 2024-01-20 11:03 +0000)
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
import lsst.utils.timer
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
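
    Examples
    --------
    A minimal sketch of intended use (the repo path and collection name are
    illustrative assumptions, not part of this module)::

        butler = Butler.from_config("/repo", writeable=False)
        info = _OutputChainedCollectionInfo(butler.registry, "u/user/out")
        if info.exists:
            print(info.chain)  # flattened child collections, in order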
103 """
105 def __init__(self, registry: Registry, name: str):
106 self.name = name
107 try:
108 self.chain = tuple(registry.getCollectionChain(name))
109 self.exists = True
110 except MissingCollectionError:
111 self.chain = ()
112 self.exists = False
114 def __str__(self) -> str:
115 return self.name
117 name: str
118 """Name of the collection provided on the command line (`str`).
119 """
121 exists: bool
122 """Whether this collection already exists in the registry (`bool`).
123 """
125 chain: tuple[str, ...]
126 """The definition of the collection, if it already exists (`tuple`[`str`]).
128 Empty if the collection does not already exist.
129 """
132class _OutputRunCollectionInfo:
133 """A helper class for handling command-line arguments related to an output
134 `~lsst.daf.butler.CollectionType.RUN` collection.
136 Parameters
137 ----------
138 registry : `lsst.daf.butler.Registry`
139 Butler registry that collections will be added to and/or queried from.
140 name : `str`
141 Name of the collection given on the command line.
142 """
144 def __init__(self, registry: Registry, name: str):
145 self.name = name
146 try:
147 actualType = registry.getCollectionType(name)
148 if actualType is not CollectionType.RUN:
149 raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
150 self.exists = True
151 except MissingCollectionError:
152 self.exists = False
154 name: str
155 """Name of the collection provided on the command line (`str`).
156 """
158 exists: bool
159 """Whether this collection already exists in the registry (`bool`).
160 """
163class _ButlerFactory:
164 """A helper class for processing command-line arguments related to input
165 and output collections.
167 Parameters
168 ----------
169 registry : `lsst.daf.butler.Registry`
170 Butler registry that collections will be added to and/or queried from.
172 args : `types.SimpleNamespace`
173 Parsed command-line arguments. The following attributes are used,
174 either at construction or in later methods.
176 ``output``
177 The name of a `~lsst.daf.butler.CollectionType.CHAINED`
178 input/output collection.
180 ``output_run``
181 The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
182 collection.
184 ``extend_run``
185 A boolean indicating whether ``output_run`` should already exist
186 and be extended.
188 ``replace_run``
189 A boolean indicating that (if `True`) ``output_run`` should already
190 exist but will be removed from the output chained collection and
191 replaced with a new one.
193 ``prune_replaced``
194 A boolean indicating whether to prune the replaced run (requires
195 ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with the new ``inputs`` and ``output_run``, such
            that the rebuilt ``output`` chain has the output run collections
            first (i.e. those whose names start with the ``output`` prefix),
            then the new inputs, then any original inputs not included in
            the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
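
    Examples
    --------
    A hedged sketch of typical use; the attribute values below are
    illustrative assumptions mirroring what the ``pipetask`` command line
    would provide::

        args = SimpleNamespace(
            butler_config="/repo",
            input=("HSC/defaults",),
            output="u/user/pipeline",
            output_run=None,
            extend_run=False,
            replace_run=False,
            prune_replaced=None,
            rebase=False,
        )
        butler = _ButlerFactory.makeWriteButler(args)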
220 """
222 def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
223 if args.output is not None:
224 self.output = _OutputChainedCollectionInfo(registry, args.output)
225 else:
226 self.output = None
227 if args.output_run is not None:
228 if args.rebase and self.output and not args.output_run.startswith(self.output.name):
229 raise ValueError("Cannot rebase if output run does not start with output collection name.")
230 self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
231 elif self.output is not None:
232 if args.extend_run:
233 if not self.output.chain:
234 raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
235 runName = self.output.chain[0]
236 else:
237 runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
238 self.outputRun = _OutputRunCollectionInfo(registry, runName)
239 elif not writeable:
240 # If we're not writing yet, ok to have no output run.
241 self.outputRun = None
242 else:
243 raise ValueError("Cannot write without at least one of (--output, --output-run).")
244 # Recursively flatten any input CHAINED collections. We do this up
245 # front so we can tell if the user passes the same inputs on subsequent
246 # calls, even though we also flatten when we define the output CHAINED
247 # collection.
248 self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
250 # If things are inconsistent and user has asked for a rebase then
251 # construct the new output chain.
252 if args.rebase and self._checkOutputInputConsistency():
253 assert self.output is not None
254 newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
255 newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
256 newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
257 self.output.chain = tuple(newOutputChain)
259 def check(self, args: SimpleNamespace) -> None:
260 """Check command-line options for consistency with each other and the
261 data repository.
263 Parameters
264 ----------
265 args : `types.SimpleNamespace`
266 Parsed command-line arguments. See class documentation for the
267 construction parameter of the same name.
268 """
269 assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
270 if consistencyError := self._checkOutputInputConsistency():
271 raise ValueError(consistencyError)
273 if args.extend_run:
274 if self.outputRun is None:
275 raise ValueError("Cannot --extend-run when no output collection is given.")
276 elif not self.outputRun.exists:
277 raise ValueError(
278 f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
279 )
280 if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
281 raise ValueError(
282 f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
283 )
284 if args.prune_replaced and not args.replace_run:
285 raise ValueError("--prune-replaced requires --replace-run.")
286 if args.replace_run and (self.output is None or not self.output.exists):
287 raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")
289 def _checkOutputInputConsistency(self) -> str | None:
290 if self.inputs and self.output is not None and self.output.exists:
291 # Passing the same inputs that were used to initialize the output
292 # collection is allowed; this means the inputs must appear as a
293 # contiguous subsequence of outputs (normally they're also at the
294 # end, but --rebase will in general put them in the middle).
295 for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
296 if self.inputs == self.output.chain[n : n + len(self.inputs)]:
297 return None
298 return (
299 f"Output CHAINED collection {self.output.name!r} exists and does not include the "
300 f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
301 "subsequence. "
302 "Use --rebase to ignore this problem and reset the output collection, but note that "
303 "this may obfuscate what inputs were actually used to produce these outputs."
304 )
305 return None
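
    # Illustration of the consistency test above (names are made up): with
    # output.chain == ("u/user/out/run2", "u/user/out/run1", "calib", "raw")
    # and inputs == ("calib", "raw"), the window starting at n == 2 matches,
    # so _checkOutputInputConsistency returns None (consistent). If inputs
    # were ("raw", "calib"), no contiguous window would match and an error
    # message would be returned instead.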

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
373 """Return a read-only registry, a collection search path, and the name
374 of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
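
        Examples
        --------
        Sketch of the intended call pattern (``args`` as described in the
        class docstring)::

            butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
            # ``run`` is non-None only when --extend-run was given.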
392 """
393 butler, inputs, self = cls._makeReadParts(args)
394 run: str | None = None
395 if args.extend_run:
396 assert self.outputRun is not None, "Output collection has to be specified."
397 if self.outputRun is not None:
398 run = self.outputRun.name
399 _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
400 return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # we want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets, need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QBB instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
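
# Sketch of how _QBBFactory is used (mirrors runGraphQBB below; the
# ``qgraph`` and ``quantum_node`` names are illustrative):
#
#     factory = _QBBFactory(
#         butler_config=args.butler_config,
#         dimensions=qgraph.universe,
#         dataset_types={d.name: d for d in qgraph.registryDatasetTypes()},
#     )
#     limited_butler = factory(quantum_node.quantum)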
521# ------------------------
522# Exported definitions --
523# ------------------------
526class CmdLineFwk:
527 """PipelineTask framework which executes tasks from command line.
529 In addition to executing tasks this activator provides additional methods
530 for task management like dumping configuration or execution chain.
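
    Examples
    --------
    A sketch of the typical driver sequence (``args`` is the parsed command
    line and ``factory`` a `~lsst.pipe.base.TaskFactory`; both are assumed
    to come from the ``pipetask`` tooling)::

        fwk = CmdLineFwk()
        pipeline = fwk.makePipeline(args)
        qgraph = fwk.makeGraph(pipeline, args)
        if qgraph is not None:
            fwk.runPipeline(qgraph, factory, args)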
531 """
533 MP_TIMEOUT = 3600 * 24 * 30 # Default timeout (sec) for multiprocessing
535 def __init__(self) -> None:
536 pass
538 def makePipeline(self, args: SimpleNamespace) -> Pipeline:
539 """Build a pipeline from command line arguments.
541 Parameters
542 ----------
543 args : `types.SimpleNamespace`
544 Parsed command line.
546 Returns
547 -------
548 pipeline : `~lsst.pipe.base.Pipeline`
549 Newly-constructed pipeline.
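
        Examples
        --------
        Sketch of the expected namespace; the action objects mimic what the
        ``pipetask`` CLI constructs and all values are illustrative::

            args = SimpleNamespace(
                pipeline="pipeline.yaml",
                pipeline_actions=[
                    SimpleNamespace(action="config", label="isr", value="doBias=False"),
                ],
                save_pipeline=None,
                pipeline_dot=None,
            )
            pipeline = CmdLineFwk().makePipeline(args)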
550 """
551 if args.pipeline:
552 pipeline = Pipeline.from_uri(args.pipeline)
553 else:
554 pipeline = Pipeline("anonymous")
556 # loop over all pipeline actions and apply them in order
557 for action in args.pipeline_actions:
558 if action.action == "add_instrument":
559 pipeline.addInstrument(action.value)
561 elif action.action == "new_task":
562 pipeline.addTask(action.value, action.label)
564 elif action.action == "delete_task":
565 pipeline.removeTask(action.label)
567 elif action.action == "config":
568 # action value string is "field=value", split it at '='
569 field, _, value = action.value.partition("=")
570 pipeline.addConfigOverride(action.label, field, value)
572 elif action.action == "configfile":
573 pipeline.addConfigFile(action.label, action.value)
575 else:
576 raise ValueError(f"Unexpected pipeline action: {action.action}")
578 if args.save_pipeline:
579 pipeline.write_to_uri(args.save_pipeline)
581 if args.pipeline_dot:
582 pipeline2dot(pipeline, args.pipeline_dot)
584 return pipeline
586 def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
587 """Build a graph from command line arguments.
589 Parameters
590 ----------
591 pipeline : `~lsst.pipe.base.Pipeline`
592 Pipeline, can be empty or ``None`` if graph is read from a file.
593 args : `types.SimpleNamespace`
594 Parsed command line.
596 Returns
597 -------
598 graph : `~lsst.pipe.base.QuantumGraph` or `None`
599 If resulting graph is empty then `None` is returned.
600 """
601 # make sure that --extend-run always enables --skip-existing
602 if args.extend_run:
603 args.skip_existing = True
605 butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)
607 if args.skip_existing and run:
608 args.skip_existing_in += (run,)
610 if args.qgraph:
611 # click passes empty tuple as default value for qgraph_node_id
612 nodes = args.qgraph_node_id or None
613 qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
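
        Examples
        --------
        Plain numbers in ``args.memory_per_quantum`` are interpreted in
        megabytes via ``default_mem_units=u.MB``; a sketch with illustrative
        values::

            resources = ExecutionResources(num_cores=4, max_mem=2048, default_mem_units=u.MB)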
728 """
729 return ExecutionResources(
730 num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
731 )
733 def runPipeline(
734 self,
735 graph: QuantumGraph,
736 taskFactory: TaskFactory,
737 args: SimpleNamespace,
738 butler: Butler | None = None,
739 ) -> None:
740 """Execute complete QuantumGraph.
742 Parameters
743 ----------
744 graph : `~lsst.pipe.base.QuantumGraph`
745 Execution graph.
746 taskFactory : `~lsst.pipe.base.TaskFactory`
747 Task factory.
748 args : `types.SimpleNamespace`
749 Parsed command line.
750 butler : `~lsst.daf.butler.Butler`, optional
751 Data Butler instance, if not defined then new instance is made
752 using command line options.
753 """
754 if not args.enable_implicit_threading:
755 disable_implicit_threading()
757 # Check that output run defined on command line is consistent with
758 # quantum graph.
759 if args.output_run and graph.metadata:
760 graph_output_run = graph.metadata.get("output_run", args.output_run)
761 if graph_output_run != args.output_run:
762 raise ValueError(
763 f"Output run defined on command line ({args.output_run}) has to be "
764 f"identical to graph metadata ({graph_output_run}). "
765 "To update graph metadata run `pipetask update-graph-run` command."
766 )
768 # Make sure that --extend-run always enables --skip-existing,
769 # clobbering should be disabled if --extend-run is not specified.
770 if args.extend_run:
771 args.skip_existing = True
772 else:
773 args.clobber_outputs = False
775 # Make butler instance. QuantumGraph should have an output run defined,
776 # but we ignore it here and let command line decide actual output run.
777 if butler is None:
778 butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())
780 if args.skip_existing:
781 args.skip_existing_in += (butler.run,)
783 # Enable lsstDebug debugging. Note that this is done once in the
784 # main process before PreExecInit and it is also repeated before
785 # running each task in SingleQuantumExecutor (which may not be
786 # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with lsst.utils.timer.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.model_dump_json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate astropy table listing the number of quanta per task for a
        given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.graph.graph.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
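
        Examples
        --------
        The returned table renders like this (task labels and counts are
        illustrative)::

            Quanta   Tasks
            ------ ---------
               128       isr
                64 calibrate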
853 """
854 qg_quanta, qg_tasks = [], []
855 for task_def in qgraph.iterTaskGraph():
856 num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
857 qg_quanta.append(num_qnodes)
858 qg_tasks.append(task_def.label)
859 qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
860 return qg_task_table
862 def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
863 """Report a summary of the quanta in the graph.
865 Parameters
866 ----------
867 qgraph : `lsst.pipe.base.QuantumGraph`
868 The graph to be summarized.
870 Returns
871 -------
872 n_quanta : `int`
873 The number of quanta in the graph.
874 """
875 n_quanta = len(qgraph)
876 if n_quanta == 0:
877 _LOG.info("QuantumGraph contains no quanta.")
878 else:
879 if _LOG.isEnabledFor(logging.INFO):
880 qg_task_table = self._generateTaskTable(qgraph)
881 qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
882 quanta_str = "quantum" if n_quanta == 1 else "quanta"
883 n_tasks = len(qgraph.taskGraph)
884 n_tasks_plural = "" if n_tasks == 1 else "s"
885 _LOG.info(
886 "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
887 n_quanta,
888 quanta_str,
889 n_tasks,
890 n_tasks_plural,
891 qgraph.graphID,
892 qg_task_table_formatted,
893 )
894 return n_quanta
896 def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
897 """Import/instantiate graph fixup object.
899 Parameters
900 ----------
901 args : `types.SimpleNamespace`
902 Parsed command line.
904 Returns
905 -------
        fixup : `ExecutionGraphFixup` or `None`
            Fixup instance, or `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
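
        Examples
        --------
        ``args.graph_fixup`` holds a fully-qualified name that is resolved
        with `~lsst.utils.doImportType`; the name below is an illustrative
        assumption, not a real class::

            args.graph_fixup = "mypackage.fixups.MyFixup"

        The imported ``MyFixup`` must be a no-argument callable returning an
        `ExecutionGraphFixup` instance.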
913 """
914 if args.graph_fixup:
915 try:
916 factory = doImportType(args.graph_fixup)
917 except Exception as exc:
918 raise ValueError("Failed to import graph fixup class/method") from exc
919 try:
920 fixup = factory()
921 except Exception as exc:
922 raise ValueError("Failed to make instance of graph fixup") from exc
923 if not isinstance(fixup, ExecutionGraphFixup):
924 raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
925 return fixup
926 return None
928 def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
929 # Load quantum graph. We do not really need individual Quanta here,
930 # but we need datastore records for initInputs, and those are only
931 # available from Quanta, so load the whole thing.
932 qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
933 universe = qgraph.universe
935 # Collect all init input/output dataset IDs.
936 predicted_inputs: set[DatasetId] = set()
937 predicted_outputs: set[DatasetId] = set()
938 for taskDef in qgraph.iterTaskGraph():
939 if (refs := qgraph.initInputRefs(taskDef)) is not None:
940 predicted_inputs.update(ref.id for ref in refs)
941 if (refs := qgraph.initOutputRefs(taskDef)) is not None:
942 predicted_outputs.update(ref.id for ref in refs)
943 predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
944 # remove intermediates from inputs
945 predicted_inputs -= predicted_outputs
947 # Very inefficient way to extract datastore records from quantum graph,
948 # we have to scan all quanta and look at their datastore records.
949 datastore_records: dict[str, DatastoreRecordData] = {}
950 for quantum_node in qgraph:
951 for store_name, records in quantum_node.quantum.datastore_records.items():
952 subset = records.subset(predicted_inputs)
953 if subset is not None:
954 datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)
956 dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}
958 # Make butler from everything.
959 butler = QuantumBackedButler.from_predicted(
960 config=args.butler_config,
961 predicted_inputs=predicted_inputs,
962 predicted_outputs=predicted_outputs,
963 dimensions=universe,
964 datastore_records=datastore_records,
965 search_paths=args.config_search_path,
966 dataset_types=dataset_types,
967 )
969 # Save all InitOutputs, configs, etc.
970 preExecInit = PreExecInitLimited(butler, task_factory)
971 preExecInit.initialize(qgraph)
973 def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
974 if not args.enable_implicit_threading:
975 disable_implicit_threading()
977 # Load quantum graph.
978 nodes = args.qgraph_node_id or None
979 qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)
981 if qgraph.metadata is None:
982 raise ValueError("QuantumGraph is missing metadata, cannot continue.")
984 self._summarize_qgraph(qgraph)
986 dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}
988 _butler_factory = _QBBFactory(
989 butler_config=args.butler_config,
990 dimensions=qgraph.universe,
991 dataset_types=dataset_types,
992 )
994 # make special quantum executor
995 resources = self._make_execution_resources(args)
996 quantumExecutor = SingleQuantumExecutor(
997 butler=None,
998 taskFactory=task_factory,
999 enableLsstDebug=args.enableLsstDebug,
1000 exitOnKnownError=args.fail_fast,
1001 limited_butler_factory=_butler_factory,
1002 resources=resources,
1003 )
1005 timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
1006 executor = MPGraphExecutor(
1007 numProc=args.processes,
1008 timeout=timeout,
1009 startMethod=args.start_method,
1010 quantumExecutor=quantumExecutor,
1011 failFast=args.fail_fast,
1012 pdb=args.pdb,
1013 )
1014 try:
1015 with lsst.utils.timer.profile(args.profile, _LOG):
1016 executor.execute(qgraph)
1017 finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.model_dump_json(exclude_none=True, indent=2))