Coverage for python/lsst/pipe/base/graphBuilder.py: 50%
32 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-31 09:39 +0000
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-31 09:39 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Module defining GraphBuilder class and related methods.
23"""
25from __future__ import annotations
27__all__ = ["GraphBuilder"]
30from collections.abc import Iterable, Mapping
31from typing import Any
33from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry
34from lsst.daf.butler.registry.wildcards import CollectionWildcard
36from ._datasetQueryConstraints import DatasetQueryConstraintVariant
37from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
38from .graph import QuantumGraph
39from .pipeline import Pipeline, TaskDef
40from .pipeline_graph import PipelineGraph
42# Re-exports for backwards-compatibility.
43from .quantum_graph_builder import GraphBuilderError # noqa: F401
44from .quantum_graph_builder import OutputExistsError # noqa: F401
45from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401
class GraphBuilder:
    """Build a task execution graph (`QuantumGraph`) from a pipeline.

    This class is a thin front end: `makeGraph` assembles a `PipelineGraph`
    and hands the real work to `AllDimensionsQuantumGraphBuilder`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry to build against. `makeGraph` reads its private ``_butler``
        attribute, so it is expected to be a registry that wraps a butler.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`. `None` (default) is
        forwarded to the underlying builder as an empty tuple.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None`, it must be the very same object as the datastore of
        the butler behind ``registry``; `makeGraph` asserts this.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        # Plain attribute storage; nothing is validated until makeGraph runs.
        self.registry = registry
        self.dimensions = registry.dimensions
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
        """Create the execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
            Pipeline definition, task names/classes and their configs.
        collections
            Expressions representing the collections to search for input
            datasets. See :ref:`daf_butler_ordered_collection_searches`.
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets. The collection does not have to exist; it will
            be created when the graph is executed.
        userQuery : `str` or `None`
            User-defined selection expression for the registry query. `None`
            is forwarded as an empty string, meaning no restriction on data
            selection.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            The query constraint variant used to constrain the query based
            on dataset existence; defaults to
            `DatasetQueryConstraintVariant.ALL`.
        metadata : `~collections.abc.Mapping` [ `str`, `~typing.Any` ], \
                optional
            Extra data to carry with the graph. Entries in this mapping
            should be serializable to JSON.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``userQuery`` expression, keyed by the identifiers they
            replace.
        dataId : `lsst.daf.butler.DataCoordinate`, optional
            Data ID that should also be included in the query constraint.
            Only used when ``pipeline`` is an iterable of `TaskDef`; ignored
            if ``pipeline`` is a `Pipeline` instance (which has its own data
            ID).

        Returns
        -------
        graph : `QuantumGraph`
            The graph produced by the underlying builder.

        Raises
        ------
        UserExpressionError
            Raised when the user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        Exception
            Other exception types may be raised by the underlying registry
            classes.
        """
        if not isinstance(pipeline, Pipeline):
            # A bare sequence of task definitions: assemble the pipeline
            # graph task by task, attaching the optional data ID.
            graph = PipelineGraph(data_id=dataId)
            for definition in pipeline:
                graph.add_task(
                    definition.label,
                    definition.taskClass,
                    config=definition.config,
                    connections=definition.connections,
                )
        else:
            graph = pipeline.to_graph()

        # The registry is assumed to be a RegistryShim wrapping a butler,
        # since that is now the only kind of Registry that code outside
        # Butler should be able to obtain. Any datastore given at
        # construction must come from that same butler. This interface is
        # slated for deprecation in favor of QuantumGraphBuilder, which takes
        # a Butler directly.
        butler: Butler = self.registry._butler  # type: ignore
        assert butler._datastore is self.datastore or self.datastore is None

        # Normalize the optional arguments the underlying builder expects as
        # concrete values rather than `None`.
        ordered_inputs = CollectionWildcard.from_expression(collections).require_ordered()
        skip_collections = () if self.skipExistingIn is None else self.skipExistingIn
        where_clause = "" if userQuery is None else userQuery

        builder = AllDimensionsQuantumGraphBuilder(
            graph,
            butler,
            input_collections=ordered_inputs,
            output_run=run,
            skip_existing_in=skip_collections,
            clobber=self.clobberOutputs,
            where=where_clause,
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return builder.build(metadata)