Coverage for python/lsst/pipe/base/graphBuilder.py: 58%
37 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-11 10:49 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-11 10:49 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Module defining GraphBuilder class and related methods.
29"""
31from __future__ import annotations
33__all__ = ["GraphBuilder"]
36import warnings
37from collections.abc import Iterable, Mapping
38from typing import Any
40from deprecated.sphinx import deprecated
41from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry
42from lsst.daf.butler.registry.wildcards import CollectionWildcard
43from lsst.utils.introspection import find_outside_stacklevel
45from ._datasetQueryConstraints import DatasetQueryConstraintVariant
46from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
47from .graph import QuantumGraph
48from .pipeline import Pipeline, TaskDef
49from .pipeline_graph import PipelineGraph
51# Re-exports for backwards-compatibility.
52from .quantum_graph_builder import GraphBuilderError # noqa: F401
53from .quantum_graph_builder import OutputExistsError # noqa: F401
54from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401
# TODO: remove this module on DM-40443.
# Importing this module emits a FutureWarning. The stack level is computed by
# find_outside_stacklevel for the "lsst.pipe.base" package (presumably so the
# warning is reported against calling code outside that package).
warnings.warn(
    "The graphBuilder module is deprecated in favor of quantum_graph_builder, and will be removed after v27.",
    FutureWarning,
    stacklevel=find_outside_stacklevel("lsst.pipe.base"),
)
@deprecated(
    "Deprecated in favor of QuantumGraphBuilder and will be removed after v27.",
    version="v27.0",
    category=FutureWarning,
)
class GraphBuilder:
    """Build a task execution graph (`QuantumGraph`) from a pipeline.

    This class is a thin, deprecated front end: `makeGraph` delegates the
    actual work to `AllDimensionsQuantumGraphBuilder`.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry of the data repository. `makeGraph` reads its private
        ``_butler`` attribute, so it must wrap a butler.
    skipExistingIn : `~typing.Any`
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None` then fill datastore records in each generated Quantum.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        # Keep the registry and its dimension universe side by side.
        self.registry = registry
        self.dimensions = registry.dimensions
        # Remaining options are stored verbatim and consumed by makeGraph.
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
        """Create the execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
            Pipeline definition, task names/classes and their configs.
        collections : `~typing.Any`
            Expressions representing the collections to search for input
            datasets. See :ref:`daf_butler_ordered_collection_searches`.
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets. The collection does not have to exist; it will
            be created when the graph is executed.
        userQuery : `str` or `None`
            User-defined selection expression for the registry query. `None`
            is treated the same as an empty string, i.e. no restrictions on
            data selection.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            The query constraint variant that should be used to constrain
            the query based on dataset existence, defaults to
            `DatasetQueryConstraintVariant.ALL`.
        metadata : `~collections.abc.Mapping` [ `str`, `~typing.Any` ], \
                optional
            Extra data to carry with the graph. Entries in this mapping
            should be serializable to JSON.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``userQuery`` expression, keyed by the identifiers they
            replace.
        dataId : `lsst.daf.butler.DataCoordinate`, optional
            Data ID that should also be included in the query constraint.
            Ignored if ``pipeline`` is a `Pipeline` instance (which has its
            own data ID).

        Returns
        -------
        graph : `QuantumGraph`
            The constructed graph.

        Raises
        ------
        UserExpressionError
            Raised when the user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        Exception
            Other exception types may be raised by underlying registry
            classes.
        """
        if not isinstance(pipeline, Pipeline):
            # A bare iterable of TaskDef: assemble the pipeline graph by
            # hand, seeding it with the caller-provided data ID.
            task_graph = PipelineGraph(data_id=dataId)
            for definition in pipeline:
                task_graph.add_task(
                    definition.label,
                    definition.taskClass,
                    config=definition.config,
                    connections=definition.connections,
                )
        else:
            task_graph = pipeline.to_graph()

        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place.
        butler: Butler = self.registry._butler  # type: ignore
        assert butler._datastore is self.datastore or self.datastore is None

        ordered_inputs = CollectionWildcard.from_expression(collections).require_ordered()

        # Translate "not provided" (None) into the empty values expected by
        # the underlying builder.
        if self.skipExistingIn is not None:
            skip_in = self.skipExistingIn
        else:
            skip_in = ()
        if userQuery is not None:
            where_clause = userQuery
        else:
            where_clause = ""

        builder = AllDimensionsQuantumGraphBuilder(
            task_graph,
            butler,
            input_collections=ordered_inputs,
            output_run=run,
            skip_existing_in=skip_in,
            clobber=self.clobberOutputs,
            where=where_clause,
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        # Datastore records are attached only when a datastore was supplied
        # at construction.
        with_records = self.datastore is not None
        return builder.build(metadata, attach_datastore_records=with_records)