Coverage for python/lsst/pipe/base/graphBuilder.py: 50%

32 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-04 10:03 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Module defining GraphBuilder class and related methods. 

29""" 

30 

31from __future__ import annotations 

32 

33__all__ = ["GraphBuilder"] 

34 

35 

36from collections.abc import Iterable, Mapping 

37from typing import Any 

38 

39from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry 

40from lsst.daf.butler.registry.wildcards import CollectionWildcard 

41 

42from ._datasetQueryConstraints import DatasetQueryConstraintVariant 

43from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder 

44from .graph import QuantumGraph 

45from .pipeline import Pipeline, TaskDef 

46from .pipeline_graph import PipelineGraph 

47 

48# Re-exports for backwards-compatibility. 

49from .quantum_graph_builder import GraphBuilderError # noqa: F401 

50from .quantum_graph_builder import OutputExistsError # noqa: F401 

51from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401 

52 

53 

class GraphBuilder:
    """GraphBuilder class is responsible for building task execution graph
    from a Pipeline.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Data butler instance.
    skipExistingIn : `~typing.Any`
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None` then fill datastore records in each generated Quantum.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        self.registry = registry
        # Convenience handle to the dimension universe used by this registry.
        self.dimensions = registry.dimensions
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
        """Create execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
            Pipeline definition, task names/classes and their configs.
        collections : `~typing.Any`
            Expressions representing the collections to search for input
            datasets. See :ref:`daf_butler_ordered_collection_searches`.
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets. Collection does not have to exist and it will be
            created when graph is executed.
        userQuery : `str`
            String which defines user-defined selection for registry, should
            be empty or `None` if there are no restrictions on data selection.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            The query constraint variant that should be used to constrain the
            query based on dataset existence, defaults to
            `DatasetQueryConstraintVariant.ALL`.
        metadata : Optional Mapping of `str` to primitives
            This is an optional parameter of extra data to carry with the
            graph. Entries in this mapping should be able to be serialized in
            JSON.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``userQuery`` expression, keyed by the identifiers they replace.
        dataId : `lsst.daf.butler.DataCoordinate`, optional
            Data ID that should also be included in the query constraint.
            Ignored if ``pipeline`` is a `Pipeline` instance (which has its
            own data ID).

        Returns
        -------
        graph : `QuantumGraph`
            The constructed graph.

        Raises
        ------
        UserExpressionError
            Raised when user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        Exception
            Other exceptions types may be raised by underlying registry
            classes.
        """
        # Normalize both accepted pipeline forms into a PipelineGraph, which
        # is what the underlying quantum-graph builder works with.
        if isinstance(pipeline, Pipeline):
            pipeline_graph = pipeline.to_graph()
        else:
            # An iterable of TaskDef does not carry its own data ID, so honor
            # the explicit ``dataId`` argument in this branch only.
            pipeline_graph = PipelineGraph(data_id=dataId)
            for task_def in pipeline:
                pipeline_graph.add_task(
                    task_def.label,
                    task_def.taskClass,
                    config=task_def.config,
                    connections=task_def.connections,
                )
        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place. Soon this interface will be deprecated in favor
        # of QuantumGraphBuilder (which takes a Butler directly, as all new
        # code should) anyway.
        butler: Butler = self.registry._butler  # type: ignore
        # Internal consistency check only (stripped under ``python -O``): the
        # caller-supplied datastore, if any, must be the butler's own.
        assert butler._datastore is self.datastore or self.datastore is None
        # Delegate the actual graph construction, translating this class's
        # legacy camelCase/None-based arguments into the new builder's API
        # (empty tuple / empty string instead of None).
        qgb = AllDimensionsQuantumGraphBuilder(
            pipeline_graph,
            butler,
            input_collections=CollectionWildcard.from_expression(collections).require_ordered(),
            output_run=run,
            skip_existing_in=self.skipExistingIn if self.skipExistingIn is not None else (),
            clobber=self.clobberOutputs,
            where=userQuery if userQuery is not None else "",
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return qgb.build(metadata)