Coverage for python/lsst/pipe/base/graphBuilder.py: 50%

32 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-11 09:32 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Module defining GraphBuilder class and related methods. 

29""" 

30 

31from __future__ import annotations 

32 

33__all__ = ["GraphBuilder"] 

34 

35 

36from collections.abc import Iterable, Mapping 

37from typing import Any 

38 

39from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry 

40from lsst.daf.butler.registry.wildcards import CollectionWildcard 

41 

42from ._datasetQueryConstraints import DatasetQueryConstraintVariant 

43from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder 

44from .graph import QuantumGraph 

45from .pipeline import Pipeline, TaskDef 

46from .pipeline_graph import PipelineGraph 

47 

48# Re-exports for backwards-compatibility. 

49from .quantum_graph_builder import GraphBuilderError # noqa: F401 

50from .quantum_graph_builder import OutputExistsError # noqa: F401 

51from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401 

52 

53 

class GraphBuilder:
    """GraphBuilder class is responsible for building task execution graph
    from a Pipeline.

    This is a backwards-compatibility wrapper: all real work is delegated to
    `AllDimensionsQuantumGraphBuilder` (see `makeGraph`).

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Data butler instance.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None` then fill datastore records in each generated Quantum.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        self.registry = registry
        # Dimension universe cached from the registry for convenience.
        self.dimensions = registry.dimensions
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
        """Create execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
            Pipeline definition, task names/classes and their configs.
        collections
            Expressions representing the collections to search for input
            datasets. See :ref:`daf_butler_ordered_collection_searches`.
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets. Collection does not have to exist and it will be
            created when graph is executed.
        userQuery : `str`
            String which defines user-defined selection for registry, should
            be empty or `None` if there are no restrictions on data selection.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            The query constraint variant that should be used to constrain the
            query based on dataset existence, defaults to
            `DatasetQueryConstraintVariant.ALL`.
        metadata : Optional Mapping of `str` to primitives
            This is an optional parameter of extra data to carry with the
            graph. Entries in this mapping should be able to be serialized in
            JSON.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into the
            ``userQuery`` expression, keyed by the identifiers they replace.
        dataId : `lsst.daf.butler.DataCoordinate`, optional
            Data ID that should also be included in the query constraint.
            Ignored if ``pipeline`` is a `Pipeline` instance (which has its
            own data ID).

        Returns
        -------
        graph : `QuantumGraph`
            The constructed quantum graph.

        Raises
        ------
        UserExpressionError
            Raised when user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        Exception
            Other exceptions types may be raised by underlying registry
            classes.
        """
        # Normalize both accepted pipeline forms to a PipelineGraph, which is
        # what the underlying quantum-graph builder consumes.
        if isinstance(pipeline, Pipeline):
            # A Pipeline carries its own data ID; ``dataId`` is ignored here.
            pipeline_graph = pipeline.to_graph()
        else:
            pipeline_graph = PipelineGraph(data_id=dataId)
            for task_def in pipeline:
                pipeline_graph.add_task(
                    task_def.label,
                    task_def.taskClass,
                    config=task_def.config,
                    connections=task_def.connections,
                )
        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place.  Soon this interface will be deprecated in
        # favor of QuantumGraphBuilder (which takes a Butler directly, as all
        # new code should) anyway.
        butler: Butler = self.registry._butler  # type: ignore
        # Sanity check only (stripped under ``python -O``): any datastore
        # passed to __init__ must be the one owned by that same butler.
        assert butler._datastore is self.datastore or self.datastore is None
        qgb = AllDimensionsQuantumGraphBuilder(
            pipeline_graph,
            butler,
            # require_ordered() rejects unordered expressions (e.g. ``...``)
            # up front, since input-collection search order matters.
            input_collections=CollectionWildcard.from_expression(collections).require_ordered(),
            output_run=run,
            # The new builder expects an empty sequence / empty string rather
            # than None for "no skip collections" / "no user query".
            skip_existing_in=self.skipExistingIn if self.skipExistingIn is not None else (),
            clobber=self.clobberOutputs,
            where=userQuery if userQuery is not None else "",
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return qgb.build(metadata)