Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%

48 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-11 09:31 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28__all__ = ["transfer_from_graph"] 

29 

30from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry 

31from lsst.daf.butler.registry import MissingCollectionError 

32from lsst.pipe.base import QuantumGraph 

33 

34 

def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with
        datasets. It can be more efficient to disable this if it is known
        that all dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        run name as the first collection in the chain.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # Read whole graph into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph: global init
    # outputs, per-task init outputs, and every quantum's predicted outputs.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for otherRefs in qnode.quantum.outputs.values():
            original_output_refs.update(otherRefs)

    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)

    # Make QBB; its config is the same as the output Butler's. The refs to
    # transfer are presented to the QBB as predicted *inputs* so it can read
    # them out of the graph-backed datastore.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These metadata keys are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        # Named ``inputs`` (not ``input``) to avoid shadowing the builtin.
        inputs = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, inputs)

    return count

122 

123 

def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Ensure ``output_run`` is the first member of the ``output_chain``
    chained collection.

    If the chain does not exist it is created, seeded with the flattened
    ``inputs`` plus ``output_run`` at the front (reproducing the logic in
    CmdLineFwk). If it already exists, ``output_run`` is moved (or inserted)
    at the head of the existing definition.
    """
    try:
        # Raises MissingCollectionError if output_chain does not exist
        # (and a different exception if it exists but is not a chain).
        members = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # Chain is missing: register it and build its definition from the
        # flattened inputs, with the output run prepended.
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        flattened = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        new_members = [output_run]
        new_members.extend(name for name in flattened if name != output_run)
        registry.setCollectionChain(output_chain, new_members)
        return

    # Chain exists: move output_run to the front if it is not already there.
    if output_run in members:
        position = members.index(output_run)
        if position == 0:
            # Already at the top; nothing to do.
            return
        # Remove the first occurrence; it is re-inserted at the front below.
        members.pop(position)
    members.insert(0, output_run)
    registry.setCollectionChain(output_chain, members)