Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%

48 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ["transfer_from_graph"] 

23 

24from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry 

25from lsst.daf.butler.registry import MissingCollectionError 

26from lsst.pipe.base import QuantumGraph 

27 

28 

def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with datasets.
        It can be more efficient to disable this if it is known that all
        dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        run name as the first collection in the chain.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # Read whole graph into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph: global init
    # outputs, per-task init outputs, and every quantum's regular outputs.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for otherRefs in qnode.quantum.outputs.values():
            original_output_refs.update(otherRefs)

    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)

    # Make QBB, its config is the same as output Butler.  The graph's outputs
    # are this butler's predicted *inputs* because we read them for transfer.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These metadata keys are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        # Renamed from ``input`` to avoid shadowing the builtin.
        input_collections = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, input_collections)

    return count

116 

117 

118def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None: 

119 """Update chain definition if it exists to include run as the first item 

120 in a chain. If it does not exist then create it to include all inputs and 

121 output. 

122 """ 

123 try: 

124 # If output_chain is not a chain the exception will be raised. 

125 chain_definition = list(registry.getCollectionChain(output_chain)) 

126 except MissingCollectionError: 

127 # We have to create chained collection to include inputs and output run 

128 # (this reproduces logic in CmdLineFwk). 

129 registry.registerCollection(output_chain, type=CollectionType.CHAINED) 

130 chain_definition = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else [] 

131 chain_definition = [output_run] + [run for run in chain_definition if run != output_run] 

132 registry.setCollectionChain(output_chain, chain_definition) 

133 else: 

134 # If run is in the chain but not the first item then remove it, will 

135 # re-insert at front below. 

136 try: 

137 index = chain_definition.index(output_run) 

138 if index == 0: 

139 # It is already at the top. 

140 return 

141 else: 

142 del chain_definition[index] 

143 except ValueError: 

144 pass 

145 

146 chain_definition.insert(0, output_run) 

147 registry.setCollectionChain(output_chain, chain_definition)