Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8% (48 statements)
coverage.py v7.2.7, created at 2023-07-23 08:14 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["transfer_from_graph"]

from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry
from lsst.daf.butler.registry import MissingCollectionError
from lsst.pipe.base import QuantumGraph


def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
36 """Transfer output datasets from quantum graph to dest.
38 Parameters
39 ----------
40 graph : `str`
41 URI string of the quantum graph.
42 dest : `str`
43 URI string of the destination Butler repo.
44 register_dataset_types : `bool`
45 Indicate whether missing dataset types should be registered.
46 transfer_dimensions : `bool`
47 Indicate whether dimensions should be transferred along with datasets.
48 It can be more efficient to disable this if it is known that all
49 dimensions exist.
50 update_output_chain : `bool`
51 If quantum graph metadata includes output run name and output
52 collection which is a chain, update the chain definition to include run
53 name as a the first collection in the chain.
55 Returns
56 -------
57 count : `int`
58 Actual count of transferred datasets.
59 """
    # Read whole graph into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for otherRefs in qnode.quantum.outputs.values():
            original_output_refs.update(otherRefs)
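    # original_output_refs now holds everything the graph could have produced:
    # global init-outputs, per-task init-outputs, and all per-quantum outputs.
    # These refs are resolved (they carry dataset IDs), which the
    # QuantumBackedButler below relies on.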
    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)
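    # At this point every ref in output_refs carries the registry-defined
    # storage class, so the refs match the dataset type definitions that the
    # destination repository will use.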
    # Make a QuantumBackedButler; its config is the same as that of the
    # output Butler.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )
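    # The QBB acts as the source "butler" for the transfer below: the graph's
    # predicted outputs are declared as its inputs, so dest_butler can pull
    # them into the destination repository and register them there.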

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        input = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, input)

    return count


def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
119 """Update chain definition if it exists to include run as the first item
120 in a chain. If it does not exist then create it to include all inputs and
121 output.
122 """
    try:
        # If output_chain does not exist, MissingCollectionError is raised.
        chain_definition = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # We have to create a chained collection to include inputs and the
        # output run (this reproduces logic in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        chain_definition = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        chain_definition = [output_run] + [run for run in chain_definition if run != output_run]
        registry.setCollectionChain(output_chain, chain_definition)
    else:
        # If the run is in the chain but not the first item, remove it; it
        # will be re-inserted at the front below.
        try:
            index = chain_definition.index(output_run)
            if index == 0:
                # It is already at the top.
                return
            else:
                del chain_definition[index]
        except ValueError:
            pass

        chain_definition.insert(0, output_run)
        registry.setCollectionChain(output_chain, chain_definition)
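
# A minimal usage sketch (illustrative only): the graph URI and repository
# path below are hypothetical, and in practice this function is normally
# reached through command-line tooling rather than called directly.
#
#     from lsst.pipe.base.script.transfer_from_graph import transfer_from_graph
#
#     n_transferred = transfer_from_graph(
#         graph="/path/to/pipeline.qgraph",
#         dest="/path/to/butler/repo",
#         register_dataset_types=True,
#         transfer_dimensions=False,
#         update_output_chain=True,
#     )
#     print(f"Transferred {n_transferred} datasets.")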