Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%
48 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-11 17:45 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-11 17:45 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28__all__ = ["transfer_from_graph"]
30from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry
31from lsst.daf.butler.registry import MissingCollectionError
32from lsst.pipe.base import QuantumGraph
def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Copy the output datasets predicted by a quantum graph into ``dest``.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Forwarded to ``Butler.transfer_from``; indicates whether missing
        dataset types should be registered.
    transfer_dimensions : `bool`
        Forwarded to ``Butler.transfer_from``; indicates whether dimensions
        should be transferred along with datasets. It can be more efficient
        to disable this if it is known that all dimensions exist.
    update_output_chain : `bool`
        If the quantum graph metadata includes both an output run name and
        an output collection, update that collection's chain definition so
        that the run name is the first collection in the chain. Only acted
        upon when at least one dataset was transferred.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # The whole graph is loaded into memory.
    quantum_graph = QuantumGraph.loadUri(graph)

    # Gather every ref this graph could produce: global init-outputs,
    # per-task init-outputs, and the outputs of each quantum.
    predicted_refs: set[DatasetRef] = set(quantum_graph.globalInitOutputRefs())
    for task_def in quantum_graph.iterTaskGraph():
        predicted_refs.update(quantum_graph.initOutputRefs(task_def) or ())
    for node in quantum_graph:
        predicted_refs.update(*node.quantum.outputs.values())

    # Dataset type definitions as recorded for the data repository; their
    # storage classes may differ from those used in the quanta.
    dataset_types = {dataset_type.name: dataset_type for dataset_type in quantum_graph.registryDatasetTypes()}

    # Re-express each ref with the data repository storage class when the
    # two definitions disagree.
    output_refs = set()
    for ref in predicted_refs:
        quantum_type = ref.datasetType
        repo_storage_class = dataset_types.get(quantum_type.name, quantum_type).storageClass_name
        if repo_storage_class == quantum_type.storageClass_name:
            output_refs.add(ref)
        else:
            output_refs.add(ref.overrideStorageClass(repo_storage_class))

    # The QBB is configured from the same location as the output Butler.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=quantum_graph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

    dest_butler = Butler.from_config(dest, writeable=True)

    count = len(
        dest_butler.transfer_from(
            qbb,
            output_refs,
            transfer="auto",
            register_dataset_types=register_dataset_types,
            transfer_dimensions=transfer_dimensions,
        )
    )

    # The chain is only touched when something was transferred and the
    # caller asked for the update.
    if count <= 0 or not update_output_chain:
        return count
    metadata = quantum_graph.metadata
    if metadata is None:
        return count

    # These are defined in CmdLineFwk.
    output_run = metadata.get("output_run")
    output_collection = metadata.get("output")
    input_collections = metadata.get("input")
    if output_run is not None and output_collection is not None:
        _update_chain(dest_butler.registry, output_collection, output_run, input_collections)

    return count
def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Make ``output_run`` the first member of the ``output_chain`` chain.

    Parameters
    ----------
    registry : `Registry`
        Registry in which the chain is looked up and updated.
    output_chain : `str`
        Name of the chained collection to update or create.
    output_run : `str`
        Name of the run to place at the front of the chain.
    inputs : `list` [`str`] or `None`
        Input collections, used only when the chain has to be created; they
        are flattened and placed after ``output_run``.
    """
    try:
        # Looking up the chain raises if ``output_chain`` is unknown.
        members = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # No such collection yet: create the chained collection from the
        # output run followed by the flattened inputs (this reproduces logic
        # in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        flattened_inputs = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        new_chain = [output_run]
        new_chain.extend(name for name in flattened_inputs if name != output_run)
        registry.setCollectionChain(output_chain, new_chain)
        return

    # Nothing to do when the run already heads the chain.
    if members[:1] == [output_run]:
        return

    # Drop the run from its current position (if present) so that it can be
    # re-inserted at the front.
    if output_run in members:
        members.remove(output_run)
    registry.setCollectionChain(output_chain, [output_run, *members])