Coverage for python / lsst / daf / butler / script / transferDatasets.py: 53%
15 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:36 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("transferDatasets",)
31import itertools
32import logging
34from .._butler import Butler
35from .queryDatasets import QueryDatasets
37log = logging.getLogger(__name__)
def transferDatasets(
    source: str,
    dest: str,
    dataset_type: tuple[str, ...],
    collections: tuple[str, ...],
    where: str,
    find_first: bool,
    limit: int,
    order_by: tuple[str, ...],
    transfer: str,
    register_dataset_types: bool,
    transfer_dimensions: bool = True,
    dry_run: bool = False,
) -> int:
    """Transfer datasets from run in source to dest.

    Parameters
    ----------
    source : `str`
        URI string of the source Butler repo.
    dest : `str`
        URI string of the destination Butler repo.
    dataset_type : `tuple` of `str`
        Dataset type names. An empty tuple implies all dataset types.
    collections : `tuple` of `str`
        Names of collection globs to match. An empty tuple implies all
        collections.
    where : `str`
        Query modification string.
    find_first : `bool`
        Whether only the first match should be used.
    limit : `int`
        Limit the number of results to be returned. A value of 0 means
        unlimited. A negative value is used to specify a cap where a warning
        is issued if that cap is hit.
    order_by : `tuple` of `str`
        Dimensions to use for sorting results. If no ordering is given the
        results of ``limit`` are undefined and default sorting of the resulting
        datasets will be applied. It is an error if the requested ordering
        is inconsistent with the dimensions of the dataset type being queried.
    transfer : `str`
        Transfer mode to use when placing artifacts in the destination.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with
        datasets. It can be more efficient to disable this if it is known
        that all dimensions exist.
    dry_run : `bool`, optional
        If `True` no transfers are done but the number of transfers that
        would be done is reported.

    Returns
    -------
    count : `int`
        Number of datasets transferred (or, if ``dry_run`` is `True`, the
        number that would have been transferred).
    """
    with (
        Butler.from_config(source, writeable=False) as source_butler,
        Butler.from_config(dest, writeable=True) as dest_butler,
    ):
        # Empty selections mean "match everything" in the query API.
        dataset_type_expr = dataset_type or "*"
        collections_expr: tuple[str, ...] = collections or ("*",)

        query = QueryDatasets(
            butler=source_butler,
            glob=dataset_type_expr,
            collections=collections_expr,
            where=where,
            find_first=find_first,
            limit=limit,
            order_by=order_by,
            show_uri=False,
            with_dimension_records=True,
        )
        # Place results in a set to remove duplicates (which should not exist
        # in new query system). chain.from_iterable consumes the result pages
        # lazily instead of unpacking them all into call arguments.
        source_refs_set = set(itertools.chain.from_iterable(query.getDatasets()))

        transferred = dest_butler.transfer_from(
            source_butler,
            source_refs_set,
            transfer=transfer,
            register_dataset_types=register_dataset_types,
            transfer_dimensions=transfer_dimensions,
            dry_run=dry_run,
        )
        return len(transferred)