Coverage for python / lsst / daf / butler / script / retrieveArtifacts.py: 35%
22 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-01 08:18 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-01 08:18 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("retrieveArtifacts",)
32import itertools
33import logging
34from typing import TYPE_CHECKING
36from .._butler import Butler
37from .queryDatasets import QueryDatasets
39if TYPE_CHECKING:
40 from lsst.resources import ResourcePath
42log = logging.getLogger(__name__)
def retrieveArtifacts(
    repo: str,
    destination: str,
    dataset_type: tuple[str, ...],
    collections: tuple[str, ...],
    where: str,
    find_first: bool,
    limit: int,
    order_by: tuple[str, ...],
    transfer: str,
    preserve_path: bool,
    clobber: bool,
    zip: bool,
) -> list[ResourcePath]:
    """Parameters are those required for querying datasets plus a destination
    URI.

    Parameters
    ----------
    repo : `str`
        URI string of the Butler repo to use.
    destination : `str`
        URI string of the directory to write the artifacts.
    dataset_type : `tuple` of `str`
        Dataset type names. An empty tuple implies all dataset types.
    collections : `tuple` of `str`
        Names of collection globs to match. An empty tuple implies all
        collections.
    where : `str`
        Query modification string.
    find_first : `bool`
        Whether only the first match should be used.
    limit : `int`
        Limit the number of results to be returned. A value of 0 means
        unlimited. A negative value is used to specify a cap where a warning
        is issued if that cap is hit.
    order_by : `tuple` of `str`
        Dimensions to use for sorting results. If no ordering is given the
        results of ``limit`` are undefined and default sorting of the resulting
        datasets will be applied. It is an error if the requested ordering
        is inconsistent with the dimensions of the dataset type being queried.
    transfer : `str`
        Transfer mode to use when placing artifacts in the destination.
    preserve_path : `bool`
        If `True` the full datastore path will be retained within the
        destination directory, else only the filename will be used.
    clobber : `bool`
        If `True` allow transfers to overwrite files at the destination.
    zip : `bool`
        If `True` retrieve the datasets and place in a zip file.

    Returns
    -------
    transferred : `list` of `lsst.resources.ResourcePath`
        The destination URIs of every transferred artifact or a list with a
        single entry of the name of the zip file.
    """
    # Empty selections are treated as wildcards for the query.
    glob_patterns = dataset_type if dataset_type else "*"
    collection_patterns: tuple[str, ...] = collections if collections else ("*",)

    with Butler.from_config(repo, writeable=False) as butler:
        dataset_query = QueryDatasets(
            butler=butler,
            glob=glob_patterns,
            collections=collection_patterns,
            where=where,
            find_first=find_first,
            limit=limit,
            order_by=order_by,
            show_uri=False,
            with_dimension_records=True,
        )
        # Deduplicate into a set so we can report a meaningful count.
        matched_refs = set(itertools.chain.from_iterable(dataset_query.getDatasets()))
        log.info("Number of datasets matching query: %d", len(matched_refs))
        if not matched_refs:
            return []

        if zip:
            # Everything goes into a single archive; return its location.
            archive_uri = butler.retrieve_artifacts_zip(
                matched_refs, destination=destination, overwrite=clobber
            )
            return [archive_uri]

        # Direct transfer of each artifact to the destination directory.
        return butler.retrieveArtifacts(
            matched_refs,
            destination=destination,
            transfer=transfer,
            preserve_path=preserve_path,
            overwrite=clobber,
        )