Coverage for python / lsst / daf / butler / script / retrieveArtifacts.py: 35%

22 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:49 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("retrieveArtifacts",) 

31 

32import itertools 

33import logging 

34from typing import TYPE_CHECKING 

35 

36from .._butler import Butler 

37from .queryDatasets import QueryDatasets 

38 

39if TYPE_CHECKING: 

40 from lsst.resources import ResourcePath 

41 

42log = logging.getLogger(__name__) 

43 

44 

def retrieveArtifacts(
    repo: str,
    destination: str,
    dataset_type: tuple[str, ...],
    collections: tuple[str, ...],
    where: str,
    find_first: bool,
    limit: int,
    order_by: tuple[str, ...],
    transfer: str,
    preserve_path: bool,
    clobber: bool,
    zip: bool,
) -> list[ResourcePath]:
    """Retrieve dataset artifacts matching a query and copy them to a
    destination.

    Parameters
    ----------
    repo : `str`
        URI string of the Butler repo to use.
    destination : `str`
        URI string of the directory to write the artifacts.
    dataset_type : `tuple` of `str`
        Dataset type names. An empty tuple implies all dataset types.
    collections : `tuple` of `str`
        Names of collection globs to match. An empty tuple implies all
        collections.
    where : `str`
        Query modification string.
    find_first : `bool`
        Whether only the first match should be used.
    limit : `int`
        Limit the number of results to be returned. A value of 0 means
        unlimited. A negative value is used to specify a cap where a warning
        is issued if that cap is hit.
    order_by : `tuple` of `str`
        Dimensions to use for sorting results. If no ordering is given the
        results of ``limit`` are undefined and default sorting of the
        resulting datasets will be applied. It is an error if the requested
        ordering is inconsistent with the dimensions of the dataset type
        being queried.
    transfer : `str`
        Transfer mode to use when placing artifacts in the destination.
    preserve_path : `bool`
        If `True` the full datastore path will be retained within the
        destination directory, else only the filename will be used.
    clobber : `bool`
        If `True` allow transfers to overwrite files at the destination.
    zip : `bool`
        If `True` retrieve the datasets and place in a zip file.

    Returns
    -------
    transferred : `list` of `lsst.resources.ResourcePath`
        The destination URIs of every transferred artifact or a list with a
        single entry of the name of the zip file.
    """
    # Empty selections expand to match-everything globs.
    glob_patterns = dataset_type if dataset_type else "*"
    collection_patterns: tuple[str, ...] = collections if collections else ("*",)

    with Butler.from_config(repo, writeable=False) as butler:
        dataset_query = QueryDatasets(
            butler=butler,
            glob=glob_patterns,
            collections=collection_patterns,
            where=where,
            find_first=find_first,
            limit=limit,
            order_by=order_by,
            show_uri=False,
            with_dimension_records=True,
        )
        # Flatten into a set so duplicates collapse and we can report a
        # count back to the caller.
        refs = {ref for ref_group in dataset_query.getDatasets() for ref in ref_group}
        log.info("Number of datasets matching query: %d", len(refs))
        if not refs:
            return []

        if zip:
            zip_file = butler.retrieve_artifacts_zip(refs, destination=destination, overwrite=clobber)
            return [zip_file]

        return butler.retrieveArtifacts(
            refs,
            destination=destination,
            transfer=transfer,
            preserve_path=preserve_path,
            overwrite=clobber,
        )