Coverage for python/lsst/daf/butler/script/exportCalibs.py: 13%

57 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-30 02:52 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

27from __future__ import annotations 

28 

29import logging 

30import os 

31from collections.abc import Iterable 

32from typing import TYPE_CHECKING 

33 

34from astropy.table import Table 

35 

36from .._butler import Butler 

37from ..registry import CollectionType 

38 

39if TYPE_CHECKING: 

40 from lsst.daf.butler import DatasetRef, DatasetType, Registry 

41 

42log = logging.getLogger(__name__) 

43 

44 

def parseCalibrationCollection(
    registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
    """Search a calibration collection for calibration datasets.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to use.
    collection : `str`
        Collection to search. This should be a CALIBRATION
        collection.
    datasetTypes : `list` [`lsst.daf.butler.DatasetType`]
        List of calibration dataset types.

    Returns
    -------
    exportCollections : `list` [`str`]
        List of collections to save on export.
    exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
        Datasets to save on export.

    Raises
    ------
    RuntimeError
        Raised if the collection to search is not a CALIBRATION collection.
    """
    # Only CALIBRATION collections carry the validity-range associations
    # this search relies on; reject anything else up front.
    if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

    foundCollections: list[str] = []
    foundDatasets: list[DatasetRef] = []
    for datasetType in datasetTypes:
        results = registry.queryDatasetAssociations(
            datasetType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
        )
        for association in results:
            # Expand the dataId in case file templates will be used
            # in the transfer.
            expandedRef = association.ref.expanded(registry.expandDataId(association.ref.dataId))
            foundDatasets.append(expandedRef)
            assert expandedRef.run is not None, "These refs must all be resolved."
            # The RUN collection each dataset lives in must also be exported.
            foundCollections.append(expandedRef.run)
    return foundCollections, foundDatasets

90 

91 

def exportCalibs(
    repo: str, directory: str, collections: Iterable[str], dataset_type: Iterable[str], transfer: str
) -> Table:
    """Export calibration datasets and their collections to a directory.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file
        describing the repo and its location.
    directory : `str`
        URI string of the directory to write the exported
        calibrations.
    collections : `list` [`str`]
        Data collections to pull calibrations from. Must be an
        existing `~CollectionType.CHAINED` or
        `~CollectionType.CALIBRATION` collection.
    dataset_type : `tuple` [`str`]
        The dataset types to export. Default is to export all.
    transfer : `str`
        The transfer mode to use for exporting.

    Returns
    -------
    datasetTable : `astropy.table.Table`
        A table containing relevant information about the calibrations
        exported.

    Raises
    ------
    RuntimeError
        Raised if the output directory already exists.
    """
    butler = Butler.from_config(repo, writeable=False)

    # Empty selections mean "query everything" (the registry APIs use
    # Ellipsis as the wildcard).
    dataset_type_query = dataset_type or ...
    collections_query = collections or ...

    # Restrict to dataset types that are actually calibrations.
    calibTypes = [
        datasetType
        for datasetType in butler.registry.queryDatasetTypes(dataset_type_query)
        if datasetType.isCalibration()
    ]

    collectionsToExport = []
    datasetsToExport = []

    # Walk CHAINED collections (flattened) and CALIBRATION collections;
    # only CALIBRATION collections are searched for datasets.
    for collection in butler.registry.queryCollections(
        collections_query,
        flattenChains=True,
        includeChains=True,
        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        log.info("Checking collection: %s", collection)

        # Get collection information.
        collectionsToExport.append(collection)
        collectionType = butler.registry.getCollectionType(collection)
        if collectionType == CollectionType.CALIBRATION:
            exportCollections, exportDatasets = parseCalibrationCollection(
                butler.registry, collection, calibTypes
            )
            collectionsToExport.extend(exportCollections)
            datasetsToExport.extend(exportDatasets)

    # Create the output directory atomically; checking os.path.exists first
    # and then creating would be racy against concurrent processes.
    try:
        os.makedirs(directory)
    except FileExistsError:
        raise RuntimeError(f"Export directory exists: {directory}") from None
    with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
        # Deduplicate before saving; the same RUN collection can be reached
        # through multiple calibration associations.
        collectionsToExport = list(set(collectionsToExport))
        datasetsToExport = list(set(datasetsToExport))

        for exportable in collectionsToExport:
            try:
                export.saveCollection(exportable)
            except Exception as e:
                # Best-effort: some reachable collections may not be
                # exportable; record and continue.
                log.warning("Did not save collection %s due to %s.", exportable, e)

        log.info("Saving %d dataset(s)", len(datasetsToExport))
        export.saveDatasets(datasetsToExport)

    # Build a summary table sorted by dataset type name.
    sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)

    # Union of all dimensions across the exported refs; refs lacking a
    # given dimension get an empty-string cell.
    requiredDimensions: set[str] = set()
    for ref in sortedDatasets:
        requiredDimensions.update(ref.dimensions.names)
    dimensionColumns = {
        dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
        for dimensionName in requiredDimensions
    }

    return Table(
        {
            "calibrationType": [ref.datasetType.name for ref in sortedDatasets],
            "run": [ref.run for ref in sortedDatasets],
            **dimensionColumns,
        }
    )