Coverage for python/lsst/daf/butler/script/exportCalibs.py: 15%

59 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-11 02:30 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23import logging 

24import os 

25from collections.abc import Iterable 

26from typing import TYPE_CHECKING 

27 

28from astropy.table import Table 

29 

30from .._butler import Butler 

31from ..registry import CollectionType 

32 

33if TYPE_CHECKING: 

34 from lsst.daf.butler import DatasetRef, DatasetType, Registry 

35 

36log = logging.getLogger(__name__) 

37 

38 

def parseCalibrationCollection(
    registry: Registry, collection: str, datasetTypes: Iterable[DatasetType]
) -> tuple[list[str], list[DatasetRef]]:
    """Search a calibration collection for calibration datasets.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to use.
    collection : `str`
        Collection to search. This should be a CALIBRATION
        collection.
    datasetTypes : `list` [`lsst.daf.Butler.DatasetType`]
        List of calibration dataset types.

    Returns
    -------
    exportCollections : `list` [`str`]
        List of collections to save on export.
    exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
        Datasets to save on export.

    Raises
    ------
    RuntimeError
        Raised if the collection to search is not a CALIBRATION collection.
    """
    # Only CALIBRATION collections carry the validity-range associations
    # this function relies on.
    if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

    runsToExport: list[str] = []
    refsToExport: list[DatasetRef] = []
    for datasetType in datasetTypes:
        results = registry.queryDatasetAssociations(
            datasetType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
        )
        for association in results:
            # Expand the dataId up front in case file templates are used
            # during the transfer.
            expandedRef = association.ref.expanded(registry.expandDataId(association.ref.dataId))
            refsToExport.append(expandedRef)
            assert expandedRef.run is not None, "These refs must all be resolved."
            # The RUN collection holding each dataset must also be exported.
            runsToExport.append(expandedRef.run)
    return runsToExport, refsToExport

84 

85 

def exportCalibs(
    repo: str, directory: str, collections: Iterable[str], dataset_type: Iterable[str], transfer: str
) -> Table:
    """Export calibration datasets from a butler repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file
        describing the repo and its location.
    directory : `str`
        URI string of the directory to write the exported
        calibrations.
    collections : `list` [`str`]
        Data collections to pull calibrations from. Must be an
        existing `~CollectionType.CHAINED` or
        `~CollectionType.CALIBRATION` collection.
    dataset_type : `tuple` [`str`]
        The dataset types to export. Default is to export all.
    transfer : `str`
        The transfer mode to use for exporting.

    Returns
    -------
    datasetTable : `astropy.table.Table`
        A table containing relevant information about the calibrations
        exported.

    Raises
    ------
    RuntimeError :
        Raised if the output directory already exists.
    """
    # Fail fast before running any (potentially expensive) registry queries.
    if os.path.exists(directory):
        raise RuntimeError(f"Export directory exists: {directory}")

    butler = Butler(repo, writeable=False)

    # ``...`` is the registry wildcard meaning "everything".
    dataset_type_query = dataset_type if dataset_type else ...
    collections_query = collections if collections else ...

    # Restrict to dataset types that are calibrations.
    calibTypes = [
        datasetType
        for datasetType in butler.registry.queryDatasetTypes(dataset_type_query)
        if datasetType.isCalibration()
    ]

    collectionsToExport = []
    datasetsToExport = []

    for collection in butler.registry.queryCollections(
        collections_query,
        flattenChains=True,
        includeChains=True,
        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        log.info("Checking collection: %s", collection)

        # Get collection information.
        collectionsToExport.append(collection)
        collectionType = butler.registry.getCollectionType(collection)
        if collectionType == CollectionType.CALIBRATION:
            exportCollections, exportDatasets = parseCalibrationCollection(
                butler.registry, collection, calibTypes
            )
            collectionsToExport.extend(exportCollections)
            datasetsToExport.extend(exportDatasets)

    os.makedirs(directory)
    with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
        # De-duplicate; the same run/dataset can be reached through
        # multiple chained collections.
        collectionsToExport = list(set(collectionsToExport))
        datasetsToExport = list(set(datasetsToExport))

        for exportable in collectionsToExport:
            try:
                export.saveCollection(exportable)
            except Exception as e:
                # Best-effort: a failure to save one collection should not
                # abort the export of the others.
                log.warning("Did not save collection %s due to %s.", exportable, e)

        log.info("Saving %d dataset(s)", len(datasetsToExport))
        export.saveDatasets(datasetsToExport)

    # Build a summary table, sorted by dataset type name for stable output.
    sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)

    requiredDimensions: set[str] = set()
    for ref in sortedDatasets:
        requiredDimensions.update(ref.dimensions.names)
    # One column per dimension; blank where a dataset lacks that dimension.
    dimensionColumns = {
        dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
        for dimensionName in requiredDimensions
    }

    return Table(
        {
            "calibrationType": [ref.datasetType.name for ref in sortedDatasets],
            "run": [ref.run for ref in sortedDatasets],
            **dimensionColumns,
        }
    )