Coverage for python/lsst/daf/butler/script/exportCalibs.py: 9%

55 statements  

coverage.py v6.4.2, created at 2022-07-27 01:57 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging
import os

from astropy.table import Table

from .._butler import Butler
from ..registry import CollectionType

log = logging.getLogger(__name__)



def parseCalibrationCollection(registry, collection, datasetTypes):
    """Search a calibration collection for calibration datasets.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry to use.
    collection : `str`
        Collection to search. This should be a CALIBRATION
        collection.
    datasetTypes : `list` [`lsst.daf.butler.DatasetType`]
        List of calibration dataset types.

    Returns
    -------
    exportCollections : `list` [`str`]
        List of RUN collections to save on export.
    exportDatasets : `list` [`lsst.daf.butler.DatasetRef`]
        Datasets to save on export.

    Raises
    ------
    RuntimeError
        Raised if the collection to search is not a CALIBRATION collection.
    """
    if registry.getCollectionType(collection) != CollectionType.CALIBRATION:
        raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

    exportCollections = []
    exportDatasets = []
    for calibType in datasetTypes:
        associations = registry.queryDatasetAssociations(
            calibType, collections=collection, collectionTypes=[CollectionType.CALIBRATION]
        )
        for result in associations:
            # Need an expanded dataId in case file templates will be used
            # in the transfer.
            dataId = registry.expandDataId(result.ref.dataId)
            ref = result.ref.expanded(dataId)
            exportDatasets.append(ref)
            exportCollections.append(ref.run)
    return exportCollections, exportDatasets
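

# Example (not part of the module): a minimal sketch of calling
# parseCalibrationCollection directly, assuming an existing repository at
# "/repo" with a CALIBRATION collection named "LATISS/calib" (both
# hypothetical placeholders).
#
#     butler = Butler("/repo", writeable=False)
#     biasTypes = [
#         t
#         for t in butler.registry.queryDatasetTypes("bias")
#         if t.isCalibration()
#     ]
#     runs, refs = parseCalibrationCollection(
#         butler.registry, "LATISS/calib", biasTypes
#     )
#     # runs names the RUN collections backing the certified datasets;
#     # refs holds the expanded DatasetRefs to hand to an export context.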


def exportCalibs(repo, directory, collections, dataset_type, transfer):
    """Export calibrations from a butler repository.

    Parameters
    ----------
    repo : `str`
        URI to the location of the repo or URI to a config file
        describing the repo and its location.
    directory : `str`
        URI string of the directory to write the exported
        calibrations to.
    collections : `list` [`str`]
        Data collections to pull calibrations from. Each must be an
        existing `~CollectionType.CHAINED` or
        `~CollectionType.CALIBRATION` collection.
    dataset_type : `tuple` [`str`]
        The dataset types to export. Default is to export all.
    transfer : `str`
        The transfer mode to use for exporting.

    Returns
    -------
    datasetTable : `astropy.table.Table`
        A table containing relevant information about the calibrations
        exported.

    Raises
    ------
    RuntimeError
        Raised if the output directory already exists.
    """
    butler = Butler(repo, writeable=False)

    if not dataset_type:
        dataset_type = ...
    if not collections:
        collections = ...
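
    # Note: in the registry query APIs used below, the Ellipsis literal
    # (...) acts as a wildcard, so queryDatasetTypes(...) and
    # queryCollections(...) match every dataset type and collection in
    # the repository.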

    calibTypes = [
        datasetType
        for datasetType in butler.registry.queryDatasetTypes(dataset_type)
        if datasetType.isCalibration()
    ]

    collectionsToExport = []
    datasetsToExport = []

    # Expand CHAINED collections to reach the CALIBRATION collections
    # inside them (flattenChains=True), while keeping the chains
    # themselves in the results (includeChains=True) so their
    # definitions are exported too.
    for collection in butler.registry.queryCollections(
        collections,
        flattenChains=True,
        includeChains=True,
        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        log.info("Checking collection: %s", collection)

        # Get collection information.
        collectionsToExport.append(collection)
        collectionType = butler.registry.getCollectionType(collection)
        if collectionType == CollectionType.CALIBRATION:
            exportCollections, exportDatasets = parseCalibrationCollection(
                butler.registry, collection, calibTypes
            )
            collectionsToExport.extend(exportCollections)
            datasetsToExport.extend(exportDatasets)

    if os.path.exists(directory):
        raise RuntimeError(f"Export directory exists: {directory}")
    os.makedirs(directory)
    with butler.export(directory=directory, format="yaml", transfer=transfer) as export:
        # Export only the unique collections and datasets found above.
        collectionsToExport = list(set(collectionsToExport))
        datasetsToExport = list(set(datasetsToExport))

        for exportable in collectionsToExport:
            try:
                export.saveCollection(exportable)
            except Exception as e:
                log.warning("Did not save collection %s due to %s.", exportable, e)

        log.info("Saving %d dataset(s)", len(datasetsToExport))
        export.saveDatasets(datasetsToExport)

    # Build a summary table of the exported datasets, with one column
    # per dimension used by any of the exported dataset types.
    sortedDatasets = sorted(datasetsToExport, key=lambda x: x.datasetType.name)

    requiredDimensions = set()
    for ref in sortedDatasets:
        requiredDimensions.update(ref.dimensions.names)
    dimensionColumns = {
        dimensionName: [ref.dataId.get(dimensionName, "") for ref in sortedDatasets]
        for dimensionName in requiredDimensions
    }

    return Table(
        {
            "calibrationType": [ref.datasetType.name for ref in sortedDatasets],
            "run": [ref.run for ref in sortedDatasets],
            **dimensionColumns,
        }
    )
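

# Example (not part of the module): a minimal sketch of driving exportCalibs
# directly; in practice it backs the ``butler export-calibs`` command line
# tool. The repository path, output directory, and collection name below are
# hypothetical placeholders.
#
#     table = exportCalibs(
#         repo="/repo",
#         directory="/tmp/calib_export",
#         collections=["LATISS/calib"],
#         dataset_type=("bias", "dark"),
#         transfer="auto",
#     )
#     table.pprint_all()
#
# The returned astropy Table has one row per exported dataset, with
# "calibrationType" and "run" columns plus one column per dimension
# (e.g. "instrument", "detector") used by the exported dataset types.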