Coverage for python/lsst/obs/lsst/_ingestPhotodiode.py: 12%

101 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-03-01 14:58 +0000

1# This file is part of obs_lsst. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21__all__ = ('PhotodiodeIngestConfig', 'PhotodiodeIngestTask') 

22 

23 

24from lsst.daf.butler import ( 

25 CollectionType, 

26 DataCoordinate, 

27 DatasetIdGenEnum, 

28 DatasetRef, 

29 DatasetType, 

30 FileDataset, 

31 Progress, 

32) 

33from lsst.ip.isr import PhotodiodeCalib 

34from lsst.obs.base import makeTransferChoiceField 

35from lsst.obs.base.formatters.fitsGeneric import FitsGenericFormatter 

36from lsst.pex.config import Config 

37from lsst.pipe.base import Task 

38from lsst.resources import ResourcePath 

39 

40 

class PhotodiodeIngestConfig(Config):
    """Configuration options for `PhotodiodeIngestTask`.

    The only transfer mode accepted by `validate` is ``"copy"``.
    """

    transfer = makeTransferChoiceField(default="copy")

    def validate(self):
        """Run the base-class validation and require ``transfer == "copy"``.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is set to anything other than ``"copy"``.
        """
        super().validate()
        if self.transfer == "copy":
            return
        raise ValueError(f"Transfer Must be 'copy' for photodiode data. {self.transfer}")

50 

51 

class PhotodiodeIngestTask(Task):
    """Task to ingest photodiode data into a butler repository.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Writable butler instance. Its registry is used to register the
        dataset type and the output RUN collection, and its ``ingest``
        method is used to store the converted files.
    instrument : `~lsst.obs.base.Instrument`
        The instrument these photodiode datasets are from.
    config : `PhotodiodeIngestConfig`, optional
        Configuration for the task. It is forwarded to the base-class
        constructor and validated afterwards.
    **kwargs
        Additional keyword arguments, forwarded to the base-class
        constructor.
    """

    ConfigClass = PhotodiodeIngestConfig
    _DefaultName = "photodiodeIngest"

    def getDatasetType(self):
        """Return the DatasetType of the photodiode datasets.

        Returns
        -------
        datasetType : `lsst.daf.butler.DatasetType`
            Dataset type named ``"photodiode"`` with dimensions
            ``("instrument", "exposure")`` and storage class
            ``"IsrCalib"``, defined in ``self.universe``.
        """
        return DatasetType(
            "photodiode",
            ("instrument", "exposure"),
            "IsrCalib",
            universe=self.universe,
        )

    def __init__(self, butler, instrument, config=None, **kwargs):
        # Hand the config to the base class first and validate the
        # resolved ``self.config`` afterwards.  The signature allows
        # ``config=None``, and calling ``config.validate()`` directly on
        # that default would fail with AttributeError before the base
        # class had any chance to supply a configuration.
        super().__init__(config, **kwargs)
        self.config.validate()
        self.butler = butler
        # ``universe`` must be set before getDatasetType() is called.
        self.universe = self.butler.dimensions
        self.datasetType = self.getDatasetType()
        self.progress = Progress(self.log.name)
        self.instrument = instrument
        self.camera = self.instrument.getCamera()

    def run(self, locations, run=None, file_filter=r"Photodiode_Readings.*txt$|_photodiode.ecsv$",
            track_file_attrs=None):
        """Ingest photodiode data into a Butler data repository.

        Parameters
        ----------
        locations : iterable
            Locations to search for photodiode files.  They are passed to
            `lsst.resources.ResourcePath.findFileResources` together with
            ``file_filter``.
        run : `str`, optional
            Name of the RUN-type collection to write to,
            overriding the default derived from the instrument
            name.
        file_filter : `str`, optional
            Regular expression handed to
            `lsst.resources.ResourcePath.findFileResources` to select
            the files to ingest.
        track_file_attrs : `bool`, optional
            Control whether file attributes such as the size or
            checksum should be tracked by the datastore.  Passed to
            ``butler.ingest`` as ``record_validation_info``.  Whether
            this parameter is honored depends on the specific
            datastore implementation.

        Returns
        -------
        refs : `list` [`lsst.daf.butler.FileDataset`]
            One entry per newly ingested photodiode file.  Each entry
            is the `~lsst.daf.butler.FileDataset` that was passed to
            ``butler.ingest``.

        Raises
        ------
        RuntimeError
            Raised after all files have been processed if, for at least
            one file, the number of matching exposure records was not
            exactly one (none found, or more than one found).
        """
        files = ResourcePath.findFileResources(locations, file_filter)

        registry = self.butler.registry
        registry.registerDatasetType(self.datasetType)

        # The key used to constrain on day_obs depends on whether the
        # dimension universe lists it among the implied dimensions of
        # ``exposure``.
        if "day_obs" in self.butler.dimensions["exposure"].implied:
            day_obs_key = "day_obs"
        else:
            day_obs_key = "exposure.day_obs"

        # Find and register run that we will ingest to.
        if run is None:
            run = self.instrument.makeCollectionName("calib", "photodiode")
        registry.registerCollection(run, type=CollectionType.RUN)

        # Use datasetIds that match the raw exposure data.
        if registry.supportsIdGenerationMode(DatasetIdGenEnum.DATAID_TYPE_RUN):
            mode = DatasetIdGenEnum.DATAID_TYPE_RUN
        else:
            mode = DatasetIdGenEnum.UNIQUE

        refs = []
        numExisting = 0
        numFailed = 0
        for inputFile in files:
            # Convert the file into the right class.
            calibType = "Unknown"
            try:
                # Can this be read directly in standard form?
                with inputFile.as_local() as localFile:
                    calib = PhotodiodeCalib.readText(localFile.ospath)
                calibType = "full"
            except Exception:
                # Try reading as a two-column file.  A failure here is
                # not caught and aborts the ingest.
                with inputFile.as_local() as localFile:
                    calib = PhotodiodeCalib.readTwoColumnPhotodiodeData(localFile.ospath)
                calibType = "two-column"

            # Build the exposure query that matches this file.
            if calibType == "full":
                instrumentName = calib.getMetadata().get('INSTRUME')
                if instrumentName is None:
                    # The field is populated by the calib class, so we
                    # can't use defaults.
                    instrumentName = self.instrument.getName()

                obsId = calib.getMetadata()['obsId']
                whereClause = "exposure.obs_id=obsId"
                binding = {"obsId": obsId}
                logId = obsId

            elif calibType == "two-column":
                dayObs = calib.getMetadata()['day_obs']
                seqNum = calib.getMetadata()['seq_num']

                # Find the associated exposure information.
                whereClause = f"{day_obs_key}=dayObs and exposure.seq_num=seqNum"
                instrumentName = self.instrument.getName()
                binding = {"dayObs": dayObs, "seqNum": seqNum}
                logId = (dayObs, seqNum)

            else:
                # Defensive only: both reader paths above set calibType.
                self.log.warning("Skipping input file %s of unknown type.",
                                 inputFile)
                continue

            exposureRecords = list(registry.queryDimensionRecords("exposure",
                                                                  instrument=instrumentName,
                                                                  where=whereClause,
                                                                  bind=binding))

            # Exactly one matching exposure is required; anything else
            # is counted as a failure and reported once the loop ends.
            nRecords = len(exposureRecords)
            if nRecords == 1:
                exposureId = exposureRecords[0].id
                calib.updateMetadata(camera=self.camera, exposure=exposureId)
            elif nRecords == 0:
                numFailed += 1
                self.log.warning("Skipping instrument %s and identifiers %s: no exposures found.",
                                 instrumentName, logId)
                continue
            else:
                numFailed += 1
                self.log.warning("Multiple exposure entries found for instrument %s and "
                                 "identifiers %s.", instrumentName, logId)
                continue

            # Generate the dataId for this file.
            dataId = DataCoordinate.standardize(
                instrument=self.instrument.getName(),
                exposure=exposureId,
                universe=self.universe,
            )

            # If this already exists, we should skip it and continue.
            existing = {
                ref.dataId
                for ref in registry.queryDatasets(self.datasetType, collections=[run],
                                                  dataId=dataId)
            }
            if existing:
                self.log.debug("Skipping instrument %s and identifiers %s: already exists in run %s.",
                               instrumentName, logId, run)
                numExisting += 1
                continue

            # Ingest must work from a file, but we can't use the
            # original, as we've added new metadata and reformatted
            # it.  Write it to a temp file that we can use to ingest.
            # If we can have the files written appropriately, this
            # will be a direct ingest of those files.
            with ResourcePath.temporary_uri(suffix=".fits") as tempFile:
                calib.writeFits(tempFile.ospath)

                ref = DatasetRef(self.datasetType, dataId, run=run, id_generation_mode=mode)
                dataset = FileDataset(path=tempFile, refs=ref, formatter=FitsGenericFormatter)

                # No try, as if this fails, we should stop.  The ingest
                # happens inside the ``with`` block, i.e. before the
                # temporary-file context is exited.
                self.butler.ingest(dataset, transfer=self.config.transfer,
                                   record_validation_info=track_file_attrs)
                self.log.info("Photodiode %s:%d (%s) ingested successfully", instrumentName, exposureId,
                              logId)
                refs.append(dataset)

        if numExisting != 0:
            self.log.warning("Skipped %d entries that already existed in run %s", numExisting, run)
        if numFailed != 0:
            raise RuntimeError(f"Failed to ingest {numFailed} entries due to missing exposure information.")
        return refs