Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import os 

23import pickle 

24import shutil 

25import tempfile 

26import unittest 

27 

28import lsst.log 

29import lsst.daf.butler.tests as butlerTests 

30from lsst.daf.butler import DatasetType, Butler, DataCoordinate, Config 

31from lsst.daf.butler.registry import ConflictingDefinitionError 

32from lsst.daf.butler.core.utils import getFullTypeName 

33 

34from lsst.obs.base.ingest_tests import IngestTestBase 

35from lsst.obs.base.instrument_tests import DummyCam 

36from lsst.obs.base import RawIngestTask 

37 

38 

# Absolute path of the directory containing this test module; all test data
# is located relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
# Root of the raw-ingest test data (holds the "sidecar_data" and
# "indexed_data" trees used below).
INGESTDIR = os.path.join(TESTDIR, "data", "ingest")

41 

42 

class DummyCamRawIngestTask(RawIngestTask):
    """Raw ingest task used with the DummyCam test instrument.

    For DummyCam we ingest a different dataset type that can return a
    non-Exposure.
    """

    def getDatasetType(self):
        """Return the DatasetType of the datasets ingested by this Task.

        Returns
        -------
        datasetType : `lsst.daf.butler.DatasetType`
            The ``raw_dict`` dataset type, keyed by instrument, detector and
            exposure and stored with the ``StructuredDataDict`` storage
            class.
        """
        return DatasetType(
            "raw_dict",
            ("instrument", "detector", "exposure"),
            "StructuredDataDict",
            universe=self.butler.registry.dimensions,
        )

52 

53 

class RawIngestTestCase(IngestTestBase, unittest.TestCase):
    """Test ingest using JSON sidecar files."""

    # Configuration read by IngestTestBase.
    ingestDatasetTypeName = "raw_dict"
    rawIngestTask = getFullTypeName(DummyCamRawIngestTask)
    curatedCalibrationDatasetTypes = ()
    ingestDir = TESTDIR
    instrumentClassName = "lsst.obs.base.instrument_tests.DummyCam"
    file = os.path.join(INGESTDIR, "sidecar_data", "dataset_1.yaml")
    dataIds = [dict(instrument="DummyCam", exposure=100, detector=0)]

    @property
    def visits(self):
        """Mapping from the expected visit data ID to its exposure data IDs.

        A new `Butler` is constructed on every access so that the dimension
        universe matches the repository under test.
        """
        butler = Butler(self.root, collections=[self.outputRun])
        universe = butler.registry.dimensions
        visit = DataCoordinate.standardize(instrument="DummyCam", visit=100, universe=universe)
        exposure = DataCoordinate.standardize(instrument="DummyCam", exposure=100, universe=universe)
        return {visit: [exposure]}

    def testWriteCuratedCalibrations(self):
        """There are no curated calibrations in this test instrument"""

85 

86 

class RawIngestImpliedIndexTestCase(RawIngestTestCase):
    """Test ingest using JSON index files.

    Identical to `RawIngestTestCase` except that the ingested file comes
    from the ``indexed_data`` tree rather than ``sidecar_data``.
    """

    file = os.path.join(INGESTDIR, "indexed_data", "dataset_1.yaml")

90 

91 

class RawIngestEdgeCaseTestCase(unittest.TestCase):
    """Test ingest using non-standard approaches including failures."""

    @classmethod
    def setUpClass(cls):
        """Create one file-backed test repository shared by all tests."""
        butlerConfig = """
datastore:
 # Want to ingest real files so can't use in-memory datastore
 cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
"""
        cls.root = tempfile.mkdtemp(dir=TESTDIR)
        cls.creatorButler = butlerTests.makeTestRepo(cls.root, {}, config=Config.fromYaml(butlerConfig))
        DummyCam().register(cls.creatorButler.registry)

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository directory."""
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        """Create a fresh collection and ingest task for each test."""
        self.butler = butlerTests.makeTestCollection(self.creatorButler)
        self.outputRun = self.butler.run

        config = RawIngestTask.ConfigClass()
        self.task = DummyCamRawIngestTask(config=config, butler=self.butler)

        # Different test files.
        self.bad_metadata_file = os.path.join(TESTDIR, "data", "small.fits")
        self.good_file = os.path.join(INGESTDIR, "sidecar_data", "dataset_2.yaml")
        self.bad_instrument_file = os.path.join(TESTDIR, "data", "calexp.fits")

    def _queryRawDicts(self, collections):
        """Return the ``raw_dict`` datasets found in ``collections`` as a
        list.
        """
        return list(self.butler.registry.queryDatasets("raw_dict", collections=collections))

    def testSimpleIngest(self):
        """Ingest a single file, then two files using two processes."""
        # Use the default per-instrument run for this.
        self.task.run([self.good_file])
        self.assertEqual(len(self._queryRawDicts("DummyCam/raw/all")), 1)

        # Now parallelized.
        files = [self.good_file, os.path.join(INGESTDIR, "sidecar_data", "dataset_1.yaml")]
        self.task.run(files, processes=2, run=self.outputRun)
        self.assertEqual(len(self._queryRawDicts(self.outputRun)), 2)

    def testExplicitIndex(self):
        """Ingest from explicitly listed index files, including duplicated
        and broken ones.
        """
        indexed_dir = os.path.join(INGESTDIR, "indexed_data")
        main_index = os.path.join(indexed_dir, "_index.json")
        translated_index = os.path.join(indexed_dir, "translated_subdir", "_index.json")
        metadata_index = os.path.join(indexed_dir, "metadata_subdir", "_index.json")

        files = [main_index]
        self.task.run(files, run=self.outputRun)
        self.assertEqual(len(self._queryRawDicts(self.outputRun)), 2)

        # Try again with an explicit index and a file that is in that index.
        files.append(os.path.join(indexed_dir, "dataset_2.yaml"))
        new_run = self.outputRun + "b"
        self.task.run(files, run=new_run)

        # Check the run that was just written; querying self.outputRun here
        # would only re-test the first ingest.
        self.assertEqual(len(self._queryRawDicts(new_run)), 2)

        # Now with two index files that point to the same files.
        # Look for the warning from duplication.
        files = [main_index, translated_index]
        new_run = self.outputRun + "c"

        with self.assertLogs(level="WARNING") as cm:
            with lsst.log.UsePythonLogging():
                self.task.run(files, run=new_run)
        self.assertIn("already specified in an index file, ignoring content", cm.output[0])

        # As above, inspect the run written by this ingest.
        self.assertEqual(len(self._queryRawDicts(new_run)), 2)

        # Again with an index file of metadata and one of translated.
        # Translated should win.
        # Put the metadata one first to test that order is preserved.
        files = [metadata_index, main_index]
        new_run = self.outputRun + "d"
        with self.assertLogs(level="WARNING") as cm:
            with lsst.log.UsePythonLogging():
                self.task.run(files, run=new_run)
        self.assertIn("already specified in an index file but overriding", cm.output[0])

        # Reversing the order should change the warning: with the translated
        # index listed first, the later metadata index is ignored rather
        # than overriding.
        files = [main_index, metadata_index]
        new_run = self.outputRun + "e"
        with self.assertLogs(level="WARNING") as cm:
            with lsst.log.UsePythonLogging():
                self.task.run(files, run=new_run)
        self.assertIn("already specified in an index file, ignoring", cm.output[0])

        # Bad index file.
        files = [os.path.join(indexed_dir, "bad_index", "_index.json")]
        with self.assertRaises(RuntimeError):
            self.task.run(files, run=self.outputRun)

        # Bad index file due to bad instrument.
        files = [os.path.join(indexed_dir, "bad_instrument", "_index.json")]
        with self.assertLogs(level="WARNING") as cm:
            with lsst.log.UsePythonLogging():
                with self.assertRaises(RuntimeError):
                    self.task.run(files, run=self.outputRun)
        self.assertIn("Instrument HSC for file", cm.output[0])

    def testBadExposure(self):
        """Test that bad exposures trigger the correct failure modes.

        This test uses the bad definition of dataset 4; note that exposure
        definitions are defined globally in a butler registry.
        (``testCallbacks`` also ingests dataset 4.)
        """
        indexed_dir = os.path.join(INGESTDIR, "indexed_data")

        # Ingest 3 files. 2 of them will implicitly find an index and one
        # will use a sidecar.
        files = [os.path.join(indexed_dir, f"dataset_{n}.yaml") for n in (1, 2, 3)]
        new_run = self.outputRun
        self.task.run(files, run=new_run)
        self.assertEqual(len(self._queryRawDicts(new_run)), 3)

        # Test fail fast.
        self.task.config.failFast = True

        # Ingest files with conflicting exposure definitions.
        # Ingest 3 files. One of them will implicitly find an index and one
        # will use a sidecar. The 3rd will fail due to exposure conflict.
        files = [os.path.join(indexed_dir, f"dataset_{n}.yaml") for n in (1, 3, 4)]
        new_run = self.outputRun + "_bad_exposure"
        with self.assertRaises(ConflictingDefinitionError):
            self.task.run(files, run=new_run)

    def testBadFile(self):
        """Try to ingest a bad file."""
        files = [self.bad_metadata_file]

        with self.assertRaises(RuntimeError) as cm:
            # Default is to raise an error at the end.
            self.task.run(files, run=self.outputRun)
        self.assertIn("Some failures", str(cm.exception))

        # Including a good file will result in ingest working but still
        # raises (we might want to move this to solely happen in the
        # command line invocation).
        files.append(self.good_file)

        # Also include a file with unknown instrument.
        files.append(self.bad_instrument_file)

        with self.assertRaises(RuntimeError):
            self.task.run(files, run=self.outputRun)
        self.assertEqual(len(self._queryRawDicts(self.outputRun)), 1)

        # Fail fast will trigger a run time error with different text.
        # Use a different output run to be sure we are not failing because
        # of the attempt to ingest twice.
        self.task.config.failFast = True
        new_run = self.outputRun + "b"
        with self.assertRaises(RuntimeError) as cm:
            self.task.run([self.bad_metadata_file, self.good_file], run=new_run)
        self.assertIn("Problem extracting metadata", str(cm.exception))

        # Attempt to ingest good file again -- this will fail for a different
        # reason than failed metadata extraction.
        with self.assertRaises(ConflictingDefinitionError):
            self.task.run([self.good_file], run=self.outputRun)

        # Ingest a file with good metadata but unknown instrument.
        with self.assertRaises(RuntimeError) as cm:
            self.task.run([self.bad_instrument_file], run=self.outputRun)
        self.assertIn("Instrument HSC", str(cm.exception))

        # Ingest of a metadata index file that will fail translation.
        with self.assertRaises(RuntimeError) as cm:
            self.task.run([os.path.join(INGESTDIR, "indexed_data", "metadata_subdir", "_index.json")])
        self.assertIn("Problem extracting metadata", str(cm.exception))

        # Ingest of a bad index file.
        with self.assertRaises(RuntimeError) as cm:
            self.task.run([os.path.join(INGESTDIR, "indexed_data", "bad_index", "_index.json")])
        self.assertIn("Problem reading index file", str(cm.exception))

        # Ingest of an implied bad index file. Only the exception type is
        # checked here, so the context manager result is not bound.
        with self.assertRaises(RuntimeError):
            self.task.run([os.path.join(INGESTDIR, "indexed_data", "bad_implied", "dataset_2.yaml")])

    def testCallbacks(self):
        """Test the callbacks for failures."""

        # Define the callbacks. Each one records its first argument so the
        # test can count how often it was invoked.
        metadata_failures = []
        successes = []
        ingest_failures = []

        def on_metadata_failure(filename, exc):
            metadata_failures.append(filename)

        def on_success(datasets):
            successes.append(datasets)

        def on_ingest_failure(exposure, exc):
            ingest_failures.append(exposure)

        # Need our own task instance
        config = RawIngestTask.ConfigClass()
        self.task = DummyCamRawIngestTask(
            config=config,
            butler=self.butler,
            on_metadata_failure=on_metadata_failure,
            on_success=on_success,
            on_ingest_failure=on_ingest_failure,
        )

        files = [self.good_file, self.bad_metadata_file, self.bad_instrument_file]

        with self.assertRaises(RuntimeError):
            self.task.run(files, run=self.outputRun)

        self.assertEqual(len(successes), 1)
        self.assertEqual(len(metadata_failures), 2)
        self.assertEqual(len(ingest_failures), 0)

        # Try the good one a second time.
        with self.assertRaises(RuntimeError):
            self.task.run([self.good_file], run=self.outputRun)

        self.assertEqual(len(successes), 1)
        self.assertEqual(len(ingest_failures), 1)

        # An index file with metadata that won't translate.
        metadata_failures.clear()
        files = [os.path.join(INGESTDIR, "indexed_data", "metadata_subdir", "_index.json")]
        with self.assertRaises(RuntimeError):
            self.task.run(files, run=self.outputRun)
        self.assertEqual(len(metadata_failures), 2)

        # Bad index file.
        metadata_failures.clear()
        files = [os.path.join(INGESTDIR, "indexed_data", "bad_index", "_index.json")]
        with self.assertRaises(RuntimeError):
            self.task.run(files, run=self.outputRun)
        self.assertEqual(len(metadata_failures), 1)

        # Ingest two files that have conflicting exposure metadata.
        ingest_failures.clear()
        successes.clear()
        # Ingest 4 files. 2 of them will implicitly find an index and one
        # will use a sidecar. The 4th will fail due to exposure conflict.
        files = [os.path.join(INGESTDIR, "indexed_data", f"dataset_{n}.yaml") for n in (1, 2, 3, 4)]
        new_run = self.outputRun + "_fail"
        with self.assertRaises(RuntimeError):
            self.task.run(files, run=new_run)
        self.assertEqual(len(ingest_failures), 1)
        self.assertEqual(len(successes), 3)

349 

350 

class TestRawIngestTaskPickle(unittest.TestCase):
    """Test that pickling of the RawIngestTask works properly."""

    @classmethod
    def setUpClass(cls):
        """Create one temporary test repository shared by all tests."""
        cls.root = tempfile.mkdtemp(dir=TESTDIR)
        cls.creatorButler = butlerTests.makeTestRepo(cls.root, {})

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository directory."""
        if cls.root is not None:
            shutil.rmtree(cls.root, ignore_errors=True)

    def setUp(self):
        """Build a task with a non-default configuration to round-trip."""
        self.butler = butlerTests.makeTestCollection(self.creatorButler)

        self.config = RawIngestTask.ConfigClass()
        self.config.transfer = "copy"  # safe non-default value
        self.task = RawIngestTask(config=self.config, butler=self.butler)

    def testPickleTask(self):
        """Round-trip the task through pickle and compare its state."""
        stream = pickle.dumps(self.task)
        # Named ``restored`` rather than ``copy`` to avoid shadowing the
        # standard-library module name.
        restored = pickle.loads(stream)
        self.assertEqual(self.task.getFullName(), restored.getFullName())
        self.assertEqual(self.task.log.getName(), restored.log.getName())
        self.assertEqual(self.task.config, restored.config)
        self.assertEqual(self.task.butler._config, restored.butler._config)
        self.assertEqual(self.task.butler.collections, restored.butler.collections)
        self.assertEqual(self.task.butler.run, restored.butler.run)
        self.assertEqual(self.task.universe, restored.universe)
        self.assertEqual(self.task.datasetType, restored.datasetType)

382 

383 

# Run the tests when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()