Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import os 

25import pickle 

26import shutil 

27import tempfile 

28import unittest.mock 

29 

30from lsst.utils import getPackageDir 

31import lsst.utils.tests 

32import lsst.pipe.tasks as pipeTasks 

33from lsst.ap.verify import ingestion 

34from lsst.ap.verify.testUtils import DataTestCase 

35from lsst.ap.verify.dataset import Dataset 

36from lsst.ap.verify.workspace import WorkspaceGen2, WorkspaceGen3 

37 

38 

class MockDetector:
    """Minimal stand-in for a camera detector.

    Every instance reports the same fixed name (``'0'``) and ID (``0``).
    """

    def getName(self):
        """Return the detector name, always the string ``'0'``."""
        return '0'

    def getId(self):
        """Return the detector ID, always the integer ``0``."""
        return 0

45 

46 

class MockCamera:
    """Minimal stand-in for a camera holding exactly one detector.

    The detector can be looked up either by position (integer index) or by
    name (the key ``'0'``).

    Parameters
    ----------
    detector
        The single detector this camera contains.
    """

    def __init__(self, detector):
        self.det_list = [detector]
        self.det_dict = {'0': detector}

    def __getitem__(self, item):
        """Return the detector at integer index ``item`` or named ``item``.

        Raises
        ------
        IndexError
            Raised if ``item`` is an integer outside the detector list.
        KeyError
            Raised if ``item`` is not an integer and not a known name.
        """
        # isinstance rather than an exact type check, so that int subclasses
        # are treated as positions instead of falling through to name lookup.
        if isinstance(item, int):
            return self.det_list[item]
        return self.det_dict[item]

57 

58 

class IngestionTestSuite(DataTestCase):
    """Tests of `ingestion.DatasetIngestTask` against a mocked Butler.

    The Butler class and the register subtasks are replaced by mocks, so
    these tests check which rows the task asks to register rather than the
    contents of a real repository.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared mock camera, frozen config, and expected-data tables."""
        super().setUpClass()

        cls.mockCamera = MockCamera(MockDetector())
        cls.config = cls.makeTestConfig()
        cls.config.validate()
        cls.config.freeze()

        cls.testApVerifyData = os.path.join('tests', 'ingestion')

        # Descriptions of the files the tests expect to see registered; the
        # assert*RegistryCalls helpers compare mock calls against these.
        cls.rawData = [
            {'file': 'lsst_a_204595_R11_S01_i.fits', 'expId': 204595, 'filter': 'i',
             'exptime': 30.0},
        ]
        cls.calibData = [
            {'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
             'filter': 'NONE', 'date': '2022-01-01'},
            {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
             'filter': 'i', 'date': '2022-08-06'},
        ]

    @staticmethod
    def makeTestConfig():
        """Create an ingestion config with the ``obs_lsst`` override files loaded.

        Returns
        -------
        config : `lsst.ap.verify.ingestion.DatasetIngestConfig`
            A config that has been neither validated nor frozen.
        """
        obsDir = os.path.join(getPackageDir('obs_lsst'), 'config')
        config = ingestion.DatasetIngestConfig()
        config.dataIngester.load(os.path.join(obsDir, 'ingest.py'))
        config.dataIngester.load(os.path.join(obsDir, 'imsim', 'ingest.py'))
        config.calibIngester.load(os.path.join(obsDir, 'ingestCalibs.py'))
        config.curatedCalibIngester.load(os.path.join(obsDir, 'ingestCuratedCalibs.py'))
        return config

    @staticmethod
    def _rawDataId(datum):
        """Build the data ID expected to be registered for one ``rawData`` entry.

        Parameters
        ----------
        datum : `dict`
            A mapping with ``expId``, ``exptime``, and ``filter`` keys.

        Returns
        -------
        dataId : `dict`
            A mapping with ``expId``, ``expTime``, and ``filter`` keys.
        """
        return {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}

    def setUp(self):
        """Create a temporary repository, a patched Butler, and the task under test."""
        super().setUp()

        # Repositories still get used by IngestTask despite Butler being a mock object
        self._repo = self._calibRepo = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._repo, ignore_errors=True)

        def inRepo(fileNames):
            """Prefix each file name with the temporary repository path."""
            return [os.path.join(self._repo, name) for name in fileNames]

        # Fake Butler and RegisterTask to avoid initialization or DB overhead
        def mockGet(datasetType, dataId=None):
            """Minimally fake a butler.get().
            """
            if "raw_filename" in datasetType:
                return inRepo(datum['file'] for datum in IngestionTestSuite.rawData
                              if datum['expId'] == dataId['expId'])
            if "bias_filename" in datasetType:
                return inRepo(datum['file'] for datum in IngestionTestSuite.calibData
                              if datum['type'] == 'bias')
            if "flat_filename" in datasetType:
                return inRepo(datum['file'] for datum in IngestionTestSuite.calibData
                              if datum['type'] == 'flat' and datum['filter'] == dataId['filter'])
            if "defects_filename" in datasetType:
                return [os.path.join(self._repo, 'defects', 'defects.fits')]
            if "camera" in datasetType:
                return IngestionTestSuite.mockCamera
            return None

        butlerPatcher = unittest.mock.patch("lsst.daf.persistence.Butler")
        self._butler = butlerPatcher.start()
        # Register the stop immediately so the patch cannot outlive the test
        # if any of the configuration below raises.
        self.addCleanup(butlerPatcher.stop)
        # NOTE(review): lsst.obs.lsst is not imported explicitly in this file's
        # visible import block; this lookup appears to rely on another import
        # having loaded it -- confirm.
        self._butler.getMapperClass.return_value = lsst.obs.lsst.imsim.ImsimMapper
        self._butler.return_value.get = mockGet

        self._dataset = Dataset(self.datasetKey)
        # Fake Workspace because it's too hard to make a real one with a fake Butler
        self._workspace = unittest.mock.NonCallableMock(
            spec=WorkspaceGen2,
            dataRepo=self._repo,
            calibRepo=self._calibRepo,
        )

        self._task = ingestion.DatasetIngestTask(config=IngestionTestSuite.config)

    def setUpRawRegistry(self):
        """Mock up the RegisterTask used for ingesting raw data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts raw ingestion.

        Both this method and `setUpCalibRegistry` assign
        ``self._registerTask``, so behavior is undefined if a test calls
        more than one of them.
        """
        patcherRegister = unittest.mock.patch.object(self._task.dataIngester, "register",
                                                     spec=pipeTasks.ingest.RegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        self.addCleanup(patcherRegister.stop)

    def setUpCalibRegistry(self):
        """Mock up the RegisterTask used for ingesting calib data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts calib ingestion.

        Both this method and `setUpRawRegistry` assign
        ``self._registerTask``, so behavior is undefined if a test calls
        more than one of them.
        """
        patcherRegister = unittest.mock.patch.object(self._task.calibIngester, "register",
                                                     spec=pipeTasks.ingestCalibs.CalibsRegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        # Register the stop before touching the mock any further.
        self.addCleanup(patcherRegister.stop)
        # The mock replaces the real subtask, so hand it the real subtask config.
        self._registerTask.config = self._task.config.calibIngester.register

    def assertRawRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of science data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.RegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``expId``: exposure ID of the file (`int`).
            - ``exptime``: the exposure time of the file (`float`).
        """
        recordedCalls = registryMock.addRow.call_args_list
        for datum in expectedData:
            dataId = self._rawDataId(datum)
            found = False
            for recorded in recordedCalls:
                # The data ID is the second positional argument of addRow.
                registeredId = recorded[0][1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if registeredId['expId'] == datum['expId']:
                    found = True
                    for dimension in dataId:
                        self.assertEqual(registeredId[dimension], dataId[dimension])
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No rows beyond the expected ones may have been registered.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def assertCalibRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of calibration data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.CalibsRegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``type``: a valid calibration dataset type (`str`).
            - ``date``: the calibration date in YYYY-MM-DD format (`str`).
        """
        recordedCalls = registryMock.addRow.call_args_list
        for datum in expectedData:
            dataId = {'calibDate': datum['date'], 'filter': datum['filter']}
            found = False
            for recorded in recordedCalls:
                args, kwargs = recorded[0], recorded[1]
                # The data ID is the second positional argument of addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if (kwargs["table"] == datum["type"]
                        and registeredId['filter'] == datum['filter']
                        and registeredId['calibDate'] == datum['date']):
                    found = True
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No rows beyond the expected ones may have been registered.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.setUpRawRegistry()
        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, [])

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.setUpRawRegistry()
        self._task._ingestRaws(self._dataset, self._workspace)

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testCalibIngest(self):
        """Test that ingesting calibrations given specific files adds them to a repository.
        """
        files = [os.path.join(self._dataset.calibLocation, datum['file'])
                 for datum in IngestionTestSuite.calibData]
        self.setUpCalibRegistry()

        self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.setUpCalibRegistry()
        self._task._ingestCalibs(self._dataset, self._workspace)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        files = []
        self.setUpRawRegistry()

        with self.assertRaises(RuntimeError):
            self._task._doIngestRaws(self._repo, self._calibRepo, files, [])
        with self.assertRaises(RuntimeError):
            self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        # Only the raw register subtask is mocked in this test, so this checks
        # the raw registry alone.
        self._registerTask.addRow.assert_not_called()

    def testBadFileIngest(self):
        """Test that ingestion of raw data ignores forbidden files.
        """
        # NOTE(review): this name does not match any entry in ``rawData`` as
        # defined in setUpClass, so the filter and the loop below exclude
        # nothing for the data in this file -- confirm that is intended.
        badFiles = ['raw_v2_fg.fits.gz']
        self.setUpRawRegistry()

        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, badFiles)

        filteredData = [datum for datum in IngestionTestSuite.rawData if datum['file'] not in badFiles]
        self.assertRawRegistryCalls(self._registerTask, filteredData)

        for datum in IngestionTestSuite.rawData:
            if datum['file'] in badFiles:
                dataId = self._rawDataId(datum)
                # This call should never happen for badFiles
                call = unittest.mock.call(self._registerTask.openRegistry().__enter__(), dataId,
                                          create=False, dryrun=False)
                self.assertNotIn(call, self._registerTask.addRow.mock_calls)

308 

309 

class IngestionTestSuiteGen3(DataTestCase):
    """Tests of `ingestion.Gen3DatasetIngestTask` against a temporary workspace.

    Each test gets a fresh `WorkspaceGen3` rooted in a temporary directory
    and checks results through that workspace's ``workButler``.
    """

    @classmethod
    def setUpClass(cls):
        """Load the test dataset and build the expected-data tables."""
        super().setUpClass()

        cls.dataset = Dataset(cls.datasetKey)

        cls.INSTRUMENT = cls.dataset.instrument.getName()
        cls.VISIT_ID = 204595
        cls.DETECTOR_ID = 37

        # Besides "type" and "file", every key in these entries is passed to
        # the registry as part of a data ID by assertIngestedDataFiles.
        cls.rawData = [
            {'type': 'raw', 'file': 'lsst_a_204595_R11_S01_i.fits',
             'exposure': cls.VISIT_ID, 'detector': cls.DETECTOR_ID,
             'instrument': cls.INSTRUMENT},
        ]

        cls.calibData = [
            {'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
             'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT},
            {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
             'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT,
             'physical_filter': 'i'},
        ]

    @classmethod
    def makeTestConfig(cls):
        """Create an ingestion config with the dataset instrument's overrides applied.

        Returns
        -------
        config : `lsst.ap.verify.ingestion.Gen3DatasetIngestConfig`
            A config that has been neither validated nor frozen.
        """
        instrument = cls.dataset.instrument
        config = ingestion.Gen3DatasetIngestConfig()
        instrument.applyConfigOverrides(ingestion.Gen3DatasetIngestTask._DefaultName, config)
        return config

    def setUp(self):
        """Create a frozen config, a temporary workspace, and the task under test."""
        super().setUp()

        self.config = self.makeTestConfig()
        self.config.validate()
        self.config.freeze()

        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.workspace = WorkspaceGen3(self.root)
        self.task = ingestion.Gen3DatasetIngestTask(config=self.config,
                                                    dataset=self.dataset, workspace=self.workspace)

        self.butler = self.workspace.workButler

    def _queryVisitExposures(self):
        """Return the exposure data IDs the registry associates with ``VISIT_ID``.

        Returns
        -------
        exposures : `list`
            The results of querying the ``exposure`` dimension, constrained
            to this suite's visit and instrument.
        """
        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        return list(self.butler.registry.queryDataIds("exposure", dataId=testId))

    def assertIngestedDataFiles(self, data, collection):
        """Test that data have been loaded into a specific collection.

        Parameters
        ----------
        data : `collections.abc.Iterable` [`collections.abc.Mapping`]
            An iterable of mappings, each representing the properties of a
            single input dataset. Each mapping must contain a `"type"` key
            that maps to the dataset's Gen 3 type. All keys other than
            `"type"` and `"file"` are used as the data ID to search for.
        collection
            Any valid :ref:`collection expression <daf_butler_collection_expressions>`
            for the collection expected to contain the data.
        """
        nonIdKeys = ("type", "file")
        for datum in data:
            # Built with a comprehension so that any Mapping is accepted, as
            # documented, not only a dict.
            dataId = {key: value for key, value in datum.items() if key not in nonIdKeys}

            matches = list(self.butler.registry.queryDatasets(datum['type'],
                                                              collections=collection,
                                                              dataId=dataId))
            self.assertNotEqual(matches, [],
                                msg=f"No {datum['type']} dataset with {dataId} in {collection}.")

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testDataDoubleIngest(self):
        """Test that re-ingesting science images raises RuntimeError.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws(files, processes=1)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)  # Should not affect calibs, but would be run
        self.assertIngestedDataFiles(self.calibData, self.dataset.instrument.makeCollectionName("calib"))

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws([], processes=1)

    def testVisitDefinition(self):
        """Test that the final repository supports indexing by visit.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)

        exposures = self._queryVisitExposures()
        self.assertEqual(len(exposures), 1)
        self.assertEqual(exposures[0]["exposure"], self.VISIT_ID)

    def testVisitDoubleDefinition(self):
        """Test that re-defining visits is guarded against.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)
        self.task._defineVisits(processes=1)  # must not raise

        exposures = self._queryVisitExposures()
        self.assertEqual(len(exposures), 1)

    def testVisitsUndefinable(self):
        """Test that attempts to define visits with no exposures raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._defineVisits(processes=1)

    def testCopyConfigs(self):
        """Test that "ingesting" configs stores them in the workspace for later reference.
        """
        self.task._copyConfigs()
        self.assertTrue(os.path.exists(self.workspace.configDir))
        # Only testdata file that *must* be supported in the future
        self.assertTrue(os.path.exists(os.path.join(self.workspace.configDir, "datasetIngest.py")))

    def testFindMatchingFiles(self):
        """Test that _findMatchingFiles finds the desired files.
        """
        testDir = self.dataset.datasetRoot
        calibDir = os.path.join(testDir, 'calib')
        biasFile = os.path.join(calibDir, 'bias-R11-S01-det037_2022-01-01.fits.gz')
        flatFile = os.path.join(calibDir, 'flat_i-R11-S01-det037_2022-08-06.fits.gz')
        allFiles = {biasFile, flatFile}

        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz']), allFiles
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*_i-*']),
            {biasFile}
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*R11-S01*']),
            set()
        )
        # Exclude filters should not match directories
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['calib']),
            allFiles
        )

    def testPickling(self):
        """Test that a Gen3DatasetIngestTask can be pickled correctly.

        This is needed for multiprocessing support.
        """
        stream = pickle.dumps(self.task)
        copy = pickle.loads(stream)
        self.assertEqual(self.task.getFullName(), copy.getFullName())
        self.assertEqual(self.task.log.getName(), copy.log.getName())
        # Equality for config ill-behaved; skip testing it
        self.assertEqual(self.task.dataset, copy.dataset)
        self.assertEqual(self.task.workspace, copy.workspace)

488 

class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Test case that only inherits from `lsst.utils.tests.MemoryTestCase`.

    It defines nothing of its own; whatever checks the base class provides
    are collected under this name.
    """

491 

492 

def setup_module(module):
    """Module-level setup hook: initialize the `lsst.utils.tests` framework.

    Parameters
    ----------
    module
        The module being set up; not used here.
    """
    lsst.utils.tests.init()

495 

496 

if __name__ == "__main__":
    # Script entry point: perform the same initialization as setup_module,
    # then hand control to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()