Coverage for tests/test_ingestion.py: 22%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

244 statements  

1# 

2# This file is part of ap_verify. 

3# 

4# Developed for the LSST Data Management System. 

5# This product includes software developed by the LSST Project 

6# (http://www.lsst.org). 

7# See the COPYRIGHT file at the top-level directory of this distribution 

8# for details of code ownership. 

9# 

10# This program is free software: you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation, either version 3 of the License, or 

13# (at your option) any later version. 

14# 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19# 

20# You should have received a copy of the GNU General Public License 

21# along with this program. If not, see <http://www.gnu.org/licenses/>. 

22# 

23 

24import os 

25import pickle 

26import re 

27import shutil 

28import tempfile 

29import unittest.mock 

30 

31from lsst.utils import getPackageDir 

32import lsst.utils.tests 

33from lsst.daf.butler import CollectionType 

34import lsst.pipe.tasks as pipeTasks 

35from lsst.ap.verify import ingestion 

36from lsst.ap.verify.testUtils import DataTestCase 

37from lsst.ap.verify.dataset import Dataset 

38from lsst.ap.verify.workspace import WorkspaceGen2, WorkspaceGen3 

39 

40 

class MockDetector:
    """Minimal stand-in for a camera detector.

    The mock always reports the name ``'0'`` and the numeric ID ``0``.
    """

    def getName(self):
        """Return the detector name, which is always ``'0'``."""
        return '0'

    def getId(self):
        """Return the detector ID, which is always ``0``."""
        return 0

47 

48 

class MockCamera:
    """Minimal stand-in for a camera that holds exactly one detector.

    Parameters
    ----------
    detector
        The only detector in the camera. It can be looked up either by the
        integer position ``0`` or by the name ``'0'``.
    """

    def __init__(self, detector):
        self.det_list = [detector, ]
        self.det_dict = {'0': detector}

    def __getitem__(self, item):
        """Return the detector selected by position or by name.

        Parameters
        ----------
        item : `int` or `str`
            An integer is treated as an index into the detector list; any
            other key is looked up in the name-to-detector mapping.

        Raises
        ------
        IndexError
            Raised if ``item`` is an integer outside the detector list.
        KeyError
            Raised if ``item`` is not an integer and is not a known name.
        """
        # isinstance (rather than an exact type comparison) also accepts
        # int subclasses as positional indices.
        if isinstance(item, int):
            return self.det_list[item]
        return self.det_dict[item]

59 

60 

class IngestionTestSuite(DataTestCase):
    """Tests of `ingestion.DatasetIngestTask` against a mocked Butler and mocked registries.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        cls.mockCamera = MockCamera(MockDetector())
        cls.config = cls.makeTestConfig()
        cls.config.validate()
        cls.config.freeze()

        cls.testApVerifyData = os.path.join('tests', 'ingestion')

        # Expected science data; read by mockGet and the assert*RegistryCalls helpers.
        cls.rawData = [{'file': 'lsst_a_204595_R11_S01_i.fits', 'expId': 204595, 'filter': 'i_sim_1.4',
                        'exptime': 30.0},
                       ]
        # Expected calibration data, one entry per calibration file.
        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'filter': 'NONE', 'date': '2022-01-01'},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'filter': 'i_sim_1.4', 'date': '2022-08-06'},
                         ]

    @staticmethod
    def makeTestConfig():
        """Build an ingestion config from the ``obs_lsst`` config files.

        Returns
        -------
        config : `ingestion.DatasetIngestConfig`
            A config with the ``obs_lsst`` ingest overrides loaded. It is
            neither validated nor frozen.
        """
        obsDir = os.path.join(getPackageDir('obs_lsst'), 'config')
        config = ingestion.DatasetIngestConfig()
        config.dataIngester.load(os.path.join(obsDir, 'ingest.py'))
        config.dataIngester.load(os.path.join(obsDir, 'imsim', 'ingest.py'))
        config.calibIngester.load(os.path.join(obsDir, 'ingestCalibs.py'))
        config.curatedCalibIngester.load(os.path.join(obsDir, 'ingestCuratedCalibs.py'))
        return config

    def setUp(self):
        # This module never imports lsst.obs.lsst itself; import it here so that
        # the ImsimMapper lookup below does not depend on another package having
        # loaded it as a side effect.
        import lsst.obs.lsst.imsim

        # Repositories still get used by IngestTask despite Butler being a mock object
        self._repo = self._calibRepo = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._repo, ignore_errors=True)

        # Fake Butler and RegisterTask to avoid initialization or DB overhead
        def mockGet(datasetType, dataId=None):
            """Minimally fake a butler.get().
            """
            if "raw_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.rawData
                                 if datum['expId'] == dataId['expId']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "bias_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'bias']
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "flat_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'flat' and datum['filter'] == dataId['filter']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "defects_filename" in datasetType:
                return [os.path.join(self._repo, 'defects', 'defects.fits'), ]
            elif "camera" in datasetType:
                return IngestionTestSuite.mockCamera
            else:
                return None

        butlerPatcher = unittest.mock.patch("lsst.daf.persistence.Butler")
        self._butler = butlerPatcher.start()
        self._butler.getMapperClass.return_value = lsst.obs.lsst.imsim.ImsimMapper
        self._butler.return_value.get = mockGet
        self.addCleanup(butlerPatcher.stop)

        self._dataset = Dataset(self.testDataset)
        # Fake Workspace because it's too hard to make a real one with a fake Butler
        self._workspace = unittest.mock.NonCallableMock(
            spec=WorkspaceGen2,
            dataRepo=self._repo,
            calibRepo=self._calibRepo,
        )

        self._task = ingestion.DatasetIngestTask(config=IngestionTestSuite.config)

    def setUpRawRegistry(self):
        """Mock up the RegisterTask used for ingesting raw data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts raw ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each one overwrites
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.dataIngester, "register",
                                                     spec=pipeTasks.ingest.RegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        self.addCleanup(patcherRegister.stop)

    def setUpCalibRegistry(self):
        """Mock up the RegisterTask used for ingesting calib data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts calib ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each one overwrites
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.calibIngester, "register",
                                                     spec=pipeTasks.ingestCalibs.CalibsRegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        # The mock replaces the real register task, so give it the real config back.
        self._registerTask.config = self._task.config.calibIngester.register
        self.addCleanup(patcherRegister.stop)

    def assertRawRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of science data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.RegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``expId``: exposure ID of the file (`int`).
            - ``exptime``: the exposure time of the file (`float`).
        """
        for datum in expectedData:
            found = False
            dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                # The data ID is the second positional argument of addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if registeredId['expId'] == datum['expId']:
                    found = True
                    for dimension in dataId:
                        self.assertEqual(registeredId[dimension], dataId[dimension])
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No extra rows beyond the expected ones.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def assertCalibRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of calibration data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.CalibsRegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``type``: a valid calibration dataset type (`str`).
            - ``date``: the calibration date in YYYY-MM-DD format (`str`).
        """
        for datum in expectedData:
            found = False
            dataId = {'calibDate': datum['date'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                kwargs = call[1]
                # The data ID is the second positional argument of addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                # The calibration type is matched against the ``table`` keyword argument.
                if kwargs["table"] == datum["type"] and registeredId['filter'] == datum['filter'] \
                        and registeredId['calibDate'] == datum['date']:
                    found = True
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No extra rows beyond the expected ones.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.setUpRawRegistry()
        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, [])

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.setUpRawRegistry()
        self._task._ingestRaws(self._dataset, self._workspace)

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testCalibIngest(self):
        """Test that ingesting calibrations given specific files adds them to a repository.
        """
        files = [os.path.join(self._dataset.calibLocation, datum['file'])
                 for datum in IngestionTestSuite.calibData]
        self.setUpCalibRegistry()

        self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.setUpCalibRegistry()
        self._task._ingestCalibs(self._dataset, self._workspace)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        files = []
        self.setUpRawRegistry()

        with self.assertRaises(RuntimeError):
            self._task._doIngestRaws(self._repo, self._calibRepo, files, [])
        with self.assertRaises(RuntimeError):
            self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        # NOTE(review): only the raw register is mocked here, so this checks
        # the raw registry alone; the calib registry is not inspected.
        self._registerTask.addRow.assert_not_called()

    def testBadFileIngest(self):
        """Test that ingestion of raw data ignores forbidden files.
        """
        # NOTE(review): this name does not appear in ``rawData``, so with the
        # current fixtures nothing is filtered out and the loop at the end of
        # this test never reaches its assertion -- confirm whether a file
        # from ``rawData`` was intended.
        badFiles = ['raw_v2_fg.fits.gz']
        self.setUpRawRegistry()

        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, badFiles)

        filteredData = [datum for datum in IngestionTestSuite.rawData if datum['file'] not in badFiles]
        self.assertRawRegistryCalls(self._registerTask, filteredData)

        for datum in IngestionTestSuite.rawData:
            if datum['file'] in badFiles:
                dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
                # This call should never happen for badFiles
                call = unittest.mock.call(self._registerTask.openRegistry().__enter__(), dataId,
                                          create=False, dryrun=False)
                self.assertNotIn(call, self._registerTask.addRow.mock_calls)

310 

311 

class IngestionTestSuiteGen3(DataTestCase):
    """Tests of `ingestion.Gen3DatasetIngestTask` against a temporary Gen 3 workspace.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        cls.dataset = Dataset(cls.testDataset)

        cls.INSTRUMENT = cls.dataset.instrument.getName()
        cls.VISIT_ID = 204595
        cls.DETECTOR_ID = 37

        # Expected science data; apart from "type" and "file", the keys form the data ID.
        cls.rawData = [{'type': 'raw', 'file': 'lsst_a_204595_R11_S01_i.fits',
                        'exposure': cls.VISIT_ID, 'detector': cls.DETECTOR_ID,
                        'instrument': cls.INSTRUMENT},
                       ]

        # Expected calibration data, in the same layout as ``rawData``.
        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT,
                          'physical_filter': 'i_sim_1.4'},
                         ]

    @classmethod
    def makeTestConfig(cls):
        """Build an ingestion config with the dataset instrument's overrides applied.

        Returns
        -------
        config : `ingestion.Gen3DatasetIngestConfig`
            A config that is neither validated nor frozen.
        """
        instrument = cls.dataset.instrument
        config = ingestion.Gen3DatasetIngestConfig()
        instrument.applyConfigOverrides(ingestion.Gen3DatasetIngestTask._DefaultName, config)
        return config

    def setUp(self):
        super().setUp()

        self.config = self.makeTestConfig()
        self.config.validate()
        self.config.freeze()

        # Each test gets its own workspace in a temporary directory.
        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.workspace = WorkspaceGen3(self.root)
        self.task = ingestion.Gen3DatasetIngestTask(config=self.config,
                                                    dataset=self.dataset, workspace=self.workspace)

        self.butler = self.workspace.workButler

    def assertIngestedDataFiles(self, data, collection):
        """Test that data have been loaded into a specific collection.

        Parameters
        ----------
        data : `collections.abc.Iterable` [`collections.abc.Mapping`]
            An iterable of mappings, each representing the properties of a
            single input dataset. Each mapping must contain a `"type"` key
            that maps to the dataset's Gen 3 type.
        collection
            Any valid :ref:`collection expression <daf_butler_collection_expressions>`
            for the collection expected to contain the data.
        """
        for datum in data:
            # Everything except the bookkeeping keys is used as the data ID.
            dataId = datum.copy()
            dataId.pop("type", None)
            dataId.pop("file", None)

            matches = list(self.butler.registry.queryDatasets(datum['type'],
                                                              collections=collection,
                                                              dataId=dataId))
            self.assertNotEqual(matches, [],
                                msg=f"No {datum['type']} dataset matching {dataId} in {collection}.")

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testDataDoubleIngest(self):
        """Test that re-ingesting science images raises RuntimeError.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws(files, processes=1)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)  # Should not affect calibs, but would be run
        # queryDatasets cannot (yet) search CALIBRATION collections, so we
        # instead search the RUN-type collections that calibrations are
        # ingested into first before being associated with a validity range.
        calibrationRunPattern = re.compile(
            re.escape(self.dataset.instrument.makeCollectionName("calib") + "/") + ".+"
        )
        calibrationRuns = list(
            self.butler.registry.queryCollections(
                calibrationRunPattern,
                collectionTypes={CollectionType.RUN},
            )
        )
        self.assertIngestedDataFiles(self.calibData, calibrationRuns)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws([], processes=1)

    def testVisitDefinition(self):
        """Test that the final repository supports indexing by visit.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)
        self.assertEqual(exposures[0]["exposure"], self.VISIT_ID)

    def testVisitDoubleDefinition(self):
        """Test that re-defining visits is guarded against.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)
        self.task._defineVisits(processes=1)  # must not raise

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)

    def testVisitsUndefinable(self):
        """Test that attempts to define visits with no exposures raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._defineVisits(processes=1)

    def testCopyConfigs(self):
        """Test that "ingesting" configs stores them in the workspace for later reference.
        """
        self.task._copyConfigs()
        self.assertTrue(os.path.exists(self.workspace.configDir))
        # Only testdata file that *must* be supported in the future
        self.assertTrue(os.path.exists(os.path.join(self.workspace.configDir, "datasetIngest.py")))
        self.assertTrue(os.path.exists(self.workspace.pipelineDir))
        self.assertTrue(os.path.exists(os.path.join(self.workspace.pipelineDir, "ApVerify.yaml")))

    def testFindMatchingFiles(self):
        """Test that _findMatchingFiles finds the desired files.
        """
        testDir = self.dataset.datasetRoot
        allFiles = {os.path.join(testDir, 'calib', f) for f in
                    {'bias-R11-S01-det037_2022-01-01.fits.gz',
                     'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                     }}

        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz']), allFiles
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*_i-*']),
            {os.path.join(testDir, 'calib', f) for f in
             {'bias-R11-S01-det037_2022-01-01.fits.gz'}}
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*R11-S01*']),
            set()
        )
        # Exclude filters should not match directories
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['calib']),
            allFiles
        )

    def testPickling(self):
        """Test that a Gen3DatasetIngestTask can be pickled correctly.

        This is needed for multiprocessing support.
        """
        stream = pickle.dumps(self.task)
        clone = pickle.loads(stream)
        self.assertEqual(self.task.getFullName(), clone.getFullName())
        self.assertEqual(self.task.log.name, clone.log.name)
        # Equality for config ill-behaved; skip testing it
        self.assertEqual(self.task.dataset, clone.dataset)
        self.assertEqual(self.task.workspace, clone.workspace)

503 

504 

class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Adds the checks inherited from `lsst.utils.tests.MemoryTestCase` to this module."""

507 

508 

def setup_module(module):
    """Initialize the LSST test utilities for this module.

    Parameters
    ----------
    module
        Not used by this function.
    """
    lsst.utils.tests.init()

511 

512 

# Script entry point: initialize the LSST test utilities, then run every test in this module.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()