Coverage for tests/test_ingestion.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24import os
25import shutil
26import tempfile
27import unittest.mock
29from lsst.utils import getPackageDir
30import lsst.utils.tests
31import lsst.pipe.tasks as pipeTasks
32from lsst.ap.verify import ingestion
33from lsst.ap.verify.testUtils import DataTestCase
34from lsst.ap.verify.dataset import Dataset
35from lsst.ap.verify.workspace import WorkspaceGen2, WorkspaceGen3
class MockDetector:
    """Minimal stand-in for a camera detector.

    Every instance reports the same fixed name (``'0'``) and the same
    fixed numeric ID (``0``).
    """

    def getName(self):
        """Return the detector name, which is always the string ``'0'``."""
        return '0'

    def getId(self):
        """Return the detector ID, which is always the integer ``0``."""
        return 0
class MockCamera:
    """Minimal stand-in for a camera that holds exactly one detector.

    The detector can be retrieved either by integer position or by the
    name ``'0'``.

    Parameters
    ----------
    detector
        The single detector this camera contains.
    """

    def __init__(self, detector):
        self.det_list = [detector, ]
        self.det_dict = {'0': detector}

    def __getitem__(self, item):
        """Look up a detector by position or by name.

        Parameters
        ----------
        item : `int` or `str`
            Any `int` (including subclasses such as `enum.IntEnum` members)
            is treated as a position in the detector list; anything else is
            treated as a detector name.

        Returns
        -------
        detector
            The matching detector.

        Raises
        ------
        IndexError
            Raised if ``item`` is an integer outside the detector list.
        KeyError
            Raised if ``item`` is not an integer and is not a known name.
        """
        # isinstance rather than an exact type comparison, so that int
        # subclasses are indexed positionally instead of falling through
        # to the name lookup.
        if isinstance(item, int):
            return self.det_list[item]
        return self.det_dict[item]
class IngestionTestSuite(DataTestCase):
    """Tests of `ingestion.DatasetIngestTask`, run against a mocked Butler
    and mocked register tasks.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        cls.mockCamera = MockCamera(MockDetector())
        cls.config = cls.makeTestConfig()
        cls.config.validate()
        cls.config.freeze()

        cls.testApVerifyData = os.path.join('tests', 'ingestion')

        # Descriptions of the files each test expects to see registered.
        cls.rawData = [{'file': 'lsst_a_204595_R11_S01_i.fits', 'expId': 204595, 'filter': 'i',
                        'exptime': 30.0},
                       ]
        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'filter': 'NONE', 'date': '2022-01-01'},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'filter': 'i', 'date': '2022-08-06'},
                         ]

    @staticmethod
    def makeTestConfig():
        """Build an ingestion config from the ``obs_lsst`` config files.

        Returns
        -------
        config : `ingestion.DatasetIngestConfig`
            A config with the raw, calib, and curated-calib ingester
            overrides loaded. It is neither validated nor frozen here.
        """
        obsDir = os.path.join(getPackageDir('obs_lsst'), 'config')
        config = ingestion.DatasetIngestConfig()
        config.dataIngester.load(os.path.join(obsDir, 'ingest.py'))
        config.dataIngester.load(os.path.join(obsDir, 'imsim', 'ingest.py'))
        config.calibIngester.load(os.path.join(obsDir, 'ingestCalibs.py'))
        config.curatedCalibIngester.load(os.path.join(obsDir, 'ingestCuratedCalibs.py'))
        return config

    def setUp(self):
        # Repositories still get used by IngestTask despite Butler being a mock object
        self._repo = self._calibRepo = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._repo, ignore_errors=True)

        # Fake Butler and RegisterTask to avoid initialization or DB overhead
        def mockGet(datasetType, dataId=None):
            """Minimally fake a butler.get().
            """
            if "raw_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.rawData
                                 if datum['expId'] == dataId['expId']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "bias_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'bias']
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "flat_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'flat' and datum['filter'] == dataId['filter']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "defects_filename" in datasetType:
                return [os.path.join(self._repo, 'defects', 'defects.fits'), ]
            elif "camera" in datasetType:
                return IngestionTestSuite.mockCamera
            else:
                return None

        butlerPatcher = unittest.mock.patch("lsst.daf.persistence.Butler")
        self._butler = butlerPatcher.start()
        # Register the undo step immediately after starting the patch, so the
        # patch cannot leak into other tests if configuring the mock fails.
        self.addCleanup(butlerPatcher.stop)
        # NOTE(review): lsst.obs.lsst.imsim is not imported explicitly by the
        # imports visible in this file; this line presumably relies on another
        # module having imported it -- confirm.
        self._butler.getMapperClass.return_value = lsst.obs.lsst.imsim.ImsimMapper
        self._butler.return_value.get = mockGet

        self._dataset = Dataset(self.datasetKey)
        # Fake Workspace because it's too hard to make a real one with a fake Butler
        self._workspace = unittest.mock.NonCallableMock(
            spec=WorkspaceGen2,
            dataRepo=self._repo,
            calibRepo=self._calibRepo,
        )

        self._task = ingestion.DatasetIngestTask(config=IngestionTestSuite.config)

    def setUpRawRegistry(self):
        """Mock up the RegisterTask used for ingesting raw data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts raw ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each one overwrites
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.dataIngester, "register",
                                                     spec=pipeTasks.ingest.RegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        self.addCleanup(patcherRegister.stop)

    def setUpCalibRegistry(self):
        """Mock up the RegisterTask used for ingesting calib data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts calib ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each one overwrites
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.calibIngester, "register",
                                                     spec=pipeTasks.ingestCalibs.CalibsRegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        # Register the undo step before touching the mock any further, so the
        # patch is always removed even if the configuration below fails.
        self.addCleanup(patcherRegister.stop)
        self._registerTask.config = self._task.config.calibIngester.register

    def assertRawRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of science data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.RegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``expId``: exposure ID of the file (`int`).
            - ``exptime``: the exposure time of the file (`float`).
        """
        for datum in expectedData:
            found = False
            dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                # The registered data ID is the second positional argument.
                registeredId = args[1]
                self.assertLessEqual(set(dataId), set(registeredId))  # subset

                if registeredId['expId'] == datum['expId']:
                    found = True
                    for dimension in dataId:
                        self.assertEqual(registeredId[dimension], dataId[dimension])
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No registrations beyond the expected ones.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def assertCalibRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of calibration data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.addRow`
            method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:
            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``type``: a valid calibration dataset type (`str`).
            - ``date``: the calibration date in YYYY-MM-DD format (`str`).
        """
        for datum in expectedData:
            found = False
            dataId = {'calibDate': datum['date'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                kwargs = call[1]
                # The registered data ID is the second positional argument;
                # the calib type is read from the ``table`` keyword.
                registeredId = args[1]
                self.assertLessEqual(set(dataId), set(registeredId))  # subset

                if (kwargs["table"] == datum["type"] and registeredId['filter'] == datum['filter']
                        and registeredId['calibDate'] == datum['date']):
                    found = True
            self.assertTrue(found, msg=f"No call with {dataId}.")

        # No registrations beyond the expected ones.
        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.setUpRawRegistry()
        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, [])

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.setUpRawRegistry()
        self._task._ingestRaws(self._dataset, self._workspace)

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testCalibIngest(self):
        """Test that ingesting calibrations given specific files adds them to a repository.
        """
        files = [os.path.join(self._dataset.calibLocation, datum['file'])
                 for datum in IngestionTestSuite.calibData]
        self.setUpCalibRegistry()

        self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.setUpCalibRegistry()
        self._task._ingestCalibs(self._dataset, self._workspace)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        files = []
        self.setUpRawRegistry()

        with self.assertRaises(RuntimeError):
            self._task._doIngestRaws(self._repo, self._calibRepo, files, [])
        with self.assertRaises(RuntimeError):
            self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self._registerTask.addRow.assert_not_called()

    def testBadFileIngest(self):
        """Test that ingestion of raw data ignores forbidden files.
        """
        # NOTE(review): this name matches no entry in ``rawData``, so
        # ``filteredData`` below equals ``rawData`` and the final loop never
        # reaches its assertion -- confirm whether the name is stale.
        badFiles = ['raw_v2_fg.fits.gz']
        self.setUpRawRegistry()

        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, badFiles)

        filteredData = [datum for datum in IngestionTestSuite.rawData if datum['file'] not in badFiles]
        self.assertRawRegistryCalls(self._registerTask, filteredData)

        for datum in IngestionTestSuite.rawData:
            if datum['file'] in badFiles:
                dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
                # This call should never happen for badFiles
                call = unittest.mock.call(self._registerTask.openRegistry().__enter__(), dataId,
                                          create=False, dryrun=False)
                self.assertNotIn(call, self._registerTask.addRow.mock_calls)
class IngestionTestSuiteGen3(DataTestCase):
    """Tests of `ingestion.Gen3DatasetIngestTask`, run against a workspace
    created in a temporary directory.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        cls.dataset = Dataset(cls.datasetKey)

        cls.INSTRUMENT = cls.dataset.instrument.getName()
        cls.VISIT_ID = 204595
        cls.DETECTOR_ID = 37

        # Data ID fragment shared by every expected dataset below.
        detectorId = {'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT}

        cls.rawData = [
            {'type': 'raw', 'file': 'lsst_a_204595_R11_S01_i.fits',
             'exposure': cls.VISIT_ID, **detectorId},
        ]

        cls.calibData = [
            {'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
             **detectorId},
            {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
             **detectorId, 'physical_filter': 'i'},
        ]

    @classmethod
    def makeTestConfig(cls):
        """Build an ingestion config with the dataset instrument's overrides.

        Returns
        -------
        config : `ingestion.Gen3DatasetIngestConfig`
            A new config to which the instrument's overrides for the ingest
            task have been applied. It is neither validated nor frozen here.
        """
        config = ingestion.Gen3DatasetIngestConfig()
        cls.dataset.instrument.applyConfigOverrides(ingestion.Gen3DatasetIngestTask._DefaultName, config)
        return config

    def setUp(self):
        super().setUp()

        self.config = self.makeTestConfig()
        self.config.validate()
        self.config.freeze()

        # Each test gets its own throwaway workspace directory.
        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.workspace = WorkspaceGen3(self.root)
        self.task = ingestion.Gen3DatasetIngestTask(config=self.config,
                                                    dataset=self.dataset, workspace=self.workspace)

        self.butler = self.workspace.workButler

    def _rawPaths(self):
        """Return the paths of the files in ``rawData``, located under the
        dataset's raw directory.
        """
        rawDir = self.dataset.rawLocation
        return [os.path.join(rawDir, datum['file']) for datum in self.rawData]

    def assertIngestedDataFiles(self, data, collection):
        """Test that data have been loaded into a specific collection.

        Parameters
        ----------
        data : `collections.abc.Iterable` [`collections.abc.Mapping`]
            An iterable of mappings, each representing the properties of a
            single input dataset. Each mapping must contain a `"type"` key
            that maps to the dataset's Gen 3 type. The ``"type"`` and
            ``"file"`` entries are dropped and the remaining entries are
            used as the data ID of the query.
        collection
            Passed unchanged as the ``collections`` argument of
            ``registry.queryDatasets``; identifies the collection expected
            to contain the data.
        """
        nonIdKeys = ("type", "file")
        for datum in data:
            dataId = {key: value for key, value in datum.items() if key not in nonIdKeys}

            found = list(self.butler.registry.queryDatasets(datum['type'],
                                                            collections=collection,
                                                            dataId=dataId))
            self.assertNotEqual(found, [])

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.task._ingestRaws(self._rawPaths())
        rawRun = self.dataset.instrument.makeDefaultRawIngestRunName()
        self.assertIngestedDataFiles(self.rawData, rawRun)

    def testDataDoubleIngest(self):
        """Test that re-ingesting science images raises RuntimeError.
        """
        rawPaths = self._rawPaths()
        self.task._ingestRaws(rawPaths)
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws(rawPaths)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws()
        rawRun = self.dataset.instrument.makeDefaultRawIngestRunName()
        self.assertIngestedDataFiles(self.rawData, rawRun)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws()  # Should not affect calibs, but would be run
        calibCollection = self.dataset.instrument.makeCollectionName("calib")
        self.assertIngestedDataFiles(self.calibData, calibCollection)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws([])

    def testVisitDefinition(self):
        """Test that the final repository supports indexing by visit.
        """
        self.task._ensureRaws()
        self.task._defineVisits()

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDimensions("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)
        self.assertEqual(exposures[0]["exposure"], self.VISIT_ID)

    def testVisitDoubleDefinition(self):
        """Test that re-defining visits is guarded against.
        """
        self.task._ensureRaws()
        self.task._defineVisits()
        self.task._defineVisits()  # must not raise

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDimensions("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)

    def testVisitsUndefinable(self):
        """Test that attempts to define visits with no exposures raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._defineVisits()

    def testCopyConfigs(self):
        """Test that "ingesting" configs stores them in the workspace for later reference.
        """
        self.task._copyConfigs()
        configDir = self.workspace.configDir
        self.assertTrue(os.path.exists(configDir))
        # Only testdata file that *must* be supported in the future
        self.assertTrue(os.path.exists(os.path.join(configDir, "datasetIngest.py")))

    def testFindMatchingFiles(self):
        """Test that _findMatchingFiles finds the desired files.
        """
        testDir = self.dataset.datasetRoot

        def inCalibDir(*names):
            """Return the set of paths of ``names`` inside the calib directory."""
            return {os.path.join(testDir, 'calib', name) for name in names}

        biasName = 'bias-R11-S01-det037_2022-01-01.fits.gz'
        flatName = 'flat_i-R11-S01-det037_2022-08-06.fits.gz'
        allFiles = inCalibDir(biasName, flatName)
        pattern = ['*.fits.gz']

        self.assertSetEqual(ingestion._findMatchingFiles(testDir, pattern), allFiles)
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, pattern, exclude=['*_i-*']),
            inCalibDir(biasName)
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, pattern, exclude=['*R11-S01*']),
            set()
        )
        # Exclude filters should not match directories
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, pattern, exclude=['calib']),
            allFiles
        )
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Pull the checks defined by `lsst.utils.tests.MemoryTestCase` into
    this test module without modification.
    """
def setup_module(module):
    """Initialize the LSST test utilities for this module.

    Parameters
    ----------
    module
        Unused by this function.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Running the file as a script: initialize the LSST test utilities,
    # then hand control to the unittest runner.
    lsst.utils.tests.init()
    unittest.main()