Coverage for tests/test_ingestion.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_verify.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24import os
25import pickle
26import shutil
27import tempfile
28import unittest.mock
30from lsst.utils import getPackageDir
31import lsst.utils.tests
32import lsst.pipe.tasks as pipeTasks
33from lsst.ap.verify import ingestion
34from lsst.ap.verify.testUtils import DataTestCase
35from lsst.ap.verify.dataset import Dataset
36from lsst.ap.verify.workspace import WorkspaceGen2, WorkspaceGen3
class MockDetector:
    """Minimal stand-in for a camera detector.

    Only the two accessors needed by the tests are provided; both return
    fixed values.
    """

    def getId(self):
        """Return the detector's ID, which is always the integer 0."""
        return 0

    def getName(self):
        """Return the detector's name, which is always the string ``'0'``."""
        return '0'
class MockCamera(object):
    """Minimal stand-in for a camera that contains exactly one detector.

    The detector can be retrieved either by integer position or by the
    name ``'0'``.

    Parameters
    ----------
    detector
        The single detector held by this camera.
    """

    def __init__(self, detector):
        self.det_list = [detector, ]
        self.det_dict = {'0': detector}

    def __getitem__(self, item):
        """Return the detector matching ``item``.

        Parameters
        ----------
        item : `int` or `str`
            An integer is treated as a position in the detector list;
            anything else is treated as a detector name.

        Raises
        ------
        IndexError
            Raised if ``item`` is an integer outside the detector list.
        KeyError
            Raised if ``item`` is not an integer and is not a known name.
        """
        # isinstance (rather than an exact type comparison) so that int
        # subclasses are also treated as positions.
        if isinstance(item, int):
            return self.det_list[item]
        return self.det_dict[item]
class IngestionTestSuite(DataTestCase):
    """Test `ingestion.DatasetIngestTask` against a mocked Butler.

    The Butler class, the workspace, and the register subtasks are replaced
    by `unittest.mock` objects, so these tests check which rows the task
    asks to register rather than the contents of a real repository.
    """

    @classmethod
    def setUpClass(cls):
        """Create the camera mock, the frozen task config, and the expected data."""
        super().setUpClass()

        cls.mockCamera = MockCamera(MockDetector())
        cls.config = cls.makeTestConfig()
        cls.config.validate()
        cls.config.freeze()

        cls.testApVerifyData = os.path.join('tests', 'ingestion')

        # Descriptions of the files the tests expect to be registered; the
        # assert*RegistryCalls helpers compare registry calls against these.
        cls.rawData = [{'file': 'lsst_a_204595_R11_S01_i.fits', 'expId': 204595, 'filter': 'i_sim_1.4',
                        'exptime': 30.0},
                       ]
        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'filter': 'NONE', 'date': '2022-01-01'},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'filter': 'i_sim_1.4', 'date': '2022-08-06'},
                         ]

    @staticmethod
    def makeTestConfig():
        """Return a `ingestion.DatasetIngestConfig` loaded with the obs_lsst overrides.

        The config is returned unvalidated and unfrozen.
        """
        obsDir = os.path.join(getPackageDir('obs_lsst'), 'config')
        config = ingestion.DatasetIngestConfig()
        config.dataIngester.load(os.path.join(obsDir, 'ingest.py'))
        config.dataIngester.load(os.path.join(obsDir, 'imsim', 'ingest.py'))
        config.calibIngester.load(os.path.join(obsDir, 'ingestCalibs.py'))
        config.curatedCalibIngester.load(os.path.join(obsDir, 'ingestCuratedCalibs.py'))
        return config

    def setUp(self):
        """Create a temporary repository, a patched Butler, and the task under test."""
        # Imported explicitly: the mapper's module is not among this file's
        # top-level imports, so attribute access through the ``lsst`` package
        # would only work if some other module happened to import it first.
        from lsst.obs.lsst.imsim import ImsimMapper

        # Repositories still get used by IngestTask despite Butler being a mock object
        self._repo = self._calibRepo = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._repo, ignore_errors=True)

        # Fake Butler and RegisterTask to avoid initialization or DB overhead
        def mockGet(datasetType, dataId=None):
            """Minimally fake a butler.get().

            Returns a list of paths inside the temporary repository for the
            ``*_filename`` dataset types, the shared mock camera for
            ``camera``, and `None` for anything else.
            """
            if "raw_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.rawData
                                 if datum['expId'] == dataId['expId']]
                return [os.path.join(self._repo, name) for name in matchingFiles]
            elif "bias_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'bias']
                return [os.path.join(self._repo, name) for name in matchingFiles]
            elif "flat_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'flat' and datum['filter'] == dataId['filter']]
                return [os.path.join(self._repo, name) for name in matchingFiles]
            elif "defects_filename" in datasetType:
                return [os.path.join(self._repo, 'defects', 'defects.fits'), ]
            elif "camera" in datasetType:
                return IngestionTestSuite.mockCamera
            else:
                return None

        butlerPatcher = unittest.mock.patch("lsst.daf.persistence.Butler")
        self._butler = butlerPatcher.start()
        self._butler.getMapperClass.return_value = ImsimMapper
        self._butler.return_value.get = mockGet
        self.addCleanup(butlerPatcher.stop)

        self._dataset = Dataset(self.datasetKey)
        # Fake Workspace because it's too hard to make a real one with a fake Butler
        self._workspace = unittest.mock.NonCallableMock(
            spec=WorkspaceGen2,
            dataRepo=self._repo,
            calibRepo=self._calibRepo,
        )

        self._task = ingestion.DatasetIngestTask(config=IngestionTestSuite.config)

    def setUpRawRegistry(self):
        """Mock up the RegisterTask used for ingesting raw data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts raw ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called in the same test.
        """
        patcherRegister = unittest.mock.patch.object(self._task.dataIngester, "register",
                                                     spec=pipeTasks.ingest.RegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        self.addCleanup(patcherRegister.stop)

    def setUpCalibRegistry(self):
        """Mock up the RegisterTask used for ingesting calib data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts calib ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called in the same test.
        """
        patcherRegister = unittest.mock.patch.object(self._task.calibIngester, "register",
                                                     spec=pipeTasks.ingestCalibs.CalibsRegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        # The mock replaces the whole subtask, so give it back the real config.
        self._registerTask.config = self._task.config.calibIngester.register
        self.addCleanup(patcherRegister.stop)

    def assertRawRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of science data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.RegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys (a ``file`` key may be present but is not
            inspected here):
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``expId``: exposure ID of the file (`int`).
            - ``exptime``: the exposure time of the file (`float`).
        """
        for datum in expectedData:
            found = False
            dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                # The data ID is the second positional argument to addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if registeredId['expId'] == datum['expId']:
                    found = True
                    for dimension in dataId:
                        self.assertEqual(registeredId[dimension], dataId[dimension])
            self.assertTrue(found, msg=f"No call with {dataId}.")

        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def assertCalibRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of calibration data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.CalibsRegisterTask.addRow` method,
            and every recorded call must have passed a ``table`` keyword.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys (a ``file`` key may be present but is not
            inspected here):
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``type``: a valid calibration dataset type (`str`).
            - ``date``: the calibration date in YYYY-MM-DD format (`str`).
        """
        for datum in expectedData:
            found = False
            dataId = {'calibDate': datum['date'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                kwargs = call[1]
                # The data ID is the second positional argument to addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if kwargs["table"] == datum["type"] and registeredId['filter'] == datum['filter'] \
                        and registeredId['calibDate'] == datum['date']:
                    found = True
            self.assertTrue(found, msg=f"No call with {dataId}.")

        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.setUpRawRegistry()
        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, [])

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.setUpRawRegistry()
        self._task._ingestRaws(self._dataset, self._workspace)

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testCalibIngest(self):
        """Test that ingesting calibrations given specific files adds them to a repository.
        """
        files = [os.path.join(self._dataset.calibLocation, datum['file'])
                 for datum in IngestionTestSuite.calibData]
        self.setUpCalibRegistry()

        self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.setUpCalibRegistry()
        self._task._ingestCalibs(self._dataset, self._workspace)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        files = []
        self.setUpRawRegistry()

        with self.assertRaises(RuntimeError):
            self._task._doIngestRaws(self._repo, self._calibRepo, files, [])
        with self.assertRaises(RuntimeError):
            self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self._registerTask.addRow.assert_not_called()

    def testBadFileIngest(self):
        """Test that ingestion of raw data ignores forbidden files.
        """
        # NOTE(review): this name does not appear in the rawData defined in
        # setUpClass, so nothing is actually filtered and the loop at the end
        # of this test has no iterations that reach the assertion -- confirm
        # whether a matching entry was intended.
        badFiles = ['raw_v2_fg.fits.gz']
        self.setUpRawRegistry()

        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, badFiles)

        filteredData = [datum for datum in IngestionTestSuite.rawData if datum['file'] not in badFiles]
        self.assertRawRegistryCalls(self._registerTask, filteredData)

        for datum in IngestionTestSuite.rawData:
            if datum['file'] in badFiles:
                dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
                # This call should never happen for badFiles
                call = unittest.mock.call(self._registerTask.openRegistry().__enter__(), dataId,
                                          create=False, dryrun=False)
                self.assertNotIn(call, self._registerTask.addRow.mock_calls)
class IngestionTestSuiteGen3(DataTestCase):
    """Test `ingestion.Gen3DatasetIngestTask` against a temporary workspace.

    Each test gets a fresh `WorkspaceGen3` rooted in a temporary directory
    and checks results through that workspace's ``workButler``.
    """

    @classmethod
    def setUpClass(cls):
        """Load the dataset and describe the files the tests expect to find."""
        super().setUpClass()

        cls.dataset = Dataset(cls.datasetKey)

        cls.INSTRUMENT = cls.dataset.instrument.getName()
        cls.VISIT_ID = 204595
        cls.DETECTOR_ID = 37

        # Apart from 'type' and 'file', every key is used as part of a data ID
        # by assertIngestedDataFiles.
        cls.rawData = [{'type': 'raw', 'file': 'lsst_a_204595_R11_S01_i.fits',
                        'exposure': cls.VISIT_ID, 'detector': cls.DETECTOR_ID,
                        'instrument': cls.INSTRUMENT},
                       ]

        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'detector': cls.DETECTOR_ID, 'instrument': cls.INSTRUMENT,
                          'physical_filter': 'i_sim_1.4'},
                         ]

    @classmethod
    def makeTestConfig(cls):
        """Return a `ingestion.Gen3DatasetIngestConfig` with the instrument's overrides applied.

        The config is returned unvalidated and unfrozen.
        """
        instrument = cls.dataset.instrument
        config = ingestion.Gen3DatasetIngestConfig()
        instrument.applyConfigOverrides(ingestion.Gen3DatasetIngestTask._DefaultName, config)
        return config

    def setUp(self):
        """Create a frozen config, a temporary workspace, and the task under test."""
        super().setUp()

        self.config = self.makeTestConfig()
        self.config.validate()
        self.config.freeze()

        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.workspace = WorkspaceGen3(self.root)
        self.task = ingestion.Gen3DatasetIngestTask(config=self.config,
                                                    dataset=self.dataset, workspace=self.workspace)

        self.butler = self.workspace.workButler

    def assertIngestedDataFiles(self, data, collection):
        """Test that data have been loaded into a specific collection.

        Parameters
        ----------
        data : `collections.abc.Iterable` [`collections.abc.Mapping`]
            An iterable of mappings, each representing the properties of a
            single input dataset. Each mapping must contain a `"type"` key
            that maps to the dataset's Gen 3 type. A `"file"` key, if present,
            is ignored; all remaining keys are used as the data ID.
        collection
            Any valid :ref:`collection expression <daf_butler_collection_expressions>`
            for the collection expected to contain the data. The tests in
            this class pass collection names obtained from the instrument.
        """
        for datum in data:
            dataId = datum.copy()
            dataId.pop("type", None)
            dataId.pop("file", None)

            matches = list(self.butler.registry.queryDatasets(datum['type'],
                                                              collections=collection,
                                                              dataId=dataId))
            self.assertNotEqual(matches, [],
                                msg=f"No {datum['type']} dataset matching {dataId} in {collection}.")

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testDataDoubleIngest(self):
        """Test that re-ingesting science images raises RuntimeError.
        """
        files = [os.path.join(self.dataset.rawLocation, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(files, processes=1)
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws(files, processes=1)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.task._ensureRaws(processes=1)  # Should not affect calibs, but would be run
        self.assertIngestedDataFiles(self.calibData, self.dataset.instrument.makeCollectionName("calib"))

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws([], processes=1)

    def testVisitDefinition(self):
        """Test that the final repository supports indexing by visit.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)
        self.assertEqual(exposures[0]["exposure"], self.VISIT_ID)

    def testVisitDoubleDefinition(self):
        """Test that re-defining visits is guarded against.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)
        self.task._defineVisits(processes=1)  # must not raise

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT, }
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)

    def testVisitsUndefinable(self):
        """Test that attempts to define visits with no exposures raise an exception.
        """
        with self.assertRaises(RuntimeError):
            self.task._defineVisits(processes=1)

    def testCopyConfigs(self):
        """Test that "ingesting" configs stores them in the workspace for later reference.
        """
        self.task._copyConfigs()
        self.assertTrue(os.path.exists(self.workspace.configDir))
        # Only testdata file that *must* be supported in the future
        self.assertTrue(os.path.exists(os.path.join(self.workspace.configDir, "datasetIngest.py")))

    def testFindMatchingFiles(self):
        """Test that _findMatchingFiles finds the desired files.
        """
        testDir = self.dataset.datasetRoot
        allFiles = {os.path.join(testDir, 'calib', f) for f in
                    {'bias-R11-S01-det037_2022-01-01.fits.gz',
                     'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                     }}

        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz']), allFiles
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*_i-*']),
            {os.path.join(testDir, 'calib', f) for f in
             {'bias-R11-S01-det037_2022-01-01.fits.gz'}}
        )
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*R11-S01*']),
            set()
        )
        # Exclude filters should not match directories
        self.assertSetEqual(
            ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['calib']),
            allFiles
        )

    def testPickling(self):
        """Test that a Gen3DatasetIngestTask can be pickled correctly.

        This is needed for multiprocessing support.
        """
        # Round-trips an object created by this test, not external data.
        stream = pickle.dumps(self.task)
        copy = pickle.loads(stream)
        self.assertEqual(self.task.getFullName(), copy.getFullName())
        self.assertEqual(self.task.log.getName(), copy.log.getName())
        # Equality for config ill-behaved; skip testing it
        self.assertEqual(self.task.dataset, copy.dataset)
        self.assertEqual(self.task.workspace, copy.workspace)
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Run the checks inherited from `lsst.utils.tests.MemoryTestCase`; adds nothing of its own."""
def setup_module(module):
    """Initialise the LSST test utilities for this module.

    Parameters
    ----------
    module
        The module being set up; not used.
    """
    lsst.utils.tests.init()
if __name__ == "__main__":
    # Script entry point: initialise the LSST test utilities, then run every
    # test case defined in this module.
    lsst.utils.tests.init()
    unittest.main()