Coverage for tests/test_ingestion.py: 19%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# This file is part of ap_verify.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
24import os
25import pickle
26import re
27import shutil
28import tempfile
29import unittest.mock
31from lsst.utils import getPackageDir
32import lsst.utils.tests
33from lsst.daf.butler import CollectionType
34import lsst.pipe.tasks as pipeTasks
35from lsst.ap.verify import ingestion
36from lsst.ap.verify.testUtils import DataTestCase
37from lsst.ap.verify.dataset import Dataset
38from lsst.ap.verify.workspace import WorkspaceGen2, WorkspaceGen3
class MockDetector:
    """Stand-in for a camera detector with a fixed identity."""

    def getId(self):
        """Return the detector's numeric ID, which is always ``0``."""
        return 0

    def getName(self):
        """Return the detector's name, which is always ``'0'``."""
        return '0'
class MockCamera:
    """Minimal stand-in for a camera that holds a single detector.

    Parameters
    ----------
    detector
        The camera's only detector. It can be retrieved by position ``0``
        or by the name ``'0'``.
    """

    def __init__(self, detector):
        self.det_list = [detector, ]
        self.det_dict = {'0': detector}

    def __getitem__(self, item):
        """Return a detector by integer position or by name.

        Parameters
        ----------
        item : `int` or `str`
            A position in the detector list, or a detector name.

        Returns
        -------
        detector
            The matching detector.

        Raises
        ------
        IndexError
            Raised if ``item`` is an integer outside the detector list.
        KeyError
            Raised if ``item`` is not an integer and not a known name.
        """
        # isinstance (rather than an exact type comparison) so that int
        # subclasses are treated as positions instead of names.
        if isinstance(item, int):
            return self.det_list[item]
        return self.det_dict[item]
class IngestionTestSuite(DataTestCase):
    """Tests of `ingestion.DatasetIngestTask` run against a mocked Butler
    and mocked registration tasks.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared mock camera, the frozen task config, and the
        descriptions of the raw and calibration files expected in the dataset.
        """
        super().setUpClass()

        cls.mockCamera = MockCamera(MockDetector())
        cls.config = cls.makeTestConfig()
        cls.config.validate()
        cls.config.freeze()

        cls.testApVerifyData = os.path.join('tests', 'ingestion')

        cls.rawData = [{'file': 'lsst_a_204595_R11_S01_i.fits', 'expId': 204595, 'filter': 'i_sim_1.4',
                        'exptime': 30.0},
                       ]
        cls.calibData = [{'type': 'bias', 'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
                          'filter': 'NONE', 'date': '2022-01-01'},
                         {'type': 'flat', 'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
                          'filter': 'i_sim_1.4', 'date': '2022-08-06'},
                         ]

    @staticmethod
    def makeTestConfig():
        """Return a `ingestion.DatasetIngestConfig` with the ``obs_lsst``
        ingestion config files loaded into its sub-task configs.
        """
        obsDir = os.path.join(getPackageDir('obs_lsst'), 'config')
        config = ingestion.DatasetIngestConfig()
        config.dataIngester.load(os.path.join(obsDir, 'ingest.py'))
        config.dataIngester.load(os.path.join(obsDir, 'imsim', 'ingest.py'))
        config.calibIngester.load(os.path.join(obsDir, 'ingestCalibs.py'))
        config.curatedCalibIngester.load(os.path.join(obsDir, 'ingestCuratedCalibs.py'))
        return config

    def setUp(self):
        """Create a temporary repository directory, a patched Butler, a mock
        workspace, and the task under test.
        """
        # Repositories still get used by IngestTask despite Butler being a mock object
        self._repo = self._calibRepo = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self._repo, ignore_errors=True)

        # Fake Butler and RegisterTask to avoid initialization or DB overhead
        def mockGet(datasetType, dataId=None):
            """Minimally fake a butler.get().
            """
            if "raw_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.rawData
                                 if datum['expId'] == dataId['expId']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "bias_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'bias']
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "flat_filename" in datasetType:
                matchingFiles = [datum['file'] for datum in IngestionTestSuite.calibData
                                 if datum['type'] == 'flat' and datum['filter'] == dataId['filter']]
                return [os.path.join(self._repo, file) for file in matchingFiles]
            elif "defects_filename" in datasetType:
                return [os.path.join(self._repo, 'defects', 'defects.fits'), ]
            elif "camera" in datasetType:
                return IngestionTestSuite.mockCamera
            else:
                return None

        butlerPatcher = unittest.mock.patch("lsst.daf.persistence.Butler")
        self._butler = butlerPatcher.start()
        # NOTE(review): this file has no explicit import of lsst.obs.lsst.imsim;
        # presumably it is loaded as a side effect of another import -- confirm.
        self._butler.getMapperClass.return_value = lsst.obs.lsst.imsim.ImsimMapper
        self._butler.return_value.get = mockGet
        self.addCleanup(butlerPatcher.stop)

        self._dataset = Dataset(self.testDataset)
        # Fake Workspace because it's too hard to make a real one with a fake Butler
        self._workspace = unittest.mock.NonCallableMock(
            spec=WorkspaceGen2,
            dataRepo=self._repo,
            calibRepo=self._calibRepo,
        )

        self._task = ingestion.DatasetIngestTask(config=IngestionTestSuite.config)

    def setUpRawRegistry(self):
        """Mock up the RegisterTask used for ingesting raw data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts raw ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each of them assigns
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.dataIngester, "register",
                                                     spec=pipeTasks.ingest.RegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        self.addCleanup(patcherRegister.stop)

    def setUpCalibRegistry(self):
        """Mock up the RegisterTask used for ingesting calib data.

        This method initializes ``self._registerTask``. It should be
        called at the start of any test case that attempts calib ingestion.

        Behavior is undefined if both `setUpRawRegistry` and
        `setUpCalibRegistry` are called, because each of them assigns
        ``self._registerTask``.
        """
        patcherRegister = unittest.mock.patch.object(self._task.calibIngester, "register",
                                                     spec=pipeTasks.ingestCalibs.CalibsRegisterTask,
                                                     new_callable=unittest.mock.NonCallableMagicMock)
        self._registerTask = patcherRegister.start()
        # The mock replaces the real sub-task, so hand it the real register config.
        self._registerTask.config = self._task.config.calibIngester.register
        self.addCleanup(patcherRegister.stop)

    def assertRawRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of science data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the `~lsst.pipe.tasks.ingest.RegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:

            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``expId``: exposure ID of the file (`int`).
            - ``exptime``: the exposure time of the file (`float`).
        """
        for datum in expectedData:
            found = False
            dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                # The registered data ID is the second positional argument of addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if registeredId['expId'] == datum['expId']:
                    found = True
                    for dimension in dataId:
                        self.assertEqual(registeredId[dimension], dataId[dimension])
            self.assertTrue(found, msg=f"No call with {dataId}.")

        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def assertCalibRegistryCalls(self, registryMock, expectedData):
        """Test that a particular set of calibration data is registered correctly.

        Parameters
        ----------
        registryMock : `unittest.mock.Mock`
            a mock object representing the repository's registry. Must have a
            mock for the
            `~lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.addRow` method.
        expectedData : iterable of `dict`
            a collection of dictionaries, each representing one item that
            should have been ingested. Each dictionary must contain the
            following keys:

            - ``file``: file name to be ingested (`str`).
            - ``filter``: the filter of the file, or "NONE" if not applicable (`str`).
            - ``type``: a valid calibration dataset type (`str`).
            - ``date``: the calibration date in YYYY-MM-DD format (`str`).
        """
        for datum in expectedData:
            found = False
            dataId = {'calibDate': datum['date'], 'filter': datum['filter']}
            for call in registryMock.addRow.call_args_list:
                args = call[0]
                kwargs = call[1]
                # The registered data ID is the second positional argument of addRow.
                registeredId = args[1]
                self.assertLessEqual(set(dataId.keys()), set(registeredId.keys()))  # subset

                if kwargs["table"] == datum["type"] and registeredId['filter'] == datum['filter'] \
                        and registeredId['calibDate'] == datum['date']:
                    found = True
            self.assertTrue(found, msg=f"No call with {dataId}.")

        self.assertEqual(registryMock.addRow.call_count, len(expectedData))

    def testDataIngest(self):
        """Test that ingesting science images given specific files adds them to a repository.
        """
        self.setUpRawRegistry()
        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, [])

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testDataIngestDriver(self):
        """Test that ingesting science images starting from an abstract dataset adds them to a repository.
        """
        self.setUpRawRegistry()
        self._task._ingestRaws(self._dataset, self._workspace)

        self.assertRawRegistryCalls(self._registerTask, IngestionTestSuite.rawData)

    def testCalibIngest(self):
        """Test that ingesting calibrations given specific files adds them to a repository.
        """
        files = [os.path.join(self._dataset.calibLocation, datum['file'])
                 for datum in IngestionTestSuite.calibData]
        self.setUpCalibRegistry()

        self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testCalibIngestDriver(self):
        """Test that ingesting calibrations starting from an abstract dataset adds them to a repository.
        """
        self.setUpCalibRegistry()
        self._task._ingestCalibs(self._dataset, self._workspace)

        self.assertCalibRegistryCalls(self._registerTask, IngestionTestSuite.calibData)

    def testNoFileIngest(self):
        """Test that attempts to ingest nothing raise an exception.
        """
        files = []
        self.setUpRawRegistry()

        with self.assertRaises(RuntimeError):
            self._task._doIngestRaws(self._repo, self._calibRepo, files, [])
        with self.assertRaises(RuntimeError):
            self._task._doIngestCalibs(self._repo, self._calibRepo, files)

        # Only the raw registry is mocked here, so only its addRow is checked.
        self._registerTask.addRow.assert_not_called()

    def testBadFileIngest(self):
        """Test that ingestion of raw data ignores forbidden files.
        """
        # NOTE(review): this name does not appear in rawData, so no file is
        # actually filtered out below -- confirm the test still exercises
        # the exclusion path.
        badFiles = ['raw_v2_fg.fits.gz']
        self.setUpRawRegistry()

        files = [os.path.join(self._dataset.rawLocation, datum['file'])
                 for datum in IngestionTestSuite.rawData]
        self._task._doIngestRaws(self._repo, self._calibRepo, files, badFiles)

        filteredData = [datum for datum in IngestionTestSuite.rawData if datum['file'] not in badFiles]
        self.assertRawRegistryCalls(self._registerTask, filteredData)

        for datum in IngestionTestSuite.rawData:
            if datum['file'] in badFiles:
                dataId = {'expId': datum['expId'], 'expTime': datum['exptime'], 'filter': datum['filter']}
                # This call should never happen for badFiles
                call = unittest.mock.call(self._registerTask.openRegistry().__enter__(), dataId,
                                          create=False, dryrun=False)
                self.assertNotIn(call, self._registerTask.addRow.mock_calls)
class IngestionTestSuiteGen3(DataTestCase):
    """Tests of `ingestion.Gen3DatasetIngestTask` run against a `WorkspaceGen3`
    created in a temporary directory.
    """

    @classmethod
    def setUpClass(cls):
        """Load the test dataset and describe the raw and calibration files
        the tests expect to find in it.
        """
        super().setUpClass()

        cls.dataset = Dataset(cls.testDataset)

        cls.INSTRUMENT = cls.dataset.instrument.getName()
        cls.VISIT_ID = 204595
        cls.DETECTOR_ID = 37

        rawEntry = {
            'type': 'raw',
            'file': 'lsst_a_204595_R11_S01_i.fits',
            'exposure': cls.VISIT_ID,
            'detector': cls.DETECTOR_ID,
            'instrument': cls.INSTRUMENT,
        }
        cls.rawData = [rawEntry]

        biasEntry = {
            'type': 'bias',
            'file': 'bias-R11-S01-det037_2022-01-01.fits.gz',
            'detector': cls.DETECTOR_ID,
            'instrument': cls.INSTRUMENT,
        }
        flatEntry = {
            'type': 'flat',
            'file': 'flat_i-R11-S01-det037_2022-08-06.fits.gz',
            'detector': cls.DETECTOR_ID,
            'instrument': cls.INSTRUMENT,
            'physical_filter': 'i_sim_1.4',
        }
        cls.calibData = [biasEntry, flatEntry]

    @classmethod
    def makeTestConfig(cls):
        """Return a `ingestion.Gen3DatasetIngestConfig` with the dataset
        instrument's config overrides applied.
        """
        datasetInstrument = cls.dataset.instrument
        ingestConfig = ingestion.Gen3DatasetIngestConfig()
        datasetInstrument.applyConfigOverrides(ingestion.Gen3DatasetIngestTask._DefaultName,
                                               ingestConfig)
        return ingestConfig

    def setUp(self):
        """Build a frozen config, a temporary workspace, the task under test,
        and a handle to the workspace's Butler.
        """
        super().setUp()

        frozenConfig = self.makeTestConfig()
        frozenConfig.validate()
        frozenConfig.freeze()
        self.config = frozenConfig

        # The temporary directory is removed when the test finishes.
        self.root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.workspace = WorkspaceGen3(self.root)
        self.task = ingestion.Gen3DatasetIngestTask(
            config=self.config,
            dataset=self.dataset,
            workspace=self.workspace,
        )

        self.butler = self.workspace.workButler

    def assertIngestedDataFiles(self, data, collection):
        """Assert that every described dataset is present in a collection.

        Parameters
        ----------
        data : `collections.abc.Iterable` [`collections.abc.Mapping`]
            Descriptions of the expected datasets, one mapping per dataset.
            Each mapping must have a `"type"` key giving the dataset's Gen 3
            type. The remaining keys, other than ``"file"``, are used as the
            data ID for the query.
        collection
            Any valid :ref:`collection expression <daf_butler_collection_expressions>`
            for the collection expected to contain the data.
        """
        for datum in data:
            # Everything except the bookkeeping keys forms the data ID.
            dataId = {key: value for key, value in datum.items() if key not in ("type", "file")}

            query = self.butler.registry.queryDatasets(datum['type'],
                                                       collections=collection,
                                                       dataId=dataId)
            matches = list(query)
            self.assertNotEqual(matches, [])

    def testDataIngest(self):
        """Check that explicitly listed science images end up in the repository.
        """
        rawDir = self.dataset.rawLocation
        rawPaths = [os.path.join(rawDir, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(rawPaths, processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testDataDoubleIngest(self):
        """Check that ingesting the same science images twice raises RuntimeError.
        """
        rawDir = self.dataset.rawLocation
        rawPaths = [os.path.join(rawDir, datum['file']) for datum in self.rawData]
        self.task._ingestRaws(rawPaths, processes=1)
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws(rawPaths, processes=1)

    def testDataIngestDriver(self):
        """Check that science images are ingested when starting from the abstract dataset.
        """
        self.task._ensureRaws(processes=1)
        self.assertIngestedDataFiles(self.rawData, self.dataset.instrument.makeDefaultRawIngestRunName())

    def testCalibIngestDriver(self):
        """Check that calibrations are in the repository when starting from the abstract dataset.
        """
        self.task._ensureRaws(processes=1)  # Should not affect calibs, but would be run
        # queryDatasets cannot (yet) search CALIBRATION collections, so we
        # instead search the RUN-type collections that calibrations are
        # ingested into first before being associated with a validity range.
        runPrefix = self.dataset.instrument.makeCollectionName("calib") + "/"
        calibrationRunPattern = re.compile(re.escape(runPrefix) + ".+")
        runQuery = self.butler.registry.queryCollections(
            calibrationRunPattern,
            collectionTypes={CollectionType.RUN},
        )
        calibrationRuns = list(runQuery)
        self.assertIngestedDataFiles(self.calibData, calibrationRuns)

    def testNoFileIngest(self):
        """Check that ingesting an empty list of files raises RuntimeError.
        """
        with self.assertRaises(RuntimeError):
            self.task._ingestRaws([], processes=1)

    def testVisitDefinition(self):
        """Check that the repository can be queried by visit after visits are defined.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT}
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)
        self.assertEqual(exposures[0]["exposure"], self.VISIT_ID)

    def testVisitDoubleDefinition(self):
        """Check that defining visits a second time is harmless.
        """
        self.task._ensureRaws(processes=1)
        self.task._defineVisits(processes=1)
        self.task._defineVisits(processes=1)  # must not raise

        testId = {"visit": self.VISIT_ID, "instrument": self.INSTRUMENT}
        exposures = list(self.butler.registry.queryDataIds("exposure", dataId=testId))
        self.assertEqual(len(exposures), 1)

    def testVisitsUndefinable(self):
        """Check that defining visits without any exposures raises RuntimeError.
        """
        with self.assertRaises(RuntimeError):
            self.task._defineVisits(processes=1)

    def testCopyConfigs(self):
        """Check that copying configs places them in the workspace for later reference.
        """
        self.task._copyConfigs()

        configDir = self.workspace.configDir
        self.assertTrue(os.path.exists(configDir))
        # Only testdata file that *must* be supported in the future
        ingestConfigPath = os.path.join(self.workspace.configDir, "datasetIngest.py")
        self.assertTrue(os.path.exists(ingestConfigPath))

        pipelineDir = self.workspace.pipelineDir
        self.assertTrue(os.path.exists(pipelineDir))
        pipelinePath = os.path.join(self.workspace.pipelineDir, "ApVerify.yaml")
        self.assertTrue(os.path.exists(pipelinePath))

    def testFindMatchingFiles(self):
        """Check that _findMatchingFiles honors its include and exclude patterns.
        """
        testDir = self.dataset.datasetRoot
        biasPath = os.path.join(testDir, 'calib', 'bias-R11-S01-det037_2022-01-01.fits.gz')
        flatPath = os.path.join(testDir, 'calib', 'flat_i-R11-S01-det037_2022-08-06.fits.gz')
        allFiles = {biasPath, flatPath}

        everything = ingestion._findMatchingFiles(testDir, ['*.fits.gz'])
        self.assertSetEqual(everything, allFiles)

        withoutFlats = ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*_i-*'])
        self.assertSetEqual(withoutFlats, {biasPath})

        withoutSensor = ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['*R11-S01*'])
        self.assertSetEqual(withoutSensor, set())

        # Exclude filters should not match directories
        withoutCalibDir = ingestion._findMatchingFiles(testDir, ['*.fits.gz'], exclude=['calib'])
        self.assertSetEqual(withoutCalibDir, allFiles)

    def testPickling(self):
        """Check that a Gen3DatasetIngestTask survives a pickle round trip.

        This is needed for multiprocessing support.
        """
        copy = pickle.loads(pickle.dumps(self.task))
        self.assertEqual(self.task.getFullName(), copy.getFullName())
        self.assertEqual(self.task.log.name, copy.log.name)
        # Equality for config ill-behaved; skip testing it
        self.assertEqual(self.task.dataset, copy.dataset)
        self.assertEqual(self.task.workspace, copy.workspace)
class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Inherit every test from `lsst.utils.tests.MemoryTestCase` without adding any."""
def setup_module(module):
    """Call `lsst.utils.tests.init` for this test module.

    Parameters
    ----------
    module
        Accepted but not used.
    """
    lsst.utils.tests.init()
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()