Coverage for python/lsst/obs/base/ingest_tests.py : 27%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Base class for writing Gen3 raw data ingest tests.
23"""
25__all__ = ("IngestTestBase",)
27import abc
28import tempfile
29import unittest
30import os
31import shutil
33import lsst.afw.cameraGeom
34from lsst.daf.butler import Butler, ButlerURI
35from lsst.daf.butler.cli.butler import cli as butlerCli
36from lsst.daf.butler.cli.utils import LogCliRunner
37import lsst.obs.base
38from lsst.utils import doImport
39from .utils import getInstrument
40from . import script
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.

    Notes
    -----
    This class is not itself a `unittest.TestCase`; the ``assert*`` and
    ``skipTest`` methods used below are expected to come from the
    `unittest.TestCase` that concrete subclasses also inherit from.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    ingestDatasetTypeName = "raw"
    """The DatasetType to use for the ingest.

    If this is not an Exposure dataset type the tests will be more limited.
    """

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    filterLabel = None
    """The lsst.afw.image.FilterLabel that should be returned by the above
    file."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Datasets types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class, imported from ``instrumentClassName``."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument in it.

        The repository root is stored in ``cls.root`` and is shared by all
        tests in the class.
        """
        # Use a temporary working directory.
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        try:
            cls._createRepo()

            # Register the instrument and its static metadata.
            cls._registerInstrument()
        except BaseException:
            # unittest does not run tearDownClass when setUpClass raises, so
            # the temporary directory must be removed here or it is leaked.
            shutil.rmtree(cls.root, ignore_errors=True)
            raise

    def setUp(self):
        """Choose the output run collection for the current test."""
        # Want a unique run name per test.
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        cli : `bool`, optional
            Not used by this implementation; retained so that existing
            callers passing it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore.
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        # Get the relevant dataset type.
        datasetType = butler.registry.getDatasetType(self.ingestDatasetTypeName)

        for dataId in self.dataIds:
            # For testing we only read the entire dataset the first time
            # round if this is an Exposure. If it's not an Exposure
            # we always read it completely but we don't read components
            # because for an arbitrary dataset type we can't easily tell
            # what component to test.

            if not datasetType.storageClass.name.startswith("Exposure"):
                # The read itself is the test; the result could be anything
                # so there is nothing further to check by default.
                butler.get(self.ingestDatasetTypeName, dataId)
                continue

            # Check that we can read metadata from a raw.
            metadata = butler.get(f"{self.ingestDatasetTypeName}.metadata", dataId)
            if not fullCheck:
                continue
            # Only do the expensive full read for the first data ID.
            fullCheck = False
            exposure = butler.get(self.ingestDatasetTypeName, dataId)

            # Comparing headers will not work directly because of header
            # fix up provenance.
            metadata_headers = metadata.toDict()
            exposure_headers = exposure.getMetadata().toDict()
            metadata_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            exposure_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            self.assertEqual(metadata_headers, exposure_headers)

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get(f"{self.ingestDatasetTypeName}.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get(f"{self.ingestDatasetTypeName}.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # Check that the filter label got the correct band.
            filterLabel = butler.get(f"{self.ingestDatasetTypeName}.filterLabel", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work.
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work.
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Test ingest with the ``link`` transfer mode."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Test ingest with the ``symlink`` transfer mode."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Test ingest with the ``direct`` transfer mode."""
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore.
        srcUri = ButlerURI(self.file, forceAbsolute=True)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Test ingest with the ``copy`` transfer mode, with a full read."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Test ingest with the ``hardlink`` transfer mode.

        The test is skipped if the ingest fails, on the assumption that the
        input data is on a different filesystem from the repository.
        """
        try:
            self._ingestRaws(transfer="hardlink")
            # Running ingest through the Click testing infrastructure causes
            # the original exception indicating that we can't hard-link
            # on this filesystem to be turned into a nonzero exit code, which
            # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        butler = Butler(self.root, run=self.outputRun)

        # If the test uses an index file the index file needs to also
        # appear in the datastore root along with the file to be ingested.
        # In that scenario the file name being used for ingest can not
        # be modified and must have the same name as found in the index
        # file itself.
        source_file_uri = ButlerURI(self.file)
        index_file = source_file_uri.dirname().join("_index.json")
        pathInStore = source_file_uri.basename()
        if index_file.exists():
            os.symlink(index_file.ospath, butler.datastore.root.join("_index.json").ospath)
        else:
            # No index file so we are free to pick any name.
            pathInStore = "prefix-" + pathInStore

        # Create a symlink to the original file so that it looks like it
        # is now inside the datastore.
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)

        # If there is a sidecar file it needs to be linked in as well
        # since ingest code does not follow symlinks.
        sidecar_uri = ButlerURI(source_file_uri).updatedExtension(".json")
        if sidecar_uri.exists():
            newSidecar = ButlerURI(newPath).updatedExtension(".json")
            os.symlink(sidecar_uri.ospath, newSidecar.ospath)

        # Run ingest with auto mode since that should automatically determine
        # that an in-place ingest is happening.
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files).
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one.
        uri = butler.getURI(self.ingestDatasetTypeName, self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass().makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Test that visits defined from the ingested raws match
        ``self.visits``, and that each visit region contains its
        detector+visit regions.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is a set, so its iteration order need not match the
            # order of ``self.visits``; pair each expected visit with the
            # found visit that compares equal to it rather than by position.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"Expected visit {expectedVisit} was not found")

            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(["exposure"], dataId=expectedVisit
                                                          ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit
                                                                ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))