Coverage for python/lsst/obs/base/ingest_tests.py: 29%
201 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-27 09:52 +0000
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-27 09:52 +0000
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Base class for writing Gen3 raw data ingest tests.
23"""
25__all__ = ("IngestTestBase",)
27import abc
28import os
29import shutil
30import tempfile
31import unittest
33import lsst.afw.cameraGeom
34import lsst.obs.base
35from lsst.daf.butler import Butler
36from lsst.daf.butler.cli.butler import cli as butlerCli
37from lsst.daf.butler.cli.utils import LogCliRunner
38from lsst.pipe.base import Instrument
39from lsst.resources import ResourcePath
40from lsst.utils import doImportType
42from . import script
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest.

    Subclass from this, then `unittest.TestCase`, to get a working test
    suite. The assertion helpers used by the methods below (``assertEqual``,
    ``skipTest``, ``subTest``, ...) are expected to be supplied by
    `unittest.TestCase` through that second base class.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    ingestDatasetTypeName = "raw"
    """The DatasetType to use for the ingest.

    If this is not an Exposure dataset type the tests will be more limited.
    """

    dataIds = []
    """List of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    filterLabel = None
    """The lsst.afw.image.FilterLabel that should be returned by the above
    file."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of dataset types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    seed_config = None
    """Location of a seed configuration file to pass to butler create.

    Useful if additional formatters or storage classes need to be defined.
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class, imported from ``instrumentClassName``."""
        return doImportType(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument."""
        # Use a temporary working directory.
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        try:
            cls._createRepo()

            # Register the instrument and its static metadata.
            cls._registerInstrument()
        except BaseException:
            # unittest does not run tearDownClass when setUpClass raises, so
            # the temporary repository has to be removed here or it leaks.
            shutil.rmtree(cls.root, ignore_errors=True)
            raise

    def setUp(self):
        """Assign a run collection name that is unique to this test."""
        # Want a unique run name per test.
        self.outputRun = f"raw_ingest_{self.id()}"

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
            Only forwarded to `checkRepo`.
        cli : `bool`, optional
            Not used by this implementation; kept so that existing callers
            passing it continue to work.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore.
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        # Get the relevant dataset type.
        datasetType = butler.registry.getDatasetType(self.ingestDatasetTypeName)
        isExposure = datasetType.storageClass.name.startswith("Exposure")

        for dataId in self.dataIds:
            # For testing we only read the entire dataset the first time
            # round if this is an Exposure. If it's not an Exposure
            # we always read it completely but we don't read components
            # because for an arbitrary dataset type we can't easily tell
            # what component to test.

            if not isExposure:
                # Could be anything so there is nothing to test by default
                # beyond the read itself succeeding.
                butler.get(self.ingestDatasetTypeName, dataId)
                continue

            # Check that we can read metadata from a raw.
            metadata = butler.get(f"{self.ingestDatasetTypeName}.metadata", dataId)
            if not fullCheck:
                continue
            # Only the first data ID gets the expensive full read.
            fullCheck = False
            exposure = butler.get(self.ingestDatasetTypeName, dataId)

            # Comparing headers will not work directly because of header
            # fix up provenance.
            metadata_headers = metadata.toDict()
            exposure_headers = exposure.getMetadata().toDict()
            metadata_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            exposure_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            self.assertEqual(metadata_headers, exposure_headers)

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get(f"{self.ingestDatasetTypeName}.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get(f"{self.ingestDatasetTypeName}.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # Check that the filter label got the correct band.
            filterLabel = butler.get(f"{self.ingestDatasetTypeName}.filter", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

            # Check that the exposure's Detector is the same as the component
            # we would read (this is tricky for LSST, which modifies its
            # detector at read time; for most other cameras it should be
            # trivially satisfied).
            detector = butler.get(f"{self.ingestDatasetTypeName}.detector", dataId)
            self.assertDetectorsEqual(detector, exposure.getDetector(), compareTransforms=False)

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository.

        Raises
        ------
        AssertionError
            Raised if the command exits with a non-zero status.
        """
        runner = LogCliRunner()
        args = []
        if cls.seed_config:
            args.extend(["--seed-config", cls.seed_config])
        result = runner.invoke(butlerCli, ["create", cls.root, *args])
        # Classmethod so assertEqual does not work. Raise explicitly rather
        # than using an ``assert`` statement, which is stripped under
        # ``python -O`` and would let a failed repo creation go unnoticed.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(
            butlerCli,
            [
                "ingest-raws",
                self.root,
                file,
                "--output-run",
                self.outputRun,
                "--transfer",
                transfer,
                "--ingest-task",
                self.rawIngestTask,
            ],
        )
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument.

        Raises
        ------
        AssertionError
            Raised if the command exits with a non-zero status.
        """
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work. Raise explicitly rather
        # than using an ``assert`` statement, which is stripped under
        # ``python -O``.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Ingest with ``link`` transfer and verify the result."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Ingest with ``symlink`` transfer and verify the result."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Ingest with ``direct`` transfer and check the dataset URI is the
        source file itself.
        """
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore.
        srcUri = ResourcePath(self.file, forceAbsolute=True)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Ingest with ``copy`` transfer and do a full read check."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Ingest with ``hardlink`` transfer, skipping if that fails."""
        try:
            self._ingestRaws(transfer="hardlink")
            # Running ingest through the Click testing infrastructure causes
            # the original exception indicating that we can't hard-link
            # on this filesystem to be turned into a nonzero exit code, which
            # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest(
                "Skipping hard-link test because input data is on a different filesystem."
            ) from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        butler = Butler(self.root, run=self.outputRun)

        # If the test uses an index file the index file needs to also
        # appear in the datastore root along with the file to be ingested.
        # In that scenario the file name being used for ingest can not
        # be modified and must have the same name as found in the index
        # file itself.
        source_file_uri = ResourcePath(self.file)
        index_file = source_file_uri.dirname().join("_index.json")
        pathInStore = source_file_uri.basename()
        if index_file.exists():
            os.symlink(index_file.ospath, butler.datastore.root.join("_index.json").ospath)
        else:
            # No index file so we are free to pick any name.
            pathInStore = "prefix-" + pathInStore

        # Create a symlink to the original file so that it looks like it
        # is now inside the datastore.
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)

        # If there is a sidecar file it needs to be linked in as well
        # since ingest code does not follow symlinks.
        sidecar_uri = ResourcePath(source_file_uri).updatedExtension(".json")
        if sidecar_uri.exists():
            newSidecar = ResourcePath(newPath).updatedExtension(".json")
            os.symlink(sidecar_uri.ospath, newSidecar.ospath)

        # Run ingest with auto mode since that should automatically determine
        # that an in-place ingest is happening.
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files).
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one.
        uri = butler.getURI(self.ingestDatasetTypeName, self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail."""
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            self.skipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        instrument = self.instrumentClass()
        collection = instrument.makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        calibration_names = instrument.getCuratedCalibrationNames()

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")
                self.assertIn(datasetTypeName, calibration_names)

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Define visits from the ingested raws and check them against
        ``self.visits``.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(
            self.root,
            config_file=None,
            collections=self.outputRun,
            instrument=self.instrumentName,
            raw_name=self.ingestDatasetTypeName,
        )

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = Instrument.from_string(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is an unordered set, so pair each expected visit
            # with the found one by equality instead of by iteration order;
            # zipping the two could check one visit's region against another
            # visit's detectors.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"No visit found matching {expectedVisit}")

            # Test that this visit is associated with the expected exposures.
            foundExposures = (
                butler.registry.queryDataIds(["exposure"], dataId=expectedVisit).expanded().toSet()
            )
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = (
                butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit).expanded().toSet()
            )
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))

                idInfo = lsst.obs.base.ExposureIdInfo.fromDataId(dataId)
                self.assertGreater(idInfo.unusedBits, 0)