lsst.obs.base  20.0.0-74-g0218c7a+3251b390b2
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
33 import lsst.afw.cameraGeom
34 from lsst.daf.butler import Butler, ButlerURI
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 from lsst.daf.butler.cli.utils import LogCliRunner
37 import lsst.obs.base
38 from lsst.utils import doImport
39 from .utils import getInstrument
40 from . import script
41 
42 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    dataIds = []
    """List of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of dataset types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument in it.

        The repository root is stored in ``cls.root`` and is shared by all
        tests in the class.
        """
        # Use a temporary working directory
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        try:
            cls._createRepo()

            # Register the instrument and its static metadata
            cls._registerInstrument()
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises,
            # so remove the temporary directory here to avoid leaking it.
            shutil.rmtree(cls.root, ignore_errors=True)
            raise

    def setUp(self):
        """Choose the output run collection name for this test."""
        # Want a unique run name per test
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
            This is only forwarded to `checkRepo`.
        cli : `bool`, optional
            Not used by this implementation; accepted so that existing
            callers passing it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        for dataId in self.dataIds:
            # Check that we can read metadata from a raw
            metadata = butler.get("raw.metadata", dataId)
            if not fullCheck:
                continue
            # Only the first data ID gets the expensive full read.
            fullCheck = False
            exposure = butler.get("raw", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Test ingest with the ``link`` transfer mode."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Test ingest with the ``symlink`` transfer mode."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Test ingest with the ``direct`` transfer mode."""
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore
        srcUri = ButlerURI(self.file)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Test ingest with the ``copy`` transfer mode, with a full read."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Test ingest with the ``hardlink`` transfer mode.

        The test is skipped if the ingest fails, on the assumption that the
        input data is on a different filesystem from the repository.
        """
        try:
            self._ingestRaws(transfer="hardlink")
            # Running ingest through the Click testing infrastructure causes
            # the original exception indicating that we can't hard-link
            # on this filesystem to be turned into a nonzero exit code, which
            # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        butler = Butler(self.root, run=self.outputRun)
        pathInStore = "prefix-" + os.path.basename(self.file)
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files)
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one
        uri = butler.getURI("raw", self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass.makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Test that defining visits produces the visits in ``self.visits``,
        each associated with the expected exposures and with a region that
        contains all of its detector+visit regions.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # Pair each expected visit with the found visit that compares
            # equal to it, rather than relying on the iteration order of the
            # found set matching the order of the expected dictionary.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"No visit found matching {expectedVisit}")
            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(
                ["exposure"], dataId=expectedVisit
            ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(
                ["visit", "detector"], dataId=expectedVisit
            ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))
lsst.obs.base.ingest_tests.IngestTestBase._ingestRaws
def _ingestRaws(self, transfer, file=None)
Definition: ingest_tests.py:199
lsst.obs.base.ingest_tests.IngestTestBase.outputRun
outputRun
Definition: ingest_tests.py:119
lsst.obs.base.ingest_tests.IngestTestBase.setUp
def setUp(self)
Definition: ingest_tests.py:117
lsst.obs.base.ingest_tests.IngestTestBase._createRepo
def _createRepo(cls)
Definition: ingest_tests.py:191
lsst.obs.base.utils.getInstrument
def getInstrument(instrumentName, registry=None)
Definition: utils.py:131
lsst.obs.base.ingest_tests.IngestTestBase.testHardLink
def testHardLink(self)
Definition: ingest_tests.py:261
lsst.obs.base.ingest_tests.IngestTestBase.testInPlace
def testInPlace(self)
Definition: ingest_tests.py:273
lsst.obs.base.ingest_tests.IngestTestBase.testCopy
def testCopy(self)
Definition: ingest_tests.py:254
lsst.obs.base.ingest_tests.IngestTestBase.visits
dictionary visits
Definition: ingest_tests.py:72
lsst.obs.base.ingest_tests.IngestTestBase.ingestDir
string ingestDir
Definition: ingest_tests.py:48
lsst.obs.base.ingest_tests.IngestTestBase.instrumentName
def instrumentName(self)
Definition: ingest_tests.py:98
lsst.obs.base.ingest_tests.IngestTestBase.dataIds
list dataIds
Definition: ingest_tests.py:53
lsst.obs.base.ingest_tests.IngestTestBase.testSymLink
def testSymLink(self)
Definition: ingest_tests.py:240
lsst.obs.base.ingest_tests.IngestTestBase.testDirect
def testDirect(self)
Definition: ingest_tests.py:244
lsst.obs.base.ingest_tests.IngestTestBase.testWriteCuratedCalibrations
def testWriteCuratedCalibrations(self)
Definition: ingest_tests.py:300
lsst.obs.base.ingest_tests.IngestTestBase.rawIngestTask
string rawIngestTask
Definition: ingest_tests.py:59
lsst.obs.base.ingest_tests.IngestTestBase.setUpClass
def setUpClass(cls)
Definition: ingest_tests.py:109
lsst::utils
lsst.obs.base.ingest_tests.IngestTestBase.verifyIngest
def verifyIngest(self, files=None, cli=False, fullCheck=False)
Definition: ingest_tests.py:126
lsst.obs.base.defineVisits.DefineVisitsTask
Definition: defineVisits.py:281
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClass
def instrumentClass(self)
Definition: ingest_tests.py:93
lsst.obs.base.ingest_tests.IngestTestBase.tearDownClass
def tearDownClass(cls)
Definition: ingest_tests.py:122
lsst.obs.base.ingest_tests.IngestTestBase.curatedCalibrationDatasetTypes
curatedCalibrationDatasetTypes
Definition: ingest_tests.py:62
lsst.obs.base.ingest_tests.IngestTestBase.root
root
Definition: ingest_tests.py:111
lsst.obs.base.ingest_tests.IngestTestBase._registerInstrument
def _registerInstrument(cls)
Definition: ingest_tests.py:221
lsst.obs.base.ingest_tests.IngestTestBase.testLink
def testLink(self)
Definition: ingest_tests.py:236
lsst.obs.base.ingest_tests.IngestTestBase.file
string file
Definition: ingest_tests.py:56
lsst.obs.base.ingest_tests.IngestTestBase.testDefineVisits
def testDefineVisits(self)
Definition: ingest_tests.py:347
lsst.obs.base.ingest_tests.IngestTestBase._writeCuratedCalibrations
def _writeCuratedCalibrations(self)
Definition: ingest_tests.py:229
lsst.obs.base.ingest_tests.IngestTestBase.checkRepo
def checkRepo(self, files=None)
Definition: ingest_tests.py:177
lsst.obs.base.ingest_tests.IngestTestBase.testFailOnConflict
def testFailOnConflict(self)
Definition: ingest_tests.py:293
lsst.obs.base.ingest_tests.IngestTestBase
Definition: ingest_tests.py:43
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClassName
def instrumentClassName(self)
Definition: ingest_tests.py:82
lsst.obs.base
Definition: __init__.py:1