lsst.obs.base  20.0.0-73-gf477d90+1f94629e3c
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
33 import lsst.afw.cameraGeom
34 from lsst.daf.butler import Butler, ButlerURI
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 from lsst.daf.butler.cli.utils import LogCliRunner
37 import lsst.obs.base
38 from lsst.utils import doImport
39 from .utils import getInstrument
40 from . import script
41 
42 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    dataIds = []
    """List of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of dataset types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument in it."""
        # Use a temporary working directory
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        cls._createRepo()

        # Register the instrument and its static metadata
        cls._registerInstrument()

    def setUp(self):
        """Choose the output run collection used by the current test."""
        # Want a unique run name per test
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
            Forwarded unchanged to `checkRepo`.
        cli : `bool`, optional
            Not used by this implementation; kept so that existing callers
            passing it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        # Local flag so that the caller's argument is not overwritten; it is
        # cleared after the first full read.
        fullCheckPending = fullCheck
        for dataId in self.dataIds:
            # Check that we can read metadata from a raw
            metadata = butler.get("raw.metadata", dataId)
            if not fullCheckPending:
                continue
            fullCheckPending = False
            exposure = butler.get("raw", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Ingest with ``transfer="link"`` and verify the result."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Ingest with ``transfer="symlink"`` and verify the result."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Ingest with ``transfer="direct"`` and check the stored URI is the
        source file itself.
        """
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore
        srcUri = ButlerURI(self.file)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Ingest with ``transfer="copy"`` and run the full read check."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Ingest with ``transfer="hardlink"``, skipping the test if a
        `PermissionError` is raised.
        """
        try:
            self._ingestRaws(transfer="hardlink")
            self.verifyIngest()
        except PermissionError as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        butler = Butler(self.root, run=self.outputRun)
        pathInStore = "prefix-" + os.path.basename(self.file)
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files)
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one
        uri = butler.getURI("raw", self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass.makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        # Write the curated calibrations through the command line interface.
        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Define visits from the ingested raws and check them against
        ``self.visits``.

        The test is skipped when ``self.visits`` is `None`.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is a set, so its iteration order need not match the
            # order of ``self.visits``; pair each expected visit with the
            # found visit that compares equal to it instead of by position.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"No visit found matching {expectedVisit}")

            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(
                ["exposure"], dataId=expectedVisit
            ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)

            # Test that we have a visit region, and that it contains all of
            # the detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(
                ["visit", "detector"], dataId=expectedVisit
            ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))
lsst.obs.base.ingest_tests.IngestTestBase._ingestRaws
def _ingestRaws(self, transfer, file=None)
Definition: ingest_tests.py:199
lsst.obs.base.ingest_tests.IngestTestBase.outputRun
outputRun
Definition: ingest_tests.py:119
lsst.obs.base.ingest_tests.IngestTestBase.setUp
def setUp(self)
Definition: ingest_tests.py:117
lsst.obs.base.ingest_tests.IngestTestBase._createRepo
def _createRepo(cls)
Definition: ingest_tests.py:191
lsst.obs.base.utils.getInstrument
def getInstrument(instrumentName, registry=None)
Definition: utils.py:131
lsst.obs.base.ingest_tests.IngestTestBase.testHardLink
def testHardLink(self)
Definition: ingest_tests.py:261
lsst.obs.base.ingest_tests.IngestTestBase.testInPlace
def testInPlace(self)
Definition: ingest_tests.py:269
lsst.obs.base.ingest_tests.IngestTestBase.testCopy
def testCopy(self)
Definition: ingest_tests.py:254
lsst.obs.base.ingest_tests.IngestTestBase.visits
dictionary visits
Definition: ingest_tests.py:72
lsst.obs.base.ingest_tests.IngestTestBase.ingestDir
string ingestDir
Definition: ingest_tests.py:48
lsst.obs.base.ingest_tests.IngestTestBase.instrumentName
def instrumentName(self)
Definition: ingest_tests.py:98
lsst.obs.base.ingest_tests.IngestTestBase.dataIds
list dataIds
Definition: ingest_tests.py:53
lsst.obs.base.ingest_tests.IngestTestBase.testSymLink
def testSymLink(self)
Definition: ingest_tests.py:240
lsst.obs.base.ingest_tests.IngestTestBase.testDirect
def testDirect(self)
Definition: ingest_tests.py:244
lsst.obs.base.ingest_tests.IngestTestBase.testWriteCuratedCalibrations
def testWriteCuratedCalibrations(self)
Definition: ingest_tests.py:296
lsst.obs.base.ingest_tests.IngestTestBase.rawIngestTask
string rawIngestTask
Definition: ingest_tests.py:59
lsst.obs.base.ingest_tests.IngestTestBase.setUpClass
def setUpClass(cls)
Definition: ingest_tests.py:109
lsst::utils
lsst.obs.base.ingest_tests.IngestTestBase.verifyIngest
def verifyIngest(self, files=None, cli=False, fullCheck=False)
Definition: ingest_tests.py:126
lsst.obs.base.defineVisits.DefineVisitsTask
Definition: defineVisits.py:281
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClass
def instrumentClass(self)
Definition: ingest_tests.py:93
lsst.obs.base.ingest_tests.IngestTestBase.tearDownClass
def tearDownClass(cls)
Definition: ingest_tests.py:122
lsst.obs.base.ingest_tests.IngestTestBase.curatedCalibrationDatasetTypes
curatedCalibrationDatasetTypes
Definition: ingest_tests.py:62
lsst.obs.base.ingest_tests.IngestTestBase.root
root
Definition: ingest_tests.py:111
lsst.obs.base.ingest_tests.IngestTestBase._registerInstrument
def _registerInstrument(cls)
Definition: ingest_tests.py:221
lsst.obs.base.ingest_tests.IngestTestBase.testLink
def testLink(self)
Definition: ingest_tests.py:236
lsst.obs.base.ingest_tests.IngestTestBase.file
string file
Definition: ingest_tests.py:56
lsst.obs.base.ingest_tests.IngestTestBase.testDefineVisits
def testDefineVisits(self)
Definition: ingest_tests.py:343
lsst.obs.base.ingest_tests.IngestTestBase._writeCuratedCalibrations
def _writeCuratedCalibrations(self)
Definition: ingest_tests.py:229
lsst.obs.base.ingest_tests.IngestTestBase.checkRepo
def checkRepo(self, files=None)
Definition: ingest_tests.py:177
lsst.obs.base.ingest_tests.IngestTestBase.testFailOnConflict
def testFailOnConflict(self)
Definition: ingest_tests.py:289
lsst.obs.base.ingest_tests.IngestTestBase
Definition: ingest_tests.py:43
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClassName
def instrumentClassName(self)
Definition: ingest_tests.py:82
lsst.obs.base
Definition: __init__.py:1