lsst.obs.base  20.0.0-50-g2b8b609+534456e0aa
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
33 import lsst.afw.cameraGeom
34 from lsst.daf.butler import Butler
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 from lsst.daf.butler.cli.utils import LogCliRunner
37 import lsst.obs.base
38 from lsst.utils import doImport
39 from .utils import getInstrument
40 from . import script
41 
42 
43 class IngestTestBase(metaclass=abc.ABCMeta):
44  """Base class for tests of gen3 ingest. Subclass from this, then
45  `unittest.TestCase` to get a working test suite.
46  """
47 
48  ingestDir = ""
49  """Root path to ingest files into. Typically `obs_package/tests/`; the
50  actual directory will be a tempdir under this one.
51  """
52 
53  dataIds = []
54  """list of butler data IDs of files that should have been ingested."""
55 
56  file = ""
57  """Full path to a file to ingest in tests."""
58 
59  rawIngestTask = "lsst.obs.base.RawIngestTask"
60  """The task to use in the Ingest test."""
61 
62  curatedCalibrationDatasetTypes = None
63  """List or tuple of dataset types that should be present after calling
64  writeCuratedCalibrations. If `None`, writeCuratedCalibrations will
65  not be called and the test will be skipped."""
66 
67  defineVisitsTask = lsst.obs.base.DefineVisitsTask
68  """The task to use to define visits from groups of exposures.
69  This is ignored if ``visits`` is `None`.
70  """
71 
72  visits = {}
73  """A dictionary mapping visit data IDs to the lists of exposure data IDs that
74  are associated with them.
75  If this is empty (but not `None`), visit definition will be run but no
76  visits will be expected (e.g. because no exposures are on-sky
77  observations).
78  """
79 
80  outputRun = "raw"
81  """The name of the output run to use in tests.
82  """
83 
84  @property
85  @abc.abstractmethod
86  def instrumentClassName(self):
87  """The fully qualified instrument class name.
88 
89  Returns
90  -------
91  `str`
92  The fully qualified instrument class name.
93  """
94  pass
95 
96  @property
97  def instrumentClass(self):
98  """The instrument class."""
99  return doImport(self.instrumentClassName)
100 
101  @property
102  def instrumentName(self):
103  """The name of the instrument.
104 
105  Returns
106  -------
107  `str`
108  The name of the instrument.
109  """
110  return self.instrumentClass.getName()
111 
112  def setUp(self):
113  # Use a temporary working directory
114  self.root = tempfile.mkdtemp(dir=self.ingestDir)
115  self._createRepo()
116 
117  # Register the instrument and its static metadata
118  self._registerInstrument()
119 
120  def tearDown(self):
121  if os.path.exists(self.root):
122  shutil.rmtree(self.root, ignore_errors=True)
123 
124  def verifyIngest(self, files=None, cli=False):
125  """
126  Test that RawIngestTask ingested the expected files.
127 
128  Parameters
129  ----------
130  files : `list` [`str`], or None
131  List of files to be ingested, or None to use ``self.file``
132  """
133  butler = Butler(self.root, run=self.outputRun)
134  datasets = butler.registry.queryDatasets(self.outputRun, collections=...)
135  self.assertEqual(len(list(datasets)), len(self.dataIds))
136  for dataId in self.dataIds:
137  exposure = butler.get(self.outputRun, dataId)
138  metadata = butler.get("raw.metadata", dataId)
139  self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())
140 
141  # Since components follow a different code path we check that
142  # WCS match and also we check that at least the shape
143  # of the image is the same (rather than doing per-pixel equality)
144  wcs = butler.get("raw.wcs", dataId)
145  self.assertEqual(wcs, exposure.getWcs())
146 
147  rawImage = butler.get("raw.image", dataId)
148  self.assertEqual(rawImage.getBBox(), exposure.getBBox())
149 
150  self.checkRepo(files=files)
151 
152  def checkRepo(self, files=None):
153  """Check the state of the repository after ingest.
154 
155  This is an optional hook provided for subclasses; by default it does
156  nothing.
157 
158  Parameters
159  ----------
160  files : `list` [`str`], or None
161  List of files to be ingested, or None to use ``self.file``
162  """
163  pass
164 
165  def _createRepo(self):
166  """Use the Click `testing` module to call the butler command line api
167  to create a repository."""
168  runner = LogCliRunner()
169  result = runner.invoke(butlerCli, ["create", self.root])
170  self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")
171 
172  def _ingestRaws(self, transfer):
173  """Use the Click `testing` module to call the butler command line api
174  to ingest raws.
175 
176  Parameters
177  ----------
178  transfer : `str`
179  The external data transfer type.
180  """
181  runner = LogCliRunner()
182  result = runner.invoke(butlerCli, ["ingest-raws", self.root, self.file,
183  "--output-run", self.outputRun,
184  "--transfer", transfer,
185  "--ingest-task", self.rawIngestTask])
186  self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")
187 
188  def _registerInstrument(self):
189  """Use the Click `testing` module to call the butler command line api
190  to register the instrument."""
191  runner = LogCliRunner()
192  result = runner.invoke(butlerCli, ["register-instrument", self.root, self.instrumentClassName])
193  self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")
194 
195  def _writeCuratedCalibrations(self):
196  """Use the Click `testing` module to call the butler command line api
197  to write curated calibrations."""
198  runner = LogCliRunner()
199  result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root,
200  "--instrument", self.instrumentName])
201  self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")
202 
203  def testLink(self):
204  self._ingestRaws(transfer="link")
205  self.verifyIngest()
206 
207  def testSymLink(self):
208  self._ingestRaws(transfer="symlink")
209  self.verifyIngest()
210 
211  def testCopy(self):
212  self._ingestRaws(transfer="copy")
213  self.verifyIngest()
214 
215  def testHardLink(self):
216  try:
217  self._ingestRaws(transfer="hardlink")
218  self.verifyIngest()
219  except PermissionError as err:
220  raise unittest.SkipTest("Skipping hard-link test because input data"
221  " is on a different filesystem.") from err
222 
223  def testInPlace(self):
224  """Test that files already in the directory can be added to the
225  registry in-place.
226  """
227  # symlink into repo root manually
228  butler = Butler(self.root, run=self.outputRun)
229  newPath = butler.datastore.root.join(os.path.basename(self.file))
230  os.symlink(os.path.abspath(self.file), newPath.ospath)
231  self._ingestRaws(transfer=None)
232  self.verifyIngest()
233 
234  def testFailOnConflict(self):
235  """Re-ingesting the same data into the repository should fail.
236  """
237  self._ingestRaws(transfer="symlink")
238  with self.assertRaises(Exception):
239  self._ingestRaws(transfer="symlink")
240 
241  def testWriteCuratedCalibrations(self):
242  """Test that we can ingest the curated calibrations, and read them
243  with `loadCamera` both before and after.
244  """
245  if self.curatedCalibrationDatasetTypes is None:
246  raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")
247 
248  butler = Butler(self.root, writeable=False)
249  collection = self.instrumentClass.makeCalibrationCollectionName()
250 
251  # Trying to load a camera with a data ID not known to the registry
252  # is an error, because we can't get any temporal information.
253  with self.assertRaises(LookupError):
254  lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
255 
256  # Ingest raws in order to get some exposure records.
257  self._ingestRaws(transfer="auto")
258 
259  # Load camera should return an unversioned camera because there's
260  # nothing in the repo.
261  camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
262  self.assertFalse(isVersioned)
263  self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)
264 
265  self._writeCuratedCalibrations()
266 
267  # Make a new butler instance to make sure we don't have any stale
268  # caches (e.g. of DatasetTypes). Note that we didn't give
269  # _writeCuratedCalibrations the butler instance we had, because it's
270  # trying to test the CLI interface anyway.
271  butler = Butler(self.root, writeable=False)
272 
273  for datasetTypeName in self.curatedCalibrationDatasetTypes:
274  with self.subTest(dtype=datasetTypeName):
275  found = list(
276  butler.registry.queryDatasetAssociations(
277  datasetTypeName,
278  collections=collection,
279  )
280  )
281  self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")
282 
283  # Load camera should return the versioned camera from the repo.
284  camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
285  self.assertTrue(isVersioned)
286  self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)
287 
288  def testDefineVisits(self):
289  if self.visits is None:
290  self.skipTest("Expected visits were not defined.")
291  self._ingestRaws(transfer="link")
292 
293  # Calling defineVisits tests the implementation of the butler command line interface "define-visits"
294  # subcommand. Functions in the script folder are generally considered protected and should not be used
295  # as public api.
296  script.defineVisits(self.root, config_file=None, collections=self.outputRun,
297  instrument=self.instrumentName)
298 
299  # Test that we got the visits we expected.
300  butler = Butler(self.root, run=self.outputRun)
301  visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
302  self.assertCountEqual(visits, self.visits.keys())
303  instr = getInstrument(self.instrumentName, butler.registry)
304  camera = instr.getCamera()
305  for foundVisit, (expectedVisit, expectedExposures) in zip(visits, self.visits.items()):
306  # Test that this visit is associated with the expected exposures.
307  foundExposures = butler.registry.queryDataIds(["exposure"], dataId=expectedVisit
308  ).expanded().toSet()
309  self.assertCountEqual(foundExposures, expectedExposures)
310  # Test that we have a visit region, and that it contains all of the
311  # detector+visit regions.
312  self.assertIsNotNone(foundVisit.region)
313  detectorVisitDataIds = butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit
314  ).expanded().toSet()
315  self.assertEqual(len(detectorVisitDataIds), len(camera))
316  for dataId in detectorVisitDataIds:
317  self.assertTrue(foundVisit.region.contains(dataId.region))
lsst.obs.base.ingest_tests.IngestTestBase.verifyIngest
def verifyIngest(self, files=None, cli=False)
Definition: ingest_tests.py:124
lsst.obs.base.ingest_tests.IngestTestBase.setUp
def setUp(self)
Definition: ingest_tests.py:112
lsst.obs.base.utils.getInstrument
def getInstrument(instrumentName, registry=None)
Definition: utils.py:131
lsst.obs.base.ingest_tests.IngestTestBase.testHardLink
def testHardLink(self)
Definition: ingest_tests.py:215
lsst.obs.base.ingest_tests.IngestTestBase.testInPlace
def testInPlace(self)
Definition: ingest_tests.py:223
lsst.obs.base.ingest_tests.IngestTestBase.testCopy
def testCopy(self)
Definition: ingest_tests.py:211
lsst.obs.base.ingest_tests.IngestTestBase.visits
dictionary visits
Definition: ingest_tests.py:72
lsst.obs.base.ingest_tests.IngestTestBase.ingestDir
string ingestDir
Definition: ingest_tests.py:48
lsst.obs.base.ingest_tests.IngestTestBase.instrumentName
def instrumentName(self)
Definition: ingest_tests.py:102
lsst.obs.base.ingest_tests.IngestTestBase.dataIds
list dataIds
Definition: ingest_tests.py:53
lsst.obs.base.ingest_tests.IngestTestBase.tearDown
def tearDown(self)
Definition: ingest_tests.py:120
lsst.obs.base.ingest_tests.IngestTestBase.testSymLink
def testSymLink(self)
Definition: ingest_tests.py:207
lsst.obs.base.ingest_tests.IngestTestBase._ingestRaws
def _ingestRaws(self, transfer)
Definition: ingest_tests.py:172
lsst.obs.base.ingest_tests.IngestTestBase.testWriteCuratedCalibrations
def testWriteCuratedCalibrations(self)
Definition: ingest_tests.py:241
lsst.obs.base.ingest_tests.IngestTestBase._registerInstrument
def _registerInstrument(self)
Definition: ingest_tests.py:188
lsst::utils
lsst.obs.base.defineVisits.DefineVisitsTask
Definition: defineVisits.py:281
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClass
def instrumentClass(self)
Definition: ingest_tests.py:97
lsst.obs.base.ingest_tests.IngestTestBase._createRepo
def _createRepo(self)
Definition: ingest_tests.py:165
lsst.obs.base.ingest_tests.IngestTestBase.curatedCalibrationDatasetTypes
curatedCalibrationDatasetTypes
Definition: ingest_tests.py:62
lsst.obs.base.ingest_tests.IngestTestBase.root
root
Definition: ingest_tests.py:114
lsst.obs.base.ingest_tests.IngestTestBase.testLink
def testLink(self)
Definition: ingest_tests.py:203
lsst.obs.base.ingest_tests.IngestTestBase.file
string file
Definition: ingest_tests.py:56
lsst.obs.base.ingest_tests.IngestTestBase.testDefineVisits
def testDefineVisits(self)
Definition: ingest_tests.py:288
lsst.obs.base.ingest_tests.IngestTestBase._writeCuratedCalibrations
def _writeCuratedCalibrations(self)
Definition: ingest_tests.py:195
lsst.obs.base.ingest_tests.IngestTestBase.outputRun
string outputRun
Definition: ingest_tests.py:80
lsst.obs.base.ingest_tests.IngestTestBase.checkRepo
def checkRepo(self, files=None)
Definition: ingest_tests.py:152
lsst.obs.base.ingest_tests.IngestTestBase.testFailOnConflict
def testFailOnConflict(self)
Definition: ingest_tests.py:234
lsst.obs.base.ingest_tests.IngestTestBase
Definition: ingest_tests.py:43
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClassName
def instrumentClassName(self)
Definition: ingest_tests.py:86
lsst.obs.base
Definition: __init__.py:1