lsst.obs.base  19.0.0-71-g41c0270
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import click.testing
29 import tempfile
30 import unittest
31 import os
32 import shutil
33 
34 from lsst.daf.butler import Butler
35 from lsst.daf.butler.cli.butler import cli as butlerCli
36 import lsst.obs.base
37 from lsst.utils import doImport
38 from .utils import getInstrument
39 from . import script
40 
41 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.

    ``setUp`` creates a fresh butler repository in a temporary directory
    under ``ingestDir`` and registers the instrument; ``tearDown`` removes
    that directory again.  Subclasses configure the tests through the class
    attributes below and must implement ``instrumentClassName``.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of dataset types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    outputRun = "raw"
    """The name of the output run to use in tests.
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class, imported from ``instrumentClassName``."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    def setUp(self):
        """Create a temporary repository and register the instrument."""
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=self.ingestDir)
        self._createRepo()

        # Register the instrument and its static metadata
        self._registerInstrument()

    def tearDown(self):
        """Remove the temporary repository created by ``setUp``."""
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False):
        """Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        cli : `bool`, optional
            Not used by this implementation; retained so existing callers
            that pass it keep working.
        """
        butler = Butler(self.root, run=self.outputRun)
        # Query by the "raw" dataset type rather than by the name of the
        # output run: the two only coincide while ``outputRun`` keeps its
        # default value, and the component reads below already use "raw".
        datasets = butler.registry.queryDatasets("raw", collections=...)
        self.assertEqual(len(list(datasets)), len(self.dataIds))
        for dataId in self.dataIds:
            exposure = butler.get("raw", dataId)
            metadata = butler.get("raw.metadata", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get("raw.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    def _createRepo(self):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = click.testing.CliRunner()
        result = runner.invoke(butlerCli, ["create", self.root])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def _ingestRaws(self, transfer):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        """
        # NOTE(review): testInPlace passes ``transfer=None``, which is
        # forwarded unchanged as the value of ``--transfer``; confirm the
        # command line treats that as "no transfer".
        runner = click.testing.CliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root,
                                           "--output-run", self.outputRun,
                                           "--file", self.file,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def _registerInstrument(self):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = click.testing.CliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", self.root,
                                           "--instrument", self.instrumentClassName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = click.testing.CliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root,
                                           "--instrument", self.instrumentName,
                                           "--output-run", self.outputRun])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Ingest with ``transfer="link"`` and verify the result."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Ingest with ``transfer="symlink"`` and verify the result."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testCopy(self):
        """Ingest with ``transfer="copy"`` and verify the result."""
        self._ingestRaws(transfer="copy")
        self.verifyIngest()

    def testHardLink(self):
        """Ingest with ``transfer="hardlink"`` and verify the result.

        The test is skipped if a `PermissionError` is raised.
        """
        try:
            self._ingestRaws(transfer="hardlink")
            self.verifyIngest()
        except PermissionError as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        butler = Butler(self.root, run=self.outputRun)
        newPath = os.path.join(butler.datastore.root, os.path.basename(self.file))
        os.symlink(os.path.abspath(self.file), newPath)
        self._ingestRaws(transfer=None)
        self.verifyIngest()

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations"""
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        self._writeCuratedCalibrations()

        dataId = {"instrument": self.instrumentName}
        butler = Butler(self.root, run=self.outputRun)
        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName, dataId=dataId):
                datasets = list(butler.registry.queryDatasets(datasetTypeName, collections=...,
                                                              dataId=dataId))
                self.assertGreater(len(datasets), 0, f"Checking {datasetTypeName}")

    def testDefineVisits(self):
        """Define visits from the ingested raws and check them against
        ``self.visits``.

        The test is skipped when ``self.visits`` is `None`.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = set(butler.registry.queryDimensions(["visit"], expand=True))
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is a set, so its iteration order is unrelated to the
            # order of ``self.visits``; pair each expected visit with the
            # found one by equality instead of by position.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"No visit found matching {expectedVisit}")
            # Test that this visit is associated with the expected exposures.
            foundExposures = set(butler.registry.queryDimensions(["exposure"], dataId=expectedVisit,
                                                                 expand=True))
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = set(butler.registry.queryDimensions(["visit", "detector"],
                                                                       dataId=expectedVisit,
                                                                       expand=True))
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))
lsst.obs.base.ingest_tests.IngestTestBase.verifyIngest
def verifyIngest(self, files=None, cli=False)
Definition: ingest_tests.py:123
lsst.obs.base.ingest_tests.IngestTestBase.setUp
def setUp(self)
Definition: ingest_tests.py:111
lsst.obs.base.utils.getInstrument
def getInstrument(instrumentName, registry=None)
Definition: utils.py:100
lsst.obs.base.ingest_tests.IngestTestBase.testHardLink
def testHardLink(self)
Definition: ingest_tests.py:217
lsst.obs.base.ingest_tests.IngestTestBase.testInPlace
def testInPlace(self)
Definition: ingest_tests.py:225
lsst.obs.base.ingest_tests.IngestTestBase.testCopy
def testCopy(self)
Definition: ingest_tests.py:213
lsst.obs.base.ingest_tests.IngestTestBase.visits
dictionary visits
Definition: ingest_tests.py:71
lsst.obs.base.ingest_tests.IngestTestBase.ingestDir
string ingestDir
Definition: ingest_tests.py:47
lsst.obs.base.ingest_tests.IngestTestBase.instrumentName
def instrumentName(self)
Definition: ingest_tests.py:101
lsst.obs.base.ingest_tests.IngestTestBase.dataIds
list dataIds
Definition: ingest_tests.py:52
lsst.obs.base.ingest_tests.IngestTestBase.tearDown
def tearDown(self)
Definition: ingest_tests.py:119
lsst.obs.base.ingest_tests.IngestTestBase.testSymLink
def testSymLink(self)
Definition: ingest_tests.py:209
lsst.obs.base.ingest_tests.IngestTestBase._ingestRaws
def _ingestRaws(self, transfer)
Definition: ingest_tests.py:171
lsst.obs.base.ingest_tests.IngestTestBase.testWriteCuratedCalibrations
def testWriteCuratedCalibrations(self)
Definition: ingest_tests.py:243
lsst.obs.base.ingest_tests.IngestTestBase._registerInstrument
def _registerInstrument(self)
Definition: ingest_tests.py:188
lsst::utils
lsst.obs.base.defineVisits.DefineVisitsTask
Definition: defineVisits.py:250
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClass
def instrumentClass(self)
Definition: ingest_tests.py:96
lsst.obs.base.ingest_tests.IngestTestBase._createRepo
def _createRepo(self)
Definition: ingest_tests.py:164
lsst.obs.base.ingest_tests.IngestTestBase.curatedCalibrationDatasetTypes
curatedCalibrationDatasetTypes
Definition: ingest_tests.py:61
lsst.obs.base.ingest_tests.IngestTestBase.root
root
Definition: ingest_tests.py:113
lsst.obs.base.ingest_tests.IngestTestBase.testLink
def testLink(self)
Definition: ingest_tests.py:205
lsst.obs.base.ingest_tests.IngestTestBase.file
string file
Definition: ingest_tests.py:55
lsst.obs.base.ingest_tests.IngestTestBase.testDefineVisits
def testDefineVisits(self)
Definition: ingest_tests.py:258
lsst.obs.base.ingest_tests.IngestTestBase._writeCuratedCalibrations
def _writeCuratedCalibrations(self)
Definition: ingest_tests.py:196
lsst.obs.base.ingest_tests.IngestTestBase.outputRun
string outputRun
Definition: ingest_tests.py:79
lsst.obs.base.ingest_tests.IngestTestBase.checkRepo
def checkRepo(self, files=None)
Definition: ingest_tests.py:151
lsst.obs.base.ingest_tests.IngestTestBase.testFailOnConflict
def testFailOnConflict(self)
Definition: ingest_tests.py:236
lsst.obs.base.ingest_tests.IngestTestBase
Definition: ingest_tests.py:42
lsst.obs.base.ingest_tests.IngestTestBase.instrumentClassName
def instrumentClassName(self)
Definition: ingest_tests.py:85
lsst.obs.base
Definition: __init__.py:1