lsst.obs.base  19.0.0-46-g6423acc
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
33 from lsst.daf.butler import Butler
34 import lsst.obs.base
35 
36 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.

    Each test runs against a fresh butler repository created in a temporary
    directory under ``ingestDir``; the directory is removed again when the
    test finishes.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    instrument = None
    """The instrument to be registered and tested."""

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    RawIngestTask = lsst.obs.base.RawIngestTask
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Datasets types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    DefineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.

    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to lists of exposure data IDs that
    are associated with them.

    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    def setUp(self):
        """Create a scratch repository, register the instrument and build a
        default ingest config.

        Sets ``self.root``, ``self.butler`` and ``self.config`` for use by the
        test methods.
        """
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=self.ingestDir)
        try:
            Butler.makeRepo(self.root)
            self.butler = Butler(self.root, run="raw")

            # Register the instrument and its static metadata
            self.instrument.register(self.butler.registry)

            # Make a default config for test methods to play with
            self.config = self.RawIngestTask.ConfigClass()
        except Exception:
            # unittest does not call tearDown when setUp raises, so remove
            # the scratch directory here instead of leaking it.
            shutil.rmtree(self.root, ignore_errors=True)
            raise

    def tearDown(self):
        """Remove the scratch repository created by `setUp`."""
        # ignore_errors=True also covers the directory already being gone.
        shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        """
        Initialize and run RawIngestTask on a list of files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        if files is None:
            files = [self.file]
        task = self.RawIngestTask(config=self.config, butler=self.butler)
        task.log.setLevel(task.log.FATAL)  # silence logs, since we expect a lot of warnings
        task.run(files)

    def runIngestTest(self, files=None):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        self.runIngest(files)
        datasets = self.butler.registry.queryDatasets('raw', collections=...)
        self.assertEqual(len(list(datasets)), len(self.dataIds))
        for dataId in self.dataIds:
            exposure = self.butler.get("raw", dataId)
            metadata = self.butler.get("raw.metadata", dataId)
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            # Check the observation type before trying to check WCS
            obsType = self.butler.registry.expandDataId(dataId).records["exposure"].observation_type
            if obsType == "science":
                wcs = self.butler.get("raw.wcs", dataId)
                self.assertEqual(wcs, exposure.getWcs())

            rawImage = self.butler.get("raw.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    def testLink(self):
        """Test ingest with the ``link`` transfer mode."""
        self.config.transfer = "link"
        self.runIngestTest()

    def testSymLink(self):
        """Test ingest with the ``symlink`` transfer mode."""
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        """Test ingest with the ``copy`` transfer mode."""
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        """Test ingest with the ``hardlink`` transfer mode.

        The test is skipped if the ingest raises `PermissionError`.
        """
        self.config.transfer = "hardlink"
        try:
            self.runIngestTest()
        except PermissionError as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        newPath = os.path.join(self.butler.datastore.root, os.path.basename(self.file))
        os.symlink(os.path.abspath(self.file), newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self.config.transfer = "symlink"
        self.runIngest()
        with self.assertRaises(Exception):
            self.runIngest()

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations"""
        if self.curatedCalibrationDatasetTypes is None:
            self.skipTest("Class requests disabling of writeCuratedCalibrations test")

        self.instrument.writeCuratedCalibrations(self.butler)

        dataId = {"instrument": self.instrument.getName()}
        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName, dataId=dataId):
                datasets = list(self.butler.registry.queryDatasets(datasetTypeName, collections=...,
                                                                   dataId=dataId))
                self.assertGreater(len(datasets), 0, f"Checking {datasetTypeName}")

    def testDefineVisits(self):
        """Test that visits defined from the ingested exposures match
        ``self.visits``.

        Skipped when ``self.visits`` is `None`.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self.config.transfer = "link"
        self.runIngest()
        config = self.DefineVisitsTask.ConfigClass()
        self.instrument.applyConfigOverrides(self.DefineVisitsTask._DefaultName, config)
        task = self.DefineVisitsTask(config=config, butler=self.butler)
        task.run(self.dataIds)
        # Test that we got the visits we expected.
        visits = set(self.butler.registry.queryDimensions(["visit"], expand=True))
        self.assertCountEqual(visits, self.visits.keys())
        camera = self.instrument.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is a set, so its iteration order is unrelated to the
            # order of ``self.visits``; match each expected visit to the
            # registry result by equality instead of by position.
            foundVisit = next((visit for visit in visits if visit == expectedVisit), None)
            self.assertIsNotNone(foundVisit, f"No visit found matching {expectedVisit}")
            # Test that this visit is associated with the expected exposures.
            foundExposures = set(self.butler.registry.queryDimensions(["exposure"], dataId=expectedVisit,
                                                                      expand=True))
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = set(self.butler.registry.queryDimensions(["visit", "detector"],
                                                                            dataId=expectedVisit,
                                                                            expand=True))
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))