lsst.obs.base  19.0.0-32-g3b6bf2d
ingest_tests.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 
22 """Base class for writing Gen3 raw data ingest tests.
23 """
24 
25 __all__ = ("IngestTestBase",)
26 
27 import abc
28 import tempfile
29 import unittest
30 import os
31 import shutil
32 
33 from lsst.daf.butler import Butler
34 import lsst.obs.base
35 
36 
class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    instrument = None
    """The instrument to be registered and tested."""

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    RawIngestTask = lsst.obs.base.RawIngestTask
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Datasets types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    def setUp(self):
        """Create a scratch repository under ``ingestDir``, register the
        instrument in it, and build a default ingest config.

        Sets ``self.root``, ``self.butler`` and ``self.config`` for use by
        the test methods.
        """
        # Use a temporary working directory
        self.root = tempfile.mkdtemp(dir=self.ingestDir)
        try:
            Butler.makeRepo(self.root)
            self.butler = Butler(self.root, run="raw")

            # Register the instrument and its static metadata
            self.instrument.register(self.butler.registry)

            # Make a default config for test methods to play with
            self.config = self.RawIngestTask.ConfigClass()
            self.config.instrument = \
                f"{self.instrument.__class__.__module__}.{self.instrument.__class__.__name__}"
        except Exception:
            # unittest does not run tearDown when setUp raises, so remove the
            # scratch directory here to avoid leaving it behind.
            shutil.rmtree(self.root, ignore_errors=True)
            raise

    def tearDown(self):
        """Remove the scratch repository created by `setUp`, if it still
        exists.
        """
        if os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runIngest(self, files=None):
        """
        Initialize and run RawIngestTask on a list of files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        if files is None:
            files = [self.file]
        task = self.RawIngestTask(config=self.config, butler=self.butler)
        task.log.setLevel(task.log.FATAL)  # silence logs, since we expect a lot of warnings
        task.run(files)

    def runIngestTest(self, files=None):
        """
        Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        self.runIngest(files)
        datasets = self.butler.registry.queryDatasets('raw', collections=...)
        self.assertEqual(len(list(datasets)), len(self.dataIds))
        for dataId in self.dataIds:
            exposure = self.butler.get("raw", dataId)
            metadata = self.butler.get("raw.metadata", dataId)
            # only check the metadata, not the images, to speed up tests
            self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())
        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    def testSymLink(self):
        """Run the ingest test with the ``symlink`` transfer mode."""
        self.config.transfer = "symlink"
        self.runIngestTest()

    def testCopy(self):
        """Run the ingest test with the ``copy`` transfer mode."""
        self.config.transfer = "copy"
        self.runIngestTest()

    def testHardLink(self):
        """Run the ingest test with the ``hardlink`` transfer mode.

        The test is skipped when the ingest raises `PermissionError`.
        """
        self.config.transfer = "hardlink"
        try:
            self.runIngestTest()
        except PermissionError as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        # symlink into repo root manually
        newPath = os.path.join(self.butler.datastore.root, os.path.basename(self.file))
        os.symlink(os.path.abspath(self.file), newPath)
        self.config.transfer = None
        self.runIngestTest([newPath])

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self.config.transfer = "symlink"
        self.runIngest()
        with self.assertRaises(Exception):
            self.runIngest()

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations"""
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        self.instrument.writeCuratedCalibrations(self.butler)

        dataId = {"instrument": self.instrument.getName()}
        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName, dataId=dataId):
                datasets = list(self.butler.registry.queryDatasets(datasetTypeName, collections=...,
                                                                   dataId=dataId))
                self.assertGreater(len(datasets), 0, f"Checking {datasetTypeName}")