Coverage for python/lsst/obs/base/ingest_tests.py: 26%

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Base class for writing Gen3 raw data ingest tests.
"""

__all__ = ("IngestTestBase",)

import abc
import tempfile
import unittest
import os
import shutil

import lsst.afw.cameraGeom
from lsst.daf.butler import Butler, ButlerURI
from lsst.daf.butler.cli.butler import cli as butlerCli
from lsst.daf.butler.cli.utils import LogCliRunner
import lsst.obs.base
from lsst.utils import doImport
from .utils import getInstrument
from . import script


class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of Gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    ingestDatasetTypeName = "raw"
    """The DatasetType to use for the ingest.

    If this is not an Exposure dataset type the tests will be more limited.
    """

    dataIds = []
    """list of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    filterLabel = None
    """The lsst.afw.image.FilterLabel that should be returned by the above
    file."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Dataset types that should be present after calling
    writeCuratedCalibrations. If `None`, writeCuratedCalibrations will
    not be called and the test will be skipped."""
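
    # Illustrative values only: what belongs here depends on what the
    # instrument's ``writeCuratedCalibrations`` provides; a typical obs
    # package might use something like ``("camera", "defects")``.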

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """
        pass

    @property
    def instrumentClass(self):
        """The instrument class."""
        return doImport(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        # Use a temporary working directory.
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        cls._createRepo()

        # Register the instrument and its static metadata.
        cls._registerInstrument()

    def setUp(self):
        # Want a unique run name per test.
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        if os.path.exists(cls.root):
            shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as for the first, so we gain
        nothing by doing full checks of everything. Full pixel data are
        therefore read only for the first dataset from the file, and only when
        the caller requests it via ``fullCheck``. This only really affects
        files that contain multiple datasets.
        """
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))

        # Get the URI to the first dataset and check it is inside the
        # datastore.
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        # Get the relevant dataset type.
        datasetType = butler.registry.getDatasetType(self.ingestDatasetTypeName)

        for dataId in self.dataIds:
            # For testing we only read the entire dataset the first time
            # round if this is an Exposure. If it's not an Exposure
            # we always read it completely but we don't read components
            # because for an arbitrary dataset type we can't easily tell
            # what component to test.

            if not datasetType.storageClass.name.startswith("Exposure"):
                exposure = butler.get(self.ingestDatasetTypeName, dataId)
                # Could be anything, so there is nothing to test by default.
                continue

            # Check that we can read metadata from a raw.
            metadata = butler.get(f"{self.ingestDatasetTypeName}.metadata", dataId)
            if not fullCheck:
                continue
            fullCheck = False
            exposure = butler.get(self.ingestDatasetTypeName, dataId)

            # Comparing headers will not work directly because of header
            # fix up provenance.
            metadata_headers = metadata.toDict()
            exposure_headers = exposure.getMetadata().toDict()
            metadata_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            exposure_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            self.assertEqual(metadata_headers, exposure_headers)

            # Since components follow a different code path we check that
            # the WCS matches and that at least the shape of the image is
            # the same (rather than doing per-pixel equality).
            wcs = butler.get(f"{self.ingestDatasetTypeName}.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get(f"{self.ingestDatasetTypeName}.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # Check that the filter label got the correct band.
            filterLabel = butler.get(f"{self.ingestDatasetTypeName}.filterLabel", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

            # Check that the exposure's Detector is the same as the component
            # we would read (this is tricky for LSST, which modifies its
            # detector at read time; for most other cameras it should be
            # trivially satisfied).
            detector = butler.get(f"{self.ingestDatasetTypeName}.detector", dataId)
            self.assertDetectorsEqual(detector, exposure.getDetector(), compareTransforms=False)

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
        """

        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["create", cls.root])
        # Classmethod so assertEqual does not work.
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["ingest-raws", self.root, file,
                                           "--output-run", self.outputRun,
                                           "--transfer", transfer,
                                           "--ingest-task", self.rawIngestTask])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work.
        assert result.exit_code == 0, f"output: {result.output} exception: {result.exception}"

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of the datastore.
        srcUri = ButlerURI(self.file, forceAbsolute=True)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        try:
            self._ingestRaws(transfer="hardlink")
        # Running ingest through the Click testing infrastructure causes
        # the original exception indicating that we can't hard-link
        # on this filesystem to be turned into a nonzero exit code, which
        # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest("Skipping hard-link test because input data"
                                    " is on a different filesystem.") from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        butler = Butler(self.root, run=self.outputRun)

        # If the test uses an index file, the index file needs to also
        # appear in the datastore root along with the file to be ingested.
        # In that scenario the file name being used for ingest cannot
        # be modified and must have the same name as found in the index
        # file itself.
        source_file_uri = ButlerURI(self.file)
        index_file = source_file_uri.dirname().join("_index.json")
        pathInStore = source_file_uri.basename()
        if index_file.exists():
            os.symlink(index_file.ospath, butler.datastore.root.join("_index.json").ospath)
        else:
            # No index file, so we are free to pick any name.
            pathInStore = "prefix-" + pathInStore

        # Create a symlink to the original file so that it looks like it
        # is now inside the datastore.
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)

        # If there is a sidecar file it needs to be linked in as well
        # since the ingest code does not follow symlinks.
        sidecar_uri = ButlerURI(source_file_uri).updatedExtension(".json")
        if sidecar_uri.exists():
            newSidecar = ButlerURI(newPath).updatedExtension(".json")
            os.symlink(sidecar_uri.ospath, newSidecar.ospath)

        # Run ingest with auto mode since that should automatically determine
        # that an in-place ingest is happening.
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files).
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one.
        uri = butler.getURI(self.ingestDatasetTypeName, self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail.
        """
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass().makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # loadCamera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")

        # loadCamera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public API.
        script.defineVisits(self.root, config_file=None, collections=self.outputRun,
                            instrument=self.instrumentName, raw_name=self.ingestDatasetTypeName)

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = getInstrument(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for foundVisit, (expectedVisit, expectedExposures) in zip(visits, self.visits.items()):
            # Test that this visit is associated with the expected exposures.
            foundExposures = butler.registry.queryDataIds(["exposure"], dataId=expectedVisit
                                                          ).expanded().toSet()
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of
            # the detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit
                                                                ).expanded().toSet()
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))