Coverage for python/lsst/obs/base/ingest_tests.py: 29%

202 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-19 12:24 -0700

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Base class for writing Gen3 raw data ingest tests. 

23""" 

24 

25__all__ = ("IngestTestBase",) 

26 

27import abc 

28import os 

29import shutil 

30import tempfile 

31import unittest 

32 

33import lsst.afw.cameraGeom 

34import lsst.afw.cameraGeom.testUtils # For assertDetectorsEqual 

35import lsst.obs.base 

36from lsst.daf.butler import Butler 

37from lsst.daf.butler.cli.butler import cli as butlerCli 

38from lsst.daf.butler.cli.utils import LogCliRunner 

39from lsst.pipe.base import Instrument 

40from lsst.resources import ResourcePath 

41from lsst.utils import doImportType 

42 

43from . import script 

44 

45 

class IngestTestBase(metaclass=abc.ABCMeta):
    """Base class for tests of gen3 ingest. Subclass from this, then
    `unittest.TestCase` to get a working test suite.
    """

    ingestDir = ""
    """Root path to ingest files into. Typically `obs_package/tests/`; the
    actual directory will be a tempdir under this one.
    """

    ingestDatasetTypeName = "raw"
    """The DatasetType to use for the ingest.

    If this is not an Exposure dataset type the tests will be more limited.
    """

    dataIds = []
    """List of butler data IDs of files that should have been ingested."""

    file = ""
    """Full path to a file to ingest in tests."""

    filterLabel = None
    """The lsst.afw.image.FilterLabel that should be returned by the above
    file."""

    rawIngestTask = "lsst.obs.base.RawIngestTask"
    """The task to use in the Ingest test."""

    curatedCalibrationDatasetTypes = None
    """List or tuple of Datasets types that should be present after calling
    writeCuratedCalibrations. If `None` writeCuratedCalibrations will
    not be called and the test will be skipped."""

    defineVisitsTask = lsst.obs.base.DefineVisitsTask
    """The task to use to define visits from groups of exposures.
    This is ignored if ``visits`` is `None`.
    """

    visits = {}
    """A dictionary mapping visit data IDs to the lists of exposure data IDs
    that are associated with them.
    If this is empty (but not `None`), visit definition will be run but no
    visits will be expected (e.g. because no exposures are on-sky
    observations).
    """

    seed_config = None
    """Location of a seed configuration file to pass to butler create.

    Useful if additional formatters or storage classes need to be defined.
    """

    @property
    @abc.abstractmethod
    def instrumentClassName(self):
        """The fully qualified instrument class name.

        Notes
        -----
        `_registerInstrument` is a classmethod and reads this as
        ``cls.instrumentClassName``, so a subclass must provide it in a form
        that yields the string at class level (e.g. a plain class attribute).

        Returns
        -------
        `str`
            The fully qualified instrument class name.
        """

    @property
    def instrumentClass(self):
        """The instrument class, imported from ``instrumentClassName``."""
        return doImportType(self.instrumentClassName)

    @property
    def instrumentName(self):
        """The name of the instrument.

        Returns
        -------
        `str`
            The name of the instrument.
        """
        return self.instrumentClass.getName()

    @classmethod
    def setUpClass(cls):
        """Create a temporary repository and register the instrument."""
        # Use a temporary working directory.
        cls.root = tempfile.mkdtemp(dir=cls.ingestDir)
        try:
            cls._createRepo()

            # Register the instrument and its static metadata.
            cls._registerInstrument()
        except BaseException:
            # unittest does not run tearDownClass when setUpClass raises, so
            # remove the temporary repository here to avoid leaking it.
            shutil.rmtree(cls.root, ignore_errors=True)
            raise

    def setUp(self):
        """Choose the output run collection for the current test."""
        # Want a unique run name per test.
        self.outputRun = "raw_ingest_" + self.id()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary repository created by `setUpClass`."""
        # ignore_errors=True already tolerates a missing directory.
        shutil.rmtree(cls.root, ignore_errors=True)

    def verifyIngest(self, files=None, cli=False, fullCheck=False):
        """Test that RawIngestTask ingested the expected files.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``.
            Only forwarded to `checkRepo`.
        cli : `bool`, optional
            Not used by this implementation; retained so that existing
            callers passing it keep working.
        fullCheck : `bool`, optional
            If `True`, read the full raw dataset and check component
            consistency. If `False` check that a component can be read
            but do not read the entire raw exposure.

        Notes
        -----
        Reading all the ingested test data can be expensive. The code paths
        for reading the second raw are the same as reading the first so
        we do not gain anything by doing full checks of everything.
        Only read full pixel data for first dataset from file.
        Don't even do that if we are requested not to by the caller.
        This only really affects files that contain multiple datasets.
        """
        typeName = self.ingestDatasetTypeName
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(typeName, collections=self.outputRun))
        self.assertEqual(len(datasets), len(self.dataIds))
        # Fail with a clear message instead of an IndexError on datasets[0].
        self.assertTrue(datasets, f"No {typeName} datasets found in run {self.outputRun}")

        # Get the URI to the first dataset and check it is inside the
        # datastore.
        datasetUri = butler.getURI(datasets[0])
        self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

        # Get the relevant dataset type. Whether it is an Exposure does not
        # depend on the data ID, so work it out once.
        datasetType = butler.registry.getDatasetType(typeName)
        isExposure = datasetType.storageClass.name.startswith("Exposure")

        # Only the first Exposure gets the full (expensive) check.
        fullCheckPending = fullCheck

        for dataId in self.dataIds:
            # For testing we only read the entire dataset the first time
            # round if this is an Exposure. If it's not an Exposure
            # we always read it completely but we don't read components
            # because for an arbitrary dataset type we can't easily tell
            # what component to test.
            if not isExposure:
                # Could be anything so nothing to test by default beyond
                # the read itself succeeding.
                butler.get(typeName, dataId)
                continue

            # Check that we can read metadata from a raw.
            metadata = butler.get(f"{typeName}.metadata", dataId)
            if not fullCheckPending:
                continue
            fullCheckPending = False
            exposure = butler.get(typeName, dataId)

            # Comparing headers will not work directly because of header
            # fix up provenance.
            metadata_headers = metadata.toDict()
            exposure_headers = exposure.getMetadata().toDict()
            metadata_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            exposure_headers.pop("HIERARCH ASTRO METADATA FIX DATE", None)
            self.assertEqual(metadata_headers, exposure_headers)

            # Since components follow a different code path we check that
            # WCS match and also we check that at least the shape
            # of the image is the same (rather than doing per-pixel equality)
            wcs = butler.get(f"{typeName}.wcs", dataId)
            self.assertEqual(wcs, exposure.getWcs())

            rawImage = butler.get(f"{typeName}.image", dataId)
            self.assertEqual(rawImage.getBBox(), exposure.getBBox())

            # Check that the filter label got the correct band.
            filterLabel = butler.get(f"{typeName}.filter", dataId)
            self.assertEqual(filterLabel, self.filterLabel)

            # Check that the exposure's Detector is the same as the component
            # we would read (this is tricky for LSST, which modifies its
            # detector at read time; for most other cameras it should be
            # trivially satisfied.
            detector = butler.get(f"{typeName}.detector", dataId)
            self.assertDetectorsEqual(detector, exposure.getDetector(), compareTransforms=False)

        self.checkRepo(files=files)

    def checkRepo(self, files=None):
        """Check the state of the repository after ingest.

        This is an optional hook provided for subclasses; by default it does
        nothing.

        Parameters
        ----------
        files : `list` [`str`], or None
            List of files to be ingested, or None to use ``self.file``
        """
        pass

    @classmethod
    def _createRepo(cls):
        """Use the Click `testing` module to call the butler command line api
        to create a repository.

        Raises
        ------
        AssertionError
            Raised if the ``create`` subcommand exits with a nonzero code.
        """
        runner = LogCliRunner()
        args = []
        if cls.seed_config:
            args.extend(["--seed-config", cls.seed_config])
        result = runner.invoke(butlerCli, ["create", cls.root, *args])
        # Classmethod so assertEqual does not work. Raise explicitly rather
        # than using ``assert`` so the check is not stripped by ``python -O``.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _ingestRaws(self, transfer, file=None):
        """Use the Click `testing` module to call the butler command line api
        to ingest raws.

        Parameters
        ----------
        transfer : `str`
            The external data transfer type.
        file : `str`
            Path to a file to ingest instead of the default associated with
            the object.
        """
        if file is None:
            file = self.file
        runner = LogCliRunner()
        result = runner.invoke(
            butlerCli,
            [
                "ingest-raws",
                self.root,
                file,
                "--output-run",
                self.outputRun,
                "--transfer",
                transfer,
                "--ingest-task",
                self.rawIngestTask,
            ],
        )
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    @classmethod
    def _registerInstrument(cls):
        """Use the Click `testing` module to call the butler command line api
        to register the instrument.

        Raises
        ------
        AssertionError
            Raised if the ``register-instrument`` subcommand exits with a
            nonzero code.
        """
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["register-instrument", cls.root, cls.instrumentClassName])
        # Classmethod so assertEqual does not work. Raise explicitly rather
        # than using ``assert`` so the check is not stripped by ``python -O``.
        if result.exit_code != 0:
            raise AssertionError(f"output: {result.output} exception: {result.exception}")

    def _writeCuratedCalibrations(self):
        """Use the Click `testing` module to call the butler command line api
        to write curated calibrations."""
        runner = LogCliRunner()
        result = runner.invoke(butlerCli, ["write-curated-calibrations", self.root, self.instrumentName])
        self.assertEqual(result.exit_code, 0, f"output: {result.output} exception: {result.exception}")

    def testLink(self):
        """Ingest with ``link`` transfer and verify the result."""
        self._ingestRaws(transfer="link")
        self.verifyIngest()

    def testSymLink(self):
        """Ingest with ``symlink`` transfer and verify the result."""
        self._ingestRaws(transfer="symlink")
        self.verifyIngest()

    def testDirect(self):
        """Ingest with ``direct`` transfer and check the dataset URI is the
        original file location.
        """
        self._ingestRaws(transfer="direct")

        # Check that it really did have a URI outside of datastore.
        srcUri = ResourcePath(self.file, forceAbsolute=True)
        butler = Butler(self.root, run=self.outputRun)
        datasets = list(butler.registry.queryDatasets(self.ingestDatasetTypeName, collections=self.outputRun))
        # Fail with a clear message instead of an IndexError on datasets[0].
        self.assertTrue(datasets, f"No {self.ingestDatasetTypeName} datasets found in run {self.outputRun}")
        datastoreUri = butler.getURI(datasets[0])
        self.assertEqual(datastoreUri, srcUri)

    def testCopy(self):
        """Ingest with ``copy`` transfer and run the full read check."""
        self._ingestRaws(transfer="copy")
        # Only test full read of raws for the copy test. No need to do it
        # in the other tests since the formatter will be the same in all
        # cases.
        self.verifyIngest(fullCheck=True)

    def testHardLink(self):
        """Ingest with ``hardlink`` transfer, skipping if the ingest fails."""
        try:
            self._ingestRaws(transfer="hardlink")
            # Running ingest through the Click testing infrastructure causes
            # the original exception indicating that we can't hard-link
            # on this filesystem to be turned into a nonzero exit code, which
            # then trips the test assertion.
        except (AssertionError, PermissionError) as err:
            raise unittest.SkipTest(
                "Skipping hard-link test because input data is on a different filesystem."
            ) from err
        self.verifyIngest()

    def testInPlace(self):
        """Test that files already in the directory can be added to the
        registry in-place.
        """
        butler = Butler(self.root, run=self.outputRun)

        # If the test uses an index file the index file needs to also
        # appear in the datastore root along with the file to be ingested.
        # In that scenario the file name being used for ingest can not
        # be modified and must have the same name as found in the index
        # file itself.
        source_file_uri = ResourcePath(self.file)
        index_file = source_file_uri.dirname().join("_index.json")
        pathInStore = source_file_uri.basename()
        if index_file.exists():
            os.symlink(index_file.ospath, butler.datastore.root.join("_index.json").ospath)
        else:
            # No index file so we are free to pick any name.
            pathInStore = "prefix-" + pathInStore

        # Create a symlink to the original file so that it looks like it
        # is now inside the datastore.
        newPath = butler.datastore.root.join(pathInStore)
        os.symlink(os.path.abspath(self.file), newPath.ospath)

        # If there is a sidecar file it needs to be linked in as well
        # since ingest code does not follow symlinks.
        sidecar_uri = ResourcePath(source_file_uri).updatedExtension(".json")
        if sidecar_uri.exists():
            newSidecar = ResourcePath(newPath).updatedExtension(".json")
            os.symlink(sidecar_uri.ospath, newSidecar.ospath)

        # Run ingest with auto mode since that should automatically determine
        # that an in-place ingest is happening.
        self._ingestRaws(transfer="auto", file=newPath.ospath)
        self.verifyIngest()

        # Recreate a butler post-ingest (the earlier one won't see the
        # ingested files).
        butler = Butler(self.root, run=self.outputRun)

        # Check that the URI associated with this path is the right one.
        uri = butler.getURI(self.ingestDatasetTypeName, self.dataIds[0])
        self.assertEqual(uri.relative_to(butler.datastore.root), pathInStore)

    def testFailOnConflict(self):
        """Re-ingesting the same data into the repository should fail."""
        self._ingestRaws(transfer="symlink")
        with self.assertRaises(Exception):
            self._ingestRaws(transfer="symlink")

    def testWriteCuratedCalibrations(self):
        """Test that we can ingest the curated calibrations, and read them
        with `loadCamera` both before and after.
        """
        if self.curatedCalibrationDatasetTypes is None:
            raise unittest.SkipTest("Class requests disabling of writeCuratedCalibrations test")

        butler = Butler(self.root, writeable=False)
        collection = self.instrumentClass().makeCalibrationCollectionName()

        # Trying to load a camera with a data ID not known to the registry
        # is an error, because we can't get any temporal information.
        with self.assertRaises(LookupError):
            lsst.obs.base.loadCamera(butler, {"exposure": 0}, collections=collection)

        # Ingest raws in order to get some exposure records.
        self._ingestRaws(transfer="auto")

        # Load camera should return an unversioned camera because there's
        # nothing in the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertFalse(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

        self._writeCuratedCalibrations()

        # Make a new butler instance to make sure we don't have any stale
        # caches (e.g. of DatasetTypes). Note that we didn't give
        # _writeCuratedCalibrations the butler instance we had, because it's
        # trying to test the CLI interface anyway.
        butler = Butler(self.root, writeable=False)

        instrumentClass = self.instrumentClass()
        calibration_names = instrumentClass.getCuratedCalibrationNames()

        for datasetTypeName in self.curatedCalibrationDatasetTypes:
            with self.subTest(dtype=datasetTypeName):
                found = list(
                    butler.registry.queryDatasetAssociations(
                        datasetTypeName,
                        collections=collection,
                    )
                )
                self.assertGreater(len(found), 0, f"Checking {datasetTypeName}")
                self.assertIn(datasetTypeName, calibration_names)

        # Load camera should return the versioned camera from the repo.
        camera, isVersioned = lsst.obs.base.loadCamera(butler, self.dataIds[0], collections=collection)
        self.assertTrue(isVersioned)
        self.assertIsInstance(camera, lsst.afw.cameraGeom.Camera)

    def testDefineVisits(self):
        """Define visits from the ingested raws and compare them, their
        exposures and their regions against ``self.visits``.
        """
        if self.visits is None:
            self.skipTest("Expected visits were not defined.")
        self._ingestRaws(transfer="link")

        # Calling defineVisits tests the implementation of the butler command
        # line interface "define-visits" subcommand. Functions in the script
        # folder are generally considered protected and should not be used
        # as public api.
        script.defineVisits(
            self.root,
            config_file=None,
            collections=self.outputRun,
            instrument=self.instrumentName,
            raw_name=self.ingestDatasetTypeName,
        )

        # Test that we got the visits we expected.
        butler = Butler(self.root, run=self.outputRun)
        visits = butler.registry.queryDataIds(["visit"]).expanded().toSet()
        self.assertCountEqual(visits, self.visits.keys())
        instr = Instrument.from_string(self.instrumentName, butler.registry)
        camera = instr.getCamera()
        for expectedVisit, expectedExposures in self.visits.items():
            # ``visits`` is a set, so its iteration order is unrelated to the
            # order of ``self.visits``. Pair each expected visit with the
            # found visit that compares equal to it; assertCountEqual above
            # guarantees that one exists.
            foundVisit = next(visit for visit in visits if visit == expectedVisit)

            # Test that this visit is associated with the expected exposures.
            foundExposures = (
                butler.registry.queryDataIds(["exposure"], dataId=expectedVisit).expanded().toSet()
            )
            self.assertCountEqual(foundExposures, expectedExposures)
            # Test that we have a visit region, and that it contains all of the
            # detector+visit regions.
            self.assertIsNotNone(foundVisit.region)
            detectorVisitDataIds = (
                butler.registry.queryDataIds(["visit", "detector"], dataId=expectedVisit).expanded().toSet()
            )
            self.assertEqual(len(detectorVisitDataIds), len(camera))
            for dataId in detectorVisitDataIds:
                self.assertTrue(foundVisit.region.contains(dataId.region))

                idInfo = lsst.obs.base.ExposureIdInfo.fromDataId(dataId)
                self.assertGreater(idInfo.unusedBits, 0)

478 

479 idInfo = lsst.obs.base.ExposureIdInfo.fromDataId(dataId) 

480 self.assertGreater(idInfo.unusedBits, 0)