Coverage for tests/test_obscore.py: 19%

264 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-15 02:03 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import gc 

23import os 

24import tempfile 

25import unittest 

26import warnings 

27from abc import abstractmethod 

28from typing import Dict, List, Optional 

29 

30import astropy.time 

31import sqlalchemy 

32from lsst.daf.butler import ( 

33 CollectionType, 

34 Config, 

35 DatasetIdGenEnum, 

36 DatasetRef, 

37 DatasetType, 

38 StorageClassFactory, 

39) 

40from lsst.daf.butler.registry import Registry, RegistryConfig 

41from lsst.daf.butler.registry.obscore import DatasetTypeConfig, ObsCoreConfig, ObsCoreSchema 

42from lsst.daf.butler.registry.obscore._schema import _STATIC_COLUMNS 

43from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

44from lsst.sphgeom import Box, ConvexPolygon, LonLat, UnitVector3d 

45 

46try: 

47 import testing.postgresql 

48except ImportError: 

49 testing = None 

50 

51TESTDIR = os.path.abspath(os.path.dirname(__file__)) 

52 

53 

54class ObsCoreTests: 

55 """Base class for testing obscore manager functionality.""" 

56 

57 def make_registry( 

58 self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None 

59 ) -> Registry: 

60 """Create new empty Registry.""" 

61 config = self.make_registry_config(collections, collection_type) 

62 registry = Registry.createFromConfig(config, butlerRoot=self.root) 

63 self.initialize_registry(registry) 

64 return registry 

65 

66 @abstractmethod 

67 def make_registry_config( 

68 self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None 

69 ) -> RegistryConfig: 

70 """Make Registry configuration.""" 

71 raise NotImplementedError() 

72 

73 def initialize_registry(self, registry: Registry) -> None: 

74 """Populate Registry with the things that we need for tests.""" 

75 

76 registry.insertDimensionData("instrument", {"name": "DummyCam"}) 

77 registry.insertDimensionData( 

78 "physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "r"} 

79 ) 

80 for detector in (1, 2, 3, 4): 

81 registry.insertDimensionData( 

82 "detector", {"instrument": "DummyCam", "id": detector, "full_name": f"detector{detector}"} 

83 ) 

84 

85 for exposure in (1, 2, 3, 4): 

86 registry.insertDimensionData( 

87 "exposure", 

88 { 

89 "instrument": "DummyCam", 

90 "id": exposure, 

91 "obs_id": f"exposure{exposure}", 

92 "physical_filter": "d-r", 

93 }, 

94 ) 

95 

96 registry.insertDimensionData("visit_system", {"instrument": "DummyCam", "id": 1, "name": "default"}) 

97 

98 for visit in (1, 2, 3, 4, 9): 

99 visit_start = astropy.time.Time(f"2020-01-01 08:0{visit}:00", scale="tai") 

100 visit_end = astropy.time.Time(f"2020-01-01 08:0{visit}:45", scale="tai") 

101 registry.insertDimensionData( 

102 "visit", 

103 { 

104 "instrument": "DummyCam", 

105 "id": visit, 

106 "name": f"visit{visit}", 

107 "physical_filter": "d-r", 

108 "visit_system": 1, 

109 "datetime_begin": visit_start, 

110 "datetime_end": visit_end, 

111 }, 

112 ) 

113 

114 # Only couple of exposures are linked to visits. 

115 for visit in (1, 2): 

116 registry.insertDimensionData( 

117 "visit_definition", 

118 { 

119 "instrument": "DummyCam", 

120 "exposure": visit, 

121 "visit": visit, 

122 }, 

123 ) 

124 

125 for visit in (1, 2, 3, 4): 

126 for detector in (1, 2, 3, 4): 

127 lon = visit * 90 - 88 

128 lat = detector * 2 - 5 

129 region = ConvexPolygon( 

130 [ 

131 UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat - 1.0)), 

132 UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat - 1.0)), 

133 UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat + 1.0)), 

134 UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat + 1.0)), 

135 ] 

136 ) 

137 registry.insertDimensionData( 

138 "visit_detector_region", 

139 { 

140 "instrument": "DummyCam", 

141 "visit": visit, 

142 "detector": detector, 

143 "region": region, 

144 }, 

145 ) 

146 

147 # Visit 9 has non-polygon region 

148 for detector in (1, 2, 3, 4): 

149 lat = detector * 2 - 5 

150 region = Box.fromDegrees(17.0, lat - 1.0, 19.0, lat + 1.0) 

151 registry.insertDimensionData( 

152 "visit_detector_region", 

153 { 

154 "instrument": "DummyCam", 

155 "visit": 9, 

156 "detector": detector, 

157 "region": region, 

158 }, 

159 ) 

160 

161 # Add few dataset types 

162 storage_class_factory = StorageClassFactory() 

163 storage_class = storage_class_factory.getStorageClass("StructuredDataDict") 

164 

165 self.dataset_types: Dict[str, DatasetType] = {} 

166 

167 dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"]) 

168 self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class) 

169 

170 dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"]) 

171 self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class) 

172 

173 dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"]) 

174 self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class) 

175 

176 dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"]) 

177 self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True) 

178 

179 for dataset_type in self.dataset_types.values(): 

180 registry.registerDatasetType(dataset_type) 

181 

182 # Add few run collections. 

183 for run in (1, 2, 3, 4, 5, 6): 

184 registry.registerRun(f"run{run}") 

185 

186 # Add few chained collections, run6 is not in any chained collections. 

187 registry.registerCollection("chain12", CollectionType.CHAINED) 

188 registry.setCollectionChain("chain12", ("run1", "run2")) 

189 registry.registerCollection("chain34", CollectionType.CHAINED) 

190 registry.setCollectionChain("chain34", ("run3", "run4")) 

191 registry.registerCollection("chain-all", CollectionType.CHAINED) 

192 registry.setCollectionChain("chain-all", ("chain12", "chain34", "run5")) 

193 

194 # And a tagged collection 

195 registry.registerCollection("tagged", CollectionType.TAGGED) 

196 

197 def make_obscore_config( 

198 self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None 

199 ) -> Config: 

200 """Make configuration for obscore manager.""" 

201 obscore_config = Config(os.path.join(TESTDIR, "config", "basic", "obscore.yaml")) 

202 if collections is not None: 

203 obscore_config["collections"] = collections 

204 if collection_type is not None: 

205 obscore_config["collection_type"] = collection_type 

206 return obscore_config 

207 

208 def _insert_dataset( 

209 self, registry: Registry, run: str, dataset_type: str, do_import: bool = False, **kwargs 

210 ) -> DatasetRef: 

211 """Insert or import one dataset into a specified run collection.""" 

212 data_id = {"instrument": "DummyCam", "physical_filter": "d-r"} 

213 data_id.update(kwargs) 

214 if do_import: 

215 ds_type = self.dataset_types[dataset_type] 

216 dataset_id = registry.datasetIdFactory.makeDatasetId( 

217 run, ds_type, data_id, DatasetIdGenEnum.UNIQUE 

218 ) 

219 ref = DatasetRef(ds_type, data_id, id=dataset_id, run=run) 

220 [ref] = registry._importDatasets([ref]) 

221 else: 

222 [ref] = registry.insertDatasets(dataset_type, [data_id], run=run) 

223 return ref 

224 

225 def _insert_datasets(self, registry: Registry, do_import: bool = False) -> List[DatasetRef]: 

226 """Inset a small bunch of datasets into every run collection.""" 

227 return [ 

228 self._insert_dataset(registry, "run1", "raw", detector=1, exposure=1, do_import=do_import), 

229 self._insert_dataset(registry, "run2", "calexp", detector=2, visit=2, do_import=do_import), 

230 self._insert_dataset(registry, "run3", "raw", detector=3, exposure=3, do_import=do_import), 

231 self._insert_dataset(registry, "run4", "calexp", detector=4, visit=4, do_import=do_import), 

232 self._insert_dataset(registry, "run5", "calexp", detector=4, visit=4, do_import=do_import), 

233 # This dataset type is not configured, will not be in obscore. 

234 self._insert_dataset(registry, "run5", "no_obscore", detector=1, visit=1, do_import=do_import), 

235 self._insert_dataset(registry, "run6", "raw", detector=1, exposure=4, do_import=do_import), 

236 ] 

237 

238 def _obscore_select(self, registry: Registry) -> list: 

239 """Select all rows from obscore table.""" 

240 db = registry._db 

241 table = registry._managers.obscore.table 

242 results = db.query(table.select()) 

243 return list(results) 

244 

245 def test_config_errors(self): 

246 """Test for handling various configuration problems.""" 

247 

248 # This raises pydantic ValidationError, which wraps ValueError 

249 exception_re = "'collections' must have one element" 

250 with self.assertRaisesRegex(ValueError, exception_re): 

251 self.make_registry(None, "TAGGED") 

252 

253 with self.assertRaisesRegex(ValueError, exception_re): 

254 self.make_registry([], "TAGGED") 

255 

256 with self.assertRaisesRegex(ValueError, exception_re): 

257 self.make_registry(["run1", "run2"], "TAGGED") 

258 

259 # Invalid regex. 

260 with self.assertRaisesRegex(ValueError, "Failed to compile regex"): 

261 self.make_registry(["+run"], "RUN") 

262 

263 def test_schema(self): 

264 """Check how obscore schema is constructed""" 

265 

266 config = ObsCoreConfig(obs_collection="", dataset_types=[], facility_name="FACILITY") 

267 schema = ObsCoreSchema(config, []) 

268 table_spec = schema.table_spec 

269 self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS]) 

270 

271 # extra columns from top-level config 

272 config = ObsCoreConfig( 

273 obs_collection="", 

274 extra_columns={"c1": 1, "c2": "string", "c3": {"template": "{calib_level}", "type": "float"}}, 

275 dataset_types=[], 

276 facility_name="FACILITY", 

277 ) 

278 schema = ObsCoreSchema(config, []) 

279 table_spec = schema.table_spec 

280 self.assertEqual( 

281 list(table_spec.fields.names), 

282 [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"], 

283 ) 

284 self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger) 

285 self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String) 

286 self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float) 

287 

288 # extra columns from per-dataset type configs 

289 config = ObsCoreConfig( 

290 obs_collection="", 

291 extra_columns={"c1": 1}, 

292 dataset_types={ 

293 "raw": DatasetTypeConfig( 

294 name="raw", 

295 dataproduct_type="image", 

296 calib_level=1, 

297 extra_columns={"c2": "string"}, 

298 ), 

299 "calexp": DatasetTypeConfig( 

300 dataproduct_type="image", 

301 calib_level=2, 

302 extra_columns={"c3": 1e10}, 

303 ), 

304 }, 

305 facility_name="FACILITY", 

306 ) 

307 schema = ObsCoreSchema(config, []) 

308 table_spec = schema.table_spec 

309 self.assertEqual( 

310 list(table_spec.fields.names), 

311 [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"], 

312 ) 

313 self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger) 

314 self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String) 

315 self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float) 

316 

317 # Columns with the same names as in static list in configs, types 

318 # are not overriden. 

319 config = ObsCoreConfig( 

320 version=0, 

321 obs_collection="", 

322 extra_columns={"t_xel": 1e10}, 

323 dataset_types={ 

324 "raw": DatasetTypeConfig( 

325 dataproduct_type="image", 

326 calib_level=1, 

327 extra_columns={"target_name": 1}, 

328 ), 

329 "calexp": DatasetTypeConfig( 

330 dataproduct_type="image", 

331 calib_level=2, 

332 extra_columns={"em_xel": "string"}, 

333 ), 

334 }, 

335 facility_name="FACILITY", 

336 ) 

337 schema = ObsCoreSchema(config, []) 

338 table_spec = schema.table_spec 

339 self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS]) 

340 self.assertEqual(table_spec.fields["t_xel"].dtype, sqlalchemy.Integer) 

341 self.assertEqual(table_spec.fields["target_name"].dtype, sqlalchemy.String) 

342 self.assertEqual(table_spec.fields["em_xel"].dtype, sqlalchemy.Integer) 

343 

344 def test_insert_existing_collection(self): 

345 """Test insert and import registry methods, with various restrictions 

346 on collection names. 

347 """ 

348 

349 # First item is collections, second item is expected record count. 

350 test_data = ( 

351 (None, 6), 

352 (["run1", "run2"], 2), 

353 (["run[34]"], 2), 

354 (["[rR]un[^6]"], 5), 

355 ) 

356 

357 for collections, count in test_data: 

358 for do_import in (False, True): 

359 

360 registry = self.make_registry(collections) 

361 self._insert_datasets(registry, do_import) 

362 

363 rows = self._obscore_select(registry) 

364 self.assertEqual(len(rows), count) 

365 

366 def test_drop_datasets(self): 

367 """Test for dropping datasets after obscore insert.""" 

368 

369 collections = None 

370 registry = self.make_registry(collections) 

371 refs = self._insert_datasets(registry) 

372 

373 rows = self._obscore_select(registry) 

374 self.assertEqual(len(rows), 6) 

375 

376 # drop single dataset 

377 registry.removeDatasets(ref for ref in refs if ref.run == "run1") 

378 rows = self._obscore_select(registry) 

379 self.assertEqual(len(rows), 5) 

380 

381 # drop whole run collection 

382 registry.removeCollection("run6") 

383 rows = self._obscore_select(registry) 

384 self.assertEqual(len(rows), 4) 

385 

386 def test_associate(self): 

387 """Test for associating datasets to TAGGED collection.""" 

388 

389 collections = ["tagged"] 

390 registry = self.make_registry(collections, "TAGGED") 

391 refs = self._insert_datasets(registry) 

392 

393 rows = self._obscore_select(registry) 

394 self.assertEqual(len(rows), 0) 

395 

396 # Associate datasets that are already in obscore, changes nothing. 

397 registry.associate("tagged", (ref for ref in refs if ref.run == "run1")) 

398 rows = self._obscore_select(registry) 

399 self.assertEqual(len(rows), 1) 

400 

401 # Associate datasets that are not in obscore 

402 registry.associate("tagged", (ref for ref in refs if ref.run == "run3")) 

403 rows = self._obscore_select(registry) 

404 self.assertEqual(len(rows), 2) 

405 

406 # Disassociate them 

407 registry.disassociate("tagged", (ref for ref in refs if ref.run == "run3")) 

408 rows = self._obscore_select(registry) 

409 self.assertEqual(len(rows), 1) 

410 

411 # Non-associated dataset, should be OK and not throw. 

412 registry.disassociate("tagged", (ref for ref in refs if ref.run == "run2")) 

413 rows = self._obscore_select(registry) 

414 self.assertEqual(len(rows), 1) 

415 

416 registry.disassociate("tagged", (ref for ref in refs if ref.run == "run1")) 

417 rows = self._obscore_select(registry) 

418 self.assertEqual(len(rows), 0) 

419 

420 def test_region_type_warning(self, count: int = 1) -> None: 

421 """Test that non-polygon region generates one or more warnings.""" 

422 

423 collections = None 

424 registry = self.make_registry(collections) 

425 

426 with warnings.catch_warnings(record=True) as warning_records: 

427 self._insert_dataset(registry, "run2", "calexp", detector=2, visit=9) 

428 self.assertEqual(len(warning_records), count) 

429 for record in warning_records: 

430 self.assertRegex( 

431 str(record.message), 

432 "Unexpected region type for obscore dataset.*lsst.sphgeom._sphgeom.Box.*", 

433 ) 

434 

435 

class SQLiteObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with SQLite backend.

    Every registry is backed by a new SQLite file created inside a per-test
    temporary directory.
    """

    def setUp(self):
        # Per-test scratch directory, removed again in tearDown.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_registry_config(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> RegistryConfig:
        # docstring inherited from a base class
        fd, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
        # mkstemp returns an open OS-level descriptor; only the file name is
        # needed here, so close it instead of leaking one per registry.
        os.close(fd)
        config = RegistryConfig()
        config["db"] = f"sqlite:///{filename}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config

457 

458 

@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with PostgreSQL backend.

    A server factory is created once per class; every test obtains its own
    server instance from it in ``setUp`` and stops it in ``tearDown``.
    """

    @classmethod
    def _handler(cls, postgresql):
        """Create the ``btree_gist`` extension in the given database.

        Passed to the server factory as its ``on_initialized`` callback.
        """
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.server = self.postgresql()
        # Counter used to give every registry in a test its own namespace.
        self.count = 0

    def tearDown(self):
        removeTestTempDir(self.root)
        # Shut down the server started in setUp. Calling the factory here
        # would launch one more server instance instead of stopping this one.
        self.server.stop()

    def make_registry_config(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> RegistryConfig:
        # docstring inherited from a base class
        self.count += 1
        config = RegistryConfig()
        config["db"] = self.server.url()
        # Use unique namespace for each instance, some tests may use sub-tests.
        config["namespace"] = f"namespace{self.count}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config

508 

509 

@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresPgSphereObsCoreTest(PostgresObsCoreTest):
    """Unit test for obscore with PostgreSQL backend and pgsphere plugin."""

    @classmethod
    def _handler(cls, postgresql):
        """Create the ``pg_sphere`` extension after the base class setup.

        Raises
        ------
        unittest.SkipTest
            Raised if creating the extension fails with an operational
            error.
        """
        super()._handler(postgresql)
        pg_engine = sqlalchemy.engine.create_engine(postgresql.url())
        with pg_engine.begin() as conn:
            try:
                conn.execute(sqlalchemy.text("CREATE EXTENSION pg_sphere;"))
            except sqlalchemy.exc.OperationalError:
                raise unittest.SkipTest("pg_sphere extension does not exist")

    def make_obscore_config(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Config:
        """Return the base obscore configuration with the pgsphere spatial
        plugin enabled.
        """
        config = super().make_obscore_config(collections, collection_type)
        plugin_config = {
            "region_column": "pgs_region",
            "position_column": "pgs_center",
        }
        config["spatial_plugins"] = {
            "pgsphere": {
                "cls": "lsst.daf.butler.registry.obscore.pgsphere.PgSphereObsCorePlugin",
                "config": plugin_config,
            }
        }
        return config

    def test_spatial(self):
        """Check that the pgsphere plugin fills the spatial columns."""
        registry = self.make_registry(None)
        self._insert_datasets(registry)

        # All configured datasets are in the table.
        self.assertEqual(len(self._obscore_select(registry)), 6)

        db = registry._db
        table_name = registry._managers.obscore.table.key

        # Spatial queries are not easy to build with sqlalchemy, so plain
        # text queries are used; each one is paired with its expected number
        # of matching rows.
        checks = (
            # position matching visit=1, there is a single dataset
            (f"SELECT * FROM {table_name} WHERE pgs_center <-> '(2d,0d)'::spoint < .1", 1),
            # position matching visit=4, there are two datasets
            (f"SELECT * FROM {table_name} WHERE pgs_center <-> '(272d,0d)'::spoint < .1", 2),
            # position matching visit=1, there is a single dataset
            (f"SELECT * FROM {table_name} WHERE '(2d,-3d)'::spoint @ pgs_region", 1),
            # position matching visit=4, there are two datasets
            (f"SELECT * FROM {table_name} WHERE '(272d,3d)'::spoint @ pgs_region", 2),
        )
        for query, expected in checks:
            matched = db.query(sqlalchemy.text(query))
            self.assertEqual(len(list(matched)), expected)

    def test_region_type_warning(self) -> None:
        """Check that a non-polygon region generates two warnings."""
        # pgsphere plugin adds one more warning
        super().test_region_type_warning(2)

581 

582 

if __name__ == "__main__":
    # Run all tests in this module when the file is executed as a script.
    unittest.main()