Coverage for tests/test_obscore.py: 19%

211 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-07 02:47 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import gc 

23import os 

24import tempfile 

25import unittest 

26from abc import abstractmethod 

27from typing import Dict, List, Optional 

28 

29import astropy.time 

30import sqlalchemy 

31from lsst.daf.butler import ( 

32 CollectionType, 

33 Config, 

34 DatasetIdGenEnum, 

35 DatasetRef, 

36 DatasetType, 

37 StorageClassFactory, 

38) 

39from lsst.daf.butler.registry import Registry, RegistryConfig 

40from lsst.daf.butler.registry.obscore import DatasetTypeConfig, ObsCoreConfig, ObsCoreSchema 

41from lsst.daf.butler.registry.obscore._schema import _STATIC_COLUMNS 

42from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir 

43from lsst.sphgeom import ConvexPolygon, LonLat, UnitVector3d 

44 

try:
    # Optional dependency, used only by the PostgreSQL-backed test case.
    import testing.postgresql
except ImportError:
    # Sentinel value: PostgresObsCoreTest is skipped when this is None.
    testing = None

# Absolute path of the directory containing this test module; it is used to
# locate the obscore YAML config and as the parent of per-test temp dirs.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

51 

52 

class ObsCoreTests:
    """Backend-independent tests for the obscore registry manager.

    This class is a mixin: it calls ``self.assert*`` methods but does not
    itself derive from `unittest.TestCase`. Concrete test cases combine it
    with `unittest.TestCase` and implement `make_registry`.
    """

    # NOTE: this class has no ABC metaclass, so ``@abstractmethod`` is only a
    # marker here; the ``NotImplementedError`` below is the actual guard.
    @abstractmethod
    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        """Create new empty Registry.

        Parameters
        ----------
        collections : `list` [`str`], optional
            Value for the ``collections`` key of the obscore configuration;
            the default configuration is used when `None`.
        collection_type : `str`, optional
            Value for the ``collection_type`` key of the obscore
            configuration; the default configuration is used when `None`.

        Returns
        -------
        registry : `Registry`
            Newly created registry.
        """
        raise NotImplementedError()

    def initialize_registry(self, registry: Registry) -> None:
        """Populate Registry with the things that we need for tests.

        Inserts dimension records, registers dataset types (also stored in
        ``self.dataset_types``), and registers the RUN, CHAINED and TAGGED
        collections used by the test methods.

        Parameters
        ----------
        registry : `Registry`
            Registry to populate.
        """
        registry.insertDimensionData("instrument", {"name": "DummyCam"})
        registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "r"}
        )
        for detector in (1, 2, 3, 4):
            registry.insertDimensionData(
                "detector", {"instrument": "DummyCam", "id": detector, "full_name": f"detector{detector}"}
            )

        for exposure in (1, 2, 3, 4):
            registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCam",
                    "id": exposure,
                    "obs_id": f"exposure{exposure}",
                    "physical_filter": "d-r",
                },
            )

        registry.insertDimensionData("visit_system", {"instrument": "DummyCam", "id": 1, "name": "default"})

        # Each visit gets a distinct 45-second time span.
        for visit in (1, 2, 3, 4):
            visit_start = astropy.time.Time(f"2020-01-01 08:0{visit}:00", scale="tai")
            visit_end = astropy.time.Time(f"2020-01-01 08:0{visit}:45", scale="tai")
            registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": visit,
                    "name": f"visit{visit}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                    "datetime_begin": visit_start,
                    "datetime_end": visit_end,
                },
            )

        # Only couple of exposures are linked to visits.
        for visit in (1, 2):
            registry.insertDimensionData(
                "visit_definition",
                {
                    "instrument": "DummyCam",
                    "exposure": visit,
                    "visit": visit,
                },
            )

        # The same region (longitude 0 to 2 degrees, latitude -1 to 1
        # degrees) is used for every visit/detector combination.
        region = ConvexPolygon(
            [
                UnitVector3d(LonLat.fromDegrees(0.0, 1.0)),
                UnitVector3d(LonLat.fromDegrees(2.0, 1.0)),
                UnitVector3d(LonLat.fromDegrees(2.0, -1.0)),
                UnitVector3d(LonLat.fromDegrees(0.0, -1.0)),
            ]
        )
        for visit in (1, 2, 3, 4):
            for detector in (1, 2, 3, 4):
                registry.insertDimensionData(
                    "visit_detector_region",
                    {
                        "instrument": "DummyCam",
                        "visit": visit,
                        "detector": detector,
                        "region": region,
                    },
                )

        # Add few dataset types
        storage_class_factory = StorageClassFactory()
        storage_class = storage_class_factory.getStorageClass("StructuredDataDict")

        self.dataset_types: Dict[str, DatasetType] = {}

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"])
        self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"])
        self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True)

        for dataset_type in self.dataset_types.values():
            registry.registerDatasetType(dataset_type)

        # Add few run collections.
        for run in (1, 2, 3, 4, 5, 6):
            registry.registerRun(f"run{run}")

        # Add few chained collections, run6 is not in any chained collections.
        registry.registerCollection("chain12", CollectionType.CHAINED)
        registry.setCollectionChain("chain12", ("run1", "run2"))
        registry.registerCollection("chain34", CollectionType.CHAINED)
        registry.setCollectionChain("chain34", ("run3", "run4"))
        registry.registerCollection("chain-all", CollectionType.CHAINED)
        registry.setCollectionChain("chain-all", ("chain12", "chain34", "run5"))

        # And a tagged collection
        registry.registerCollection("tagged", CollectionType.TAGGED)

    def make_obscore_config(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Config:
        """Make configuration for obscore manager.

        Parameters
        ----------
        collections : `list` [`str`], optional
            If not `None`, overrides the ``collections`` key of the
            configuration loaded from ``config/basic/obscore.yaml``.
        collection_type : `str`, optional
            If not `None`, overrides the ``collection_type`` key.

        Returns
        -------
        config : `Config`
            Configuration read from the YAML file with overrides applied.
        """
        obscore_config = Config(os.path.join(TESTDIR, "config", "basic", "obscore.yaml"))
        if collections is not None:
            obscore_config["collections"] = collections
        if collection_type is not None:
            obscore_config["collection_type"] = collection_type
        return obscore_config

    def _insert_dataset(
        self, registry: Registry, run: str, dataset_type: str, do_import: bool = False, **kwargs
    ) -> DatasetRef:
        """Insert or import one dataset into a specified run collection.

        Parameters
        ----------
        registry : `Registry`
            Registry to add the dataset to.
        run : `str`
            Name of the run collection.
        dataset_type : `str`
            Dataset type name, a key of ``self.dataset_types``.
        do_import : `bool`, optional
            If `True`, build a `DatasetRef` with a pre-generated ID and add
            it via ``registry._importDatasets``; otherwise use
            ``registry.insertDatasets``.
        **kwargs
            Additional data ID values, merged with the fixed ``instrument``
            and ``physical_filter`` values.

        Returns
        -------
        ref : `DatasetRef`
            Reference returned by the registry for the new dataset.
        """
        data_id = {"instrument": "DummyCam", "physical_filter": "d-r"}
        data_id.update(kwargs)
        if do_import:
            ds_type = self.dataset_types[dataset_type]
            dataset_id = registry.datasetIdFactory.makeDatasetId(
                run, ds_type, data_id, DatasetIdGenEnum.UNIQUE
            )
            ref = DatasetRef(ds_type, data_id, id=dataset_id, run=run)
            [ref] = registry._importDatasets([ref])
        else:
            [ref] = registry.insertDatasets(dataset_type, [data_id], run=run)
        return ref

    def _insert_datasets(self, registry: Registry, do_import: bool = False) -> List[DatasetRef]:
        """Insert a small bunch of datasets into every run collection.

        Seven datasets are added in total, one of which has the
        ``no_obscore`` dataset type.

        Parameters
        ----------
        registry : `Registry`
            Registry to add the datasets to.
        do_import : `bool`, optional
            Passed through to `_insert_dataset`.

        Returns
        -------
        refs : `list` [`DatasetRef`]
            References for all inserted datasets, in insertion order.
        """
        return [
            self._insert_dataset(registry, "run1", "raw", detector=1, exposure=1, do_import=do_import),
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=2, do_import=do_import),
            self._insert_dataset(registry, "run3", "raw", detector=3, exposure=3, do_import=do_import),
            self._insert_dataset(registry, "run4", "calexp", detector=4, visit=4, do_import=do_import),
            self._insert_dataset(registry, "run5", "calexp", detector=4, visit=4, do_import=do_import),
            # This dataset type is not configured, will not be in obscore.
            self._insert_dataset(registry, "run5", "no_obscore", detector=1, visit=1, do_import=do_import),
            self._insert_dataset(registry, "run6", "raw", detector=1, exposure=4, do_import=do_import),
        ]

    def _obscore_select(self, registry: Registry) -> list:
        """Select all rows from obscore table.

        Parameters
        ----------
        registry : `Registry`
            Registry whose obscore table is queried.

        Returns
        -------
        rows : `list`
            All rows returned by an unrestricted select on the table.
        """
        db = registry._db
        table = registry._managers.obscore.table
        results = db.query(table.select())
        return list(results)

    def test_config_errors(self):
        """Test for handling various configuration problems."""

        # This raises pydantic ValidationError, which wraps ValueError
        exception_re = "'collections' must have one element"
        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(None, "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry([], "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(["run1", "run2"], "TAGGED")

        # Invalid regex.
        with self.assertRaisesRegex(ValueError, "Failed to compile regex"):
            self.make_registry(["+run"], "RUN")

    def test_schema(self):
        """Check how obscore schema is constructed"""

        # No extra columns: the table has exactly the static columns.
        config = ObsCoreConfig(obs_collection="", dataset_types=[], facility_name="FACILITY")
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])

        # extra columns from top-level config
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1, "c2": "string", "c3": {"template": "{calib_level}", "type": "float"}},
            dataset_types=[],
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # extra columns from per-dataset type configs
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1},
            dataset_types={
                "raw": DatasetTypeConfig(
                    name="raw",
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"c2": "string"},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"c3": 1e10},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Columns with the same names as in static list in configs, types
        # are not overridden.
        config = ObsCoreConfig(
            version=0,
            obs_collection="",
            extra_columns={"t_xel": 1e10},
            dataset_types={
                "raw": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"target_name": 1},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"em_xel": "string"},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])
        self.assertEqual(table_spec.fields["t_xel"].dtype, sqlalchemy.Integer)
        self.assertEqual(table_spec.fields["target_name"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["em_xel"].dtype, sqlalchemy.Integer)

    def test_insert_existing_collection(self):
        """Test insert and import registry methods, with various restrictions
        on collection names.
        """

        # First item is collections, second item is expected record count.
        test_data = (
            (None, 6),
            (["run1", "run2"], 2),
            (["run[34]"], 2),
            (["[rR]un[^6]"], 5),
        )

        for collections, count in test_data:
            for do_import in (False, True):
                # Each combination runs as a sub-test so that a failure
                # reports the offending parameters and the remaining
                # combinations are still exercised.
                with self.subTest(collections=collections, do_import=do_import):
                    registry = self.make_registry(collections)
                    self._insert_datasets(registry, do_import)

                    rows = self._obscore_select(registry)
                    self.assertEqual(len(rows), count)

    def test_drop_datasets(self):
        """Test for dropping datasets after obscore insert."""

        collections = None
        registry = self.make_registry(collections)
        refs = self._insert_datasets(registry)

        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 6)

        # drop single dataset
        registry.removeDatasets(ref for ref in refs if ref.run == "run1")
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 5)

        # drop whole run collection
        registry.removeCollection("run6")
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 4)

    def test_associate(self):
        """Test for associating datasets to TAGGED collection."""

        collections = ["tagged"]
        registry = self.make_registry(collections, "TAGGED")
        refs = self._insert_datasets(registry)

        # With a TAGGED collection configured, inserting datasets into run
        # collections is expected to add nothing to obscore.
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 0)

        # Associate the run1 dataset; obscore is empty at this point, so this
        # adds the first row.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run1"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        # Associate datasets that are not in obscore
        registry.associate("tagged", (ref for ref in refs if ref.run == "run3"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 2)

        # Disassociate them
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run3"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        # Non-associated dataset, should be OK and not throw.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run2"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        # Disassociate the last remaining dataset, leaving obscore empty.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run1"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 0)

391 

392 

class SQLiteObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Run the `ObsCoreTests` suite against a file-based SQLite registry."""

    def setUp(self):
        # Per-test scratch directory holding the SQLite database files.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        # docstring inherited from a base class
        fd, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
        # mkstemp() returns an already-open OS-level descriptor. Only the
        # file name is needed here, so close the descriptor right away
        # instead of leaking one per registry.
        os.close(fd)
        config = RegistryConfig()
        config["db"] = f"sqlite:///{filename}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        registry = Registry.createFromConfig(config, butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry

414 

415 

@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Run the `ObsCoreTests` suite against a temporary PostgreSQL server.

    Skipped when the ``testing.postgresql`` module cannot be imported.
    """

    @staticmethod
    def _handler(postgresql):
        """Create the ``btree_gist`` extension in a new database.

        Passed as the ``on_initialized`` callback of the server factory.

        Parameters
        ----------
        postgresql
            Server object providing a ``url()`` method.
        """
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        try:
            with engine.begin() as connection:
                connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
        finally:
            # Release pooled connections so none linger after initialization.
            engine.dispose()

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        # Start a fresh server instance for this test.
        self.server = self.postgresql()
        # Counter used to generate a unique namespace per registry.
        self.count = 0

    def tearDown(self):
        removeTestTempDir(self.root)
        # Shut down the server started in setUp(). Calling the factory here
        # would start another server instead of stopping this one.
        self.server.stop()

    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        # docstring inherited from a base class
        self.count += 1
        config = RegistryConfig()
        config["db"] = self.server.url()
        # Use unique namespace for each instance, some tests may use sub-tests.
        config["namespace"] = f"namespace{self.count}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        registry = Registry.createFromConfig(config, butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry

465 

466 

if __name__ == "__main__":
    # Run all test cases in this module when it is executed as a script.
    unittest.main()