Coverage for tests/test_obscore.py: 19%
211 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import gc
import os
import tempfile
import unittest
from abc import abstractmethod
from typing import Dict, List, Optional

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    StorageClassFactory,
)
from lsst.daf.butler.registry import Registry, RegistryConfig
from lsst.daf.butler.registry.obscore import DatasetTypeConfig, ObsCoreConfig, ObsCoreSchema
from lsst.daf.butler.registry.obscore._schema import _STATIC_COLUMNS
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.sphgeom import ConvexPolygon, LonLat, UnitVector3d

try:
    import testing.postgresql
except ImportError:
    testing = None

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class ObsCoreTests:
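    """Base class for obscore manager tests; concrete subclasses provide the
    Registry backend via ``make_registry``.
    """
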
    @abstractmethod
    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        """Create a new empty Registry."""
        raise NotImplementedError()

    def initialize_registry(self, registry: Registry) -> None:
        """Populate Registry with the things that we need for tests."""
        registry.insertDimensionData("instrument", {"name": "DummyCam"})
        registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "r"}
        )
        for detector in (1, 2, 3, 4):
            registry.insertDimensionData(
                "detector", {"instrument": "DummyCam", "id": detector, "full_name": f"detector{detector}"}
            )

        for exposure in (1, 2, 3, 4):
            registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCam",
                    "id": exposure,
                    "obs_id": f"exposure{exposure}",
                    "physical_filter": "d-r",
                },
            )

        registry.insertDimensionData("visit_system", {"instrument": "DummyCam", "id": 1, "name": "default"})

        for visit in (1, 2, 3, 4):
            visit_start = astropy.time.Time(f"2020-01-01 08:0{visit}:00", scale="tai")
            visit_end = astropy.time.Time(f"2020-01-01 08:0{visit}:45", scale="tai")
            registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": visit,
                    "name": f"visit{visit}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                    "datetime_begin": visit_start,
                    "datetime_end": visit_end,
                },
            )

        # Only a couple of exposures are linked to visits.
        for visit in (1, 2):
            registry.insertDimensionData(
                "visit_definition",
                {
                    "instrument": "DummyCam",
                    "exposure": visit,
                    "visit": visit,
                },
            )

        region = ConvexPolygon(
            [
                UnitVector3d(LonLat.fromDegrees(0.0, 1.0)),
                UnitVector3d(LonLat.fromDegrees(2.0, 1.0)),
                UnitVector3d(LonLat.fromDegrees(2.0, -1.0)),
                UnitVector3d(LonLat.fromDegrees(0.0, -1.0)),
            ]
        )
        for visit in (1, 2, 3, 4):
            for detector in (1, 2, 3, 4):
                registry.insertDimensionData(
                    "visit_detector_region",
                    {
                        "instrument": "DummyCam",
                        "visit": visit,
                        "detector": detector,
                        "region": region,
                    },
                )

        # Add a few dataset types.
        storage_class_factory = StorageClassFactory()
        storage_class = storage_class_factory.getStorageClass("StructuredDataDict")

        self.dataset_types: Dict[str, DatasetType] = {}

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"])
        self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"])
        self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True)

        for dataset_type in self.dataset_types.values():
            registry.registerDatasetType(dataset_type)

        # Add a few run collections.
        for run in (1, 2, 3, 4, 5, 6):
            registry.registerRun(f"run{run}")

        # Add a few chained collections; run6 is not in any chained collection.
        registry.registerCollection("chain12", CollectionType.CHAINED)
        registry.setCollectionChain("chain12", ("run1", "run2"))
        registry.registerCollection("chain34", CollectionType.CHAINED)
        registry.setCollectionChain("chain34", ("run3", "run4"))
        registry.registerCollection("chain-all", CollectionType.CHAINED)
        registry.setCollectionChain("chain-all", ("chain12", "chain34", "run5"))

        # And a tagged collection.
        registry.registerCollection("tagged", CollectionType.TAGGED)

    def make_obscore_config(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Config:
        """Make configuration for the obscore manager."""
        obscore_config = Config(os.path.join(TESTDIR, "config", "basic", "obscore.yaml"))
        if collections is not None:
            obscore_config["collections"] = collections
        if collection_type is not None:
            obscore_config["collection_type"] = collection_type
        return obscore_config

    def _insert_dataset(
        self, registry: Registry, run: str, dataset_type: str, do_import: bool = False, **kwargs
    ) -> DatasetRef:
        """Insert or import one dataset into a specified run collection."""
        data_id = {"instrument": "DummyCam", "physical_filter": "d-r"}
        data_id.update(kwargs)
        if do_import:
            ds_type = self.dataset_types[dataset_type]
            dataset_id = registry.datasetIdFactory.makeDatasetId(
                run, ds_type, data_id, DatasetIdGenEnum.UNIQUE
            )
            ref = DatasetRef(ds_type, data_id, id=dataset_id, run=run)
            [ref] = registry._importDatasets([ref])
        else:
            [ref] = registry.insertDatasets(dataset_type, [data_id], run=run)
        return ref

    def _insert_datasets(self, registry: Registry, do_import: bool = False) -> List[DatasetRef]:
        """Insert a small set of datasets into every run collection."""
        return [
            self._insert_dataset(registry, "run1", "raw", detector=1, exposure=1, do_import=do_import),
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=2, do_import=do_import),
            self._insert_dataset(registry, "run3", "raw", detector=3, exposure=3, do_import=do_import),
            self._insert_dataset(registry, "run4", "calexp", detector=4, visit=4, do_import=do_import),
            self._insert_dataset(registry, "run5", "calexp", detector=4, visit=4, do_import=do_import),
            # This dataset type is not configured; it will not appear in obscore.
            self._insert_dataset(registry, "run5", "no_obscore", detector=1, visit=1, do_import=do_import),
            self._insert_dataset(registry, "run6", "raw", detector=1, exposure=4, do_import=do_import),
        ]

    def _obscore_select(self, registry: Registry) -> list:
        """Select all rows from the obscore table."""
        db = registry._db
        table = registry._managers.obscore.table
        results = db.query(table.select())
        return list(results)

    def test_config_errors(self):
        """Test for handling various configuration problems."""
        # This raises pydantic ValidationError, which wraps ValueError.
        exception_re = "'collections' must have one element"
        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(None, "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry([], "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(["run1", "run2"], "TAGGED")

        # Invalid regex.
        with self.assertRaisesRegex(ValueError, "Failed to compile regex"):
            self.make_registry(["+run"], "RUN")

    def test_schema(self):
        """Check how the obscore schema is constructed."""
        config = ObsCoreConfig(obs_collection="", dataset_types=[], facility_name="FACILITY")
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])

        # Extra columns from the top-level config.
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1, "c2": "string", "c3": {"template": "{calib_level}", "type": "float"}},
            dataset_types=[],
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Extra columns from per-dataset-type configs.
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1},
            dataset_types={
                "raw": DatasetTypeConfig(
                    name="raw",
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"c2": "string"},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"c3": 1e10},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Config columns that share names with the static column list; their
        # types are not overridden.
        config = ObsCoreConfig(
            version=0,
            obs_collection="",
            extra_columns={"t_xel": 1e10},
            dataset_types={
                "raw": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"target_name": 1},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"em_xel": "string"},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config)
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])
        self.assertEqual(table_spec.fields["t_xel"].dtype, sqlalchemy.Integer)
        self.assertEqual(table_spec.fields["target_name"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["em_xel"].dtype, sqlalchemy.Integer)

    def test_insert_existing_collection(self):
        """Test insert and import registry methods, with various restrictions
        on collection names.
        """
        # First item is collections, second item is the expected record count.
        test_data = (
            (None, 6),
            (["run1", "run2"], 2),
            (["run[34]"], 2),
            (["[rR]un[^6]"], 5),
        )

        for collections, count in test_data:
            for do_import in (False, True):
                registry = self.make_registry(collections)
                self._insert_datasets(registry, do_import)

                rows = self._obscore_select(registry)
                self.assertEqual(len(rows), count)

    def test_drop_datasets(self):
        """Test for dropping datasets after obscore insert."""
        collections = None
        registry = self.make_registry(collections)
        refs = self._insert_datasets(registry)

        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 6)

        # Drop a single dataset.
        registry.removeDatasets(ref for ref in refs if ref.run == "run1")
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 5)

        # Drop a whole run collection.
        registry.removeCollection("run6")
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 4)

    def test_associate(self):
        """Test for associating datasets to a TAGGED collection."""
        collections = ["tagged"]
        registry = self.make_registry(collections, "TAGGED")
        refs = self._insert_datasets(registry)

        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 0)

        # Associate the run1 dataset; it should be added to obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run1"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        # Associate datasets that are not yet in obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run3"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 2)

        # Disassociate them.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run3"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        # Disassociating a dataset that was never associated should be OK
        # and not throw.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run2"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 1)

        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run1"))
        rows = self._obscore_select(registry)
        self.assertEqual(len(rows), 0)


class SQLiteObsCoreTest(ObsCoreTests, unittest.TestCase):
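    """Run the obscore manager tests against an SQLite registry backend."""
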
    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        # docstring inherited from a base class
        _, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
        config = RegistryConfig()
        config["db"] = f"sqlite:///{filename}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        registry = Registry.createFromConfig(config, butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresObsCoreTest(ObsCoreTests, unittest.TestCase):
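    """Run the obscore manager tests against a PostgreSQL registry backend."""
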
    @staticmethod
    def _handler(postgresql):
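        # on_initialized hook: enable the btree_gist extension in the freshly
        # initialized test database.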
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.server = self.postgresql()
        self.count = 0

    def tearDown(self):
        removeTestTempDir(self.root)
        self.server.stop()

    def make_registry(
        self, collections: Optional[List[str]] = None, collection_type: Optional[str] = None
    ) -> Registry:
        # docstring inherited from a base class
        self.count += 1
        config = RegistryConfig()
        config["db"] = self.server.url()
        # Use a unique namespace for each instance; some tests may use sub-tests.
        config["namespace"] = f"namespace{self.count}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        registry = Registry.createFromConfig(config, butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry


if __name__ == "__main__":
    unittest.main()