Coverage for tests/test_obscore.py: 17% (308 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import gc
import os
import tempfile
import unittest
import warnings
from abc import abstractmethod
from typing import cast

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    StorageClassFactory,
)
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import Registry, RegistryConfig
from lsst.daf.butler.registry.obscore import (
    DatasetTypeConfig,
    ObsCoreConfig,
    ObsCoreLiveTableManager,
    ObsCoreSchema,
)
from lsst.daf.butler.registry.obscore._schema import _STATIC_COLUMNS
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir
from lsst.sphgeom import Box, ConvexPolygon, LonLat, UnitVector3d

try:
    import testing.postgresql  # type: ignore
except ImportError:
    testing = None

TESTDIR = os.path.abspath(os.path.dirname(__file__))
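

# Usage sketch (illustrative comment only, using the same APIs the tests
# below exercise): with a live obscore manager configured, inserting a
# dataset into a monitored collection adds a matching obscore row, which
# can be checked with:
#
#     with registry.obsCoreTableManager.query() as result:
#         rows = list(result)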


class ObsCoreTests(TestCaseMixin):
    """Base class for testing obscore manager functionality."""

    root: str

    def make_registry(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> Registry:
        """Create new empty Registry."""
        config = self.make_registry_config(collections, collection_type)
        registry = Registry.createFromConfig(config, butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry

    @abstractmethod
    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        """Make Registry configuration."""
        raise NotImplementedError()

    def initialize_registry(self, registry: Registry) -> None:
        """Populate Registry with the things that we need for tests."""
        registry.insertDimensionData("instrument", {"name": "DummyCam"})
        registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "r"}
        )
        for detector in (1, 2, 3, 4):
            registry.insertDimensionData(
                "detector", {"instrument": "DummyCam", "id": detector, "full_name": f"detector{detector}"}
            )

        for exposure in (1, 2, 3, 4):
            registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCam",
                    "id": exposure,
                    "obs_id": f"exposure{exposure}",
                    "physical_filter": "d-r",
                },
            )

        registry.insertDimensionData("visit_system", {"instrument": "DummyCam", "id": 1, "name": "default"})

        for visit in (1, 2, 3, 4, 9):
            visit_start = astropy.time.Time(f"2020-01-01 08:0{visit}:00", scale="tai")
            visit_end = astropy.time.Time(f"2020-01-01 08:0{visit}:45", scale="tai")
            registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": visit,
                    "name": f"visit{visit}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                    "datetime_begin": visit_start,
                    "datetime_end": visit_end,
                },
            )

        # Only a couple of exposures are linked to visits.
        for visit in (1, 2):
            registry.insertDimensionData(
                "visit_definition",
                {
                    "instrument": "DummyCam",
                    "exposure": visit,
                    "visit": visit,
                },
            )

        # Map each (visit, detector) pair to its region.
        self.regions: dict[tuple[int, int], ConvexPolygon] = {}
        for visit in (1, 2, 3, 4):
            for detector in (1, 2, 3, 4):
                lon = visit * 90 - 88
                lat = detector * 2 - 5
                region = ConvexPolygon(
                    [
                        UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat - 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat - 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat + 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat + 1.0)),
                    ]
                )
                registry.insertDimensionData(
                    "visit_detector_region",
                    {
                        "instrument": "DummyCam",
                        "visit": visit,
                        "detector": detector,
                        "region": region,
                    },
                )
                self.regions[(visit, detector)] = region
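
        # The grid above puts visit V at longitude 90*V - 88 degrees
        # (2, 92, 182, 272) and detector D at latitude 2*D - 5 degrees
        # (-3, -1, 1, 3), each region a 2x2 degree box; test_spatial relies
        # on these positions when probing points such as (2d,0d) and
        # (272d,0d).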

        # Visit 9 has a non-polygon region.
        for detector in (1, 2, 3, 4):
            lat = detector * 2 - 5
            region = Box.fromDegrees(17.0, lat - 1.0, 19.0, lat + 1.0)
            registry.insertDimensionData(
                "visit_detector_region",
                {
                    "instrument": "DummyCam",
                    "visit": 9,
                    "detector": detector,
                    "region": region,
                },
            )

        # Add a few dataset types.
        storage_class_factory = StorageClassFactory()
        storage_class = storage_class_factory.getStorageClass("StructuredDataDict")

        self.dataset_types: dict[str, DatasetType] = {}

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"])
        self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"])
        self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True)

        for dataset_type in self.dataset_types.values():
            registry.registerDatasetType(dataset_type)

        # Add a few run collections.
        for run in (1, 2, 3, 4, 5, 6):
            registry.registerRun(f"run{run}")

        # Add a few chained collections; run6 is not in any chained collection.
        registry.registerCollection("chain12", CollectionType.CHAINED)
        registry.setCollectionChain("chain12", ("run1", "run2"))
        registry.registerCollection("chain34", CollectionType.CHAINED)
        registry.setCollectionChain("chain34", ("run3", "run4"))
        registry.registerCollection("chain-all", CollectionType.CHAINED)
        registry.setCollectionChain("chain-all", ("chain12", "chain34", "run5"))

        # And a tagged collection.
        registry.registerCollection("tagged", CollectionType.TAGGED)

    def make_obscore_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> Config:
        """Make configuration for obscore manager."""
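        # The file config/basic/obscore.yaml is assumed to configure the
        # "raw" and "calexp" dataset types but not "no_obscore"; several
        # tests below rely on that omission to check dataset-type filtering.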
        obscore_config = Config(os.path.join(TESTDIR, "config", "basic", "obscore.yaml"))
        if collections is not None:
            obscore_config["collections"] = collections
        if collection_type is not None:
            obscore_config["collection_type"] = collection_type
        return obscore_config

    def _insert_dataset(
        self, registry: Registry, run: str, dataset_type: str, do_import: bool = False, **kwargs
    ) -> DatasetRef:
        """Insert or import one dataset into a specified run collection."""
        data_id = {"instrument": "DummyCam", "physical_filter": "d-r"}
        data_id.update(kwargs)
        coordinate = DataCoordinate.standardize(data_id, universe=registry.dimensions)
        if do_import:
            ds_type = self.dataset_types[dataset_type]
            dataset_id = registry.datasetIdFactory.makeDatasetId(
                run, ds_type, coordinate, DatasetIdGenEnum.UNIQUE
            )
            ref = DatasetRef(ds_type, coordinate, id=dataset_id, run=run)
            [ref] = registry._importDatasets([ref])
        else:
            [ref] = registry.insertDatasets(dataset_type, [data_id], run=run)
        return ref

    def _insert_datasets(self, registry: Registry, do_import: bool = False) -> list[DatasetRef]:
        """Insert a small batch of datasets into every run collection."""
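        # Seven datasets in total: six use dataset types configured for
        # obscore ("raw" and "calexp"), so at most six obscore rows can
        # appear; the "no_obscore" one never does.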
        return [
            self._insert_dataset(registry, "run1", "raw", detector=1, exposure=1, do_import=do_import),
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=2, do_import=do_import),
            self._insert_dataset(registry, "run3", "raw", detector=3, exposure=3, do_import=do_import),
            self._insert_dataset(registry, "run4", "calexp", detector=4, visit=4, do_import=do_import),
            self._insert_dataset(registry, "run5", "calexp", detector=4, visit=4, do_import=do_import),
            # This dataset type is not configured, so it will not be in obscore.
            self._insert_dataset(registry, "run5", "no_obscore", detector=1, visit=1, do_import=do_import),
            self._insert_dataset(registry, "run6", "raw", detector=1, exposure=4, do_import=do_import),
        ]

    def test_config_errors(self):
        """Test for handling various configuration problems."""
        # This raises pydantic ValidationError, which wraps ValueError.
        exception_re = "'collections' must have one element"
        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(None, "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry([], "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(["run1", "run2"], "TAGGED")

        # Invalid regex.
        with self.assertRaisesRegex(ValueError, "Failed to compile regex"):
            self.make_registry(["+run"], "RUN")

    def test_schema(self):
        """Check how obscore schema is constructed."""
        config = ObsCoreConfig(obs_collection="", dataset_types=[], facility_name="FACILITY")
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])

        # Extra columns from the top-level config.
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1, "c2": "string", "c3": {"template": "{calib_level}", "type": "float"}},
            dataset_types=[],
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)
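
        # As the assertions above show, column types are derived from the
        # example values (int -> BigInteger, str -> String) or taken from an
        # explicit "type" entry in the column specification.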

        # Extra columns from per-dataset-type configs.
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1},
            dataset_types={
                "raw": DatasetTypeConfig(
                    name="raw",
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"c2": "string"},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"c3": 1e10},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Config columns that share a name with a static column do not
        # override the static column's type.
        config = ObsCoreConfig(
            version=0,
            obs_collection="",
            extra_columns={"t_xel": 1e10},
            dataset_types={
                "raw": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"target_name": 1},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"em_xel": "string"},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])
        self.assertEqual(table_spec.fields["t_xel"].dtype, sqlalchemy.Integer)
        self.assertEqual(table_spec.fields["target_name"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["em_xel"].dtype, sqlalchemy.Integer)

    def test_insert_existing_collection(self):
        """Test insert and import registry methods, with various restrictions
        on collection names.
        """
        # First item is collections, second item is expected record count.
        test_data = (
            (None, 6),
            (["run1", "run2"], 2),
            (["run[34]"], 2),
            (["[rR]un[^6]"], 5),
        )
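
        # Collection names in the obscore config are treated as regular
        # expressions (see the invalid-regex check in test_config_errors),
        # so "[rR]un[^6]" matches run1 through run5; run5's "no_obscore"
        # dataset never counts, which is why None (all collections) yields
        # 6 rows rather than 7.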

        for collections, count in test_data:
            for do_import in (False, True):
                registry = self.make_registry(collections)
                obscore = registry.obsCoreTableManager
                assert obscore is not None
                self._insert_datasets(registry, do_import)

                with obscore.query() as result:
                    rows = list(result)
                    self.assertEqual(len(rows), count)

                # Also check the `query` method with COUNT(*).
                with obscore.query([sqlalchemy.sql.func.count()]) as result:
                    scalar = result.scalar_one()
                    self.assertEqual(scalar, count)

    def test_drop_datasets(self):
        """Test for dropping datasets after obscore insert."""
        collections = None
        registry = self.make_registry(collections)
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        refs = self._insert_datasets(registry)

        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 6)

        # Drop a single dataset.
        registry.removeDatasets(ref for ref in refs if ref.run == "run1")
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 5)

        # Drop a whole run collection.
        registry.removeCollection("run6")
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)

    def test_associate(self):
        """Test for associating datasets to a TAGGED collection."""
        collections = ["tagged"]
        registry = self.make_registry(collections, "TAGGED")
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        refs = self._insert_datasets(registry)

        # Nothing is tagged yet, so obscore is empty.
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 0)

        # Associate a dataset; it should appear in obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run1"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        # Associate more datasets that are not yet in obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run3"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 2)

        # Disassociate them.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run3"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        # Disassociating a dataset that was never associated should be OK
        # and not throw.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run2"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run1"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 0)

    def test_region_type_warning(self) -> None:
        """Test that a non-polygon region generates a warning."""
        collections = None
        registry = self.make_registry(collections)

        with warnings.catch_warnings(record=True) as warning_records:
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=9)
        self.assertEqual(len(warning_records), 1)
        for record in warning_records:
            self.assertRegex(
                str(record.message),
                "Unexpected region type: .*lsst.sphgeom._sphgeom.Box.*",
            )

    def test_update_exposure_region(self) -> None:
        """Test for the update_exposure_regions method."""
        registry = self.make_registry(["run1"])
        obscore = registry.obsCoreTableManager
        assert obscore is not None

        # Exposure 4 is not associated with any visit.
        for detector in (1, 2, 3, 4):
            self._insert_dataset(registry, "run1", "raw", detector=detector, exposure=4)

        # All spatial columns should be None.
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)
            for row in rows:
                self.assertIsNone(row.s_ra)
                self.assertIsNone(row.s_dec)
                self.assertIsNone(row.s_region)

        # Assign regions from visit 4 to two of the detectors.
        count = obscore.update_exposure_regions(
            "DummyCam", [(4, 1, self.regions[(4, 1)]), (4, 2, self.regions[(4, 2)])]
        )
        self.assertEqual(count, 2)

        with obscore.query(["s_ra", "s_dec", "s_region", "lsst_detector"]) as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)
            for row in rows:
                if row.lsst_detector in (1, 2):
                    self.assertIsNotNone(row.s_ra)
                    self.assertIsNotNone(row.s_dec)
                    self.assertIsNotNone(row.s_region)
                else:
                    self.assertIsNone(row.s_ra)
                    self.assertIsNone(row.s_dec)
                    self.assertIsNone(row.s_region)


class SQLiteObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with SQLite backend."""

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        # docstring inherited from a base class
        _, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
        config = RegistryConfig()
        config["db"] = f"sqlite:///{filename}"
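        # Plugging the live obscore manager into the registry's manager set
        # makes the registry maintain obscore rows synchronously with the
        # dataset insert/import/remove operations exercised above.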
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with PostgreSQL backend."""

    @classmethod
    def _handler(cls, postgresql):
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
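            # The registry schema is assumed to need the btree_gist
            # extension (e.g., for exclusion constraints involving
            # timespans), so create it once when the cached template
            # database is initialized.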
            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.server = self.postgresql()
        self.count = 0

    def tearDown(self):
        removeTestTempDir(self.root)
        self.server.stop()

    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        # docstring inherited from a base class
        self.count += 1
        config = RegistryConfig()
        config["db"] = self.server.url()
        # Use a unique namespace for each instance; some tests may use
        # sub-tests.
        config["namespace"] = f"namespace{self.count}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresPgSphereObsCoreTest(PostgresObsCoreTest):
    """Unit test for obscore with PostgreSQL backend and pgSphere plugin."""

    @classmethod
    def _handler(cls, postgresql):
        super()._handler(postgresql)
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            try:
                connection.execute(sqlalchemy.text("CREATE EXTENSION pg_sphere"))
            except sqlalchemy.exc.DatabaseError as exc:
                raise unittest.SkipTest(f"pg_sphere extension does not exist: {exc}")

    def make_obscore_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> Config:
        """Make configuration for obscore manager."""
        obscore_config = super().make_obscore_config(collections, collection_type)
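        # The plugin maintains two extra spatial columns alongside the
        # standard s_ra/s_dec/s_region (a pgSphere region and a pgSphere
        # center point, judging from test_spatial below); their names are
        # set via region_column and position_column.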
        obscore_config["spatial_plugins"] = {
            "pgsphere": {
                "cls": "lsst.daf.butler.registry.obscore.pgsphere.PgSphereObsCorePlugin",
                "config": {
                    "region_column": "pgs_region",
                    "position_column": "pgs_center",
                },
            }
        }
        return obscore_config

    def test_spatial(self):
        """Test that the pgSphere plugin fills the spatial columns."""
        collections = None
        registry = self.make_registry(collections)
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        self._insert_datasets(registry)

        # Select everything.
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 6)

        db = cast(SqlRegistry, registry)._db
        assert registry.obsCoreTableManager is not None
        table = cast(ObsCoreLiveTableManager, registry.obsCoreTableManager).table

        # It is not easy to generate spatial queries in SQLAlchemy, so use
        # plain-text queries for testing.
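
        # pgSphere operator notes (an assumption about the installed
        # extension): <-> computes the angular distance between spherical
        # objects in radians (0.1 rad is about 5.7 degrees, comfortably
        # larger than the 2-degree test regions), and @ tests whether the
        # left-hand object is contained in the right-hand one.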

        # Position matching visit=1; there is a single dataset.
        query = f"SELECT * FROM {table.key} WHERE pgs_center <-> '(2d,0d)'::spoint < .1"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 1)

        # Position matching visit=4; there are two datasets.
        query = f"SELECT * FROM {table.key} WHERE pgs_center <-> '(272d,0d)'::spoint < .1"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 2)

        # Position matching visit=1; there is a single dataset.
        query = f"SELECT * FROM {table.key} WHERE '(2d,-3d)'::spoint @ pgs_region"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 1)

        # Position matching visit=4; there are two datasets.
        query = f"SELECT * FROM {table.key} WHERE '(272d,3d)'::spoint @ pgs_region"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 2)


if __name__ == "__main__":
    unittest.main()