Coverage for tests/test_obscore.py: 17% (307 statements)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import gc
import os
import tempfile
import unittest
import warnings
from abc import abstractmethod
from typing import cast

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    CollectionType,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    StorageClassFactory,
)
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import Registry, RegistryConfig, _ButlerRegistry, _RegistryFactory
from lsst.daf.butler.registry.obscore import (
    DatasetTypeConfig,
    ObsCoreConfig,
    ObsCoreLiveTableManager,
    ObsCoreSchema,
)
from lsst.daf.butler.registry.obscore._schema import _STATIC_COLUMNS
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir
from lsst.sphgeom import Box, ConvexPolygon, LonLat, UnitVector3d

try:
    import testing.postgresql  # type: ignore
except ImportError:
    testing = None

TESTDIR = os.path.abspath(os.path.dirname(__file__))


class ObsCoreTests(TestCaseMixin):
    """Base class for testing obscore manager functionality."""

    root: str

    def make_registry(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> _ButlerRegistry:
        """Create a new empty Registry."""
        config = self.make_registry_config(collections, collection_type)
        registry = _RegistryFactory(config).create_from_config(butlerRoot=self.root)
        self.initialize_registry(registry)
        return registry

    @abstractmethod
    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        """Make Registry configuration."""
        raise NotImplementedError()

    def initialize_registry(self, registry: Registry) -> None:
        """Populate Registry with the things that we need for tests."""
        registry.insertDimensionData("instrument", {"name": "DummyCam"})
        registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "r"}
        )
        for detector in (1, 2, 3, 4):
            registry.insertDimensionData(
                "detector", {"instrument": "DummyCam", "id": detector, "full_name": f"detector{detector}"}
            )

        for exposure in (1, 2, 3, 4):
            registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCam",
                    "id": exposure,
                    "obs_id": f"exposure{exposure}",
                    "physical_filter": "d-r",
                },
            )

        registry.insertDimensionData("visit_system", {"instrument": "DummyCam", "id": 1, "name": "default"})

        for visit in (1, 2, 3, 4, 9):
            visit_start = astropy.time.Time(f"2020-01-01 08:0{visit}:00", scale="tai")
            visit_end = astropy.time.Time(f"2020-01-01 08:0{visit}:45", scale="tai")
            registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": visit,
                    "name": f"visit{visit}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                    "datetime_begin": visit_start,
                    "datetime_end": visit_end,
                },
            )

        # Only a couple of exposures are linked to visits.
        for visit in (1, 2):
            registry.insertDimensionData(
                "visit_definition",
                {
                    "instrument": "DummyCam",
                    "exposure": visit,
                    "visit": visit,
                },
            )

        # Map each (visit, detector) pair to a region.
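        # Visits 1-4 are centered at longitudes 2, 92, 182, and 272 degrees
        # (lon = visit * 90 - 88), detectors 1-4 at latitudes -3, -1, 1, and
        # 3 degrees (lat = detector * 2 - 5); each region is a convex polygon
        # spanning +/-1 degree around that center. The spatial checks in
        # test_spatial and test_update_exposure_region rely on this layout.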
        self.regions: dict[tuple[int, int], ConvexPolygon] = {}
        for visit in (1, 2, 3, 4):
            for detector in (1, 2, 3, 4):
                lon = visit * 90 - 88
                lat = detector * 2 - 5
                region = ConvexPolygon(
                    [
                        UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat - 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat - 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon + 1.0, lat + 1.0)),
                        UnitVector3d(LonLat.fromDegrees(lon - 1.0, lat + 1.0)),
                    ]
                )
                registry.insertDimensionData(
                    "visit_detector_region",
                    {
                        "instrument": "DummyCam",
                        "visit": visit,
                        "detector": detector,
                        "region": region,
                    },
                )
                self.regions[(visit, detector)] = region

        # Visit 9 has a non-polygon region.
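        # A Box is not a polygon, so inserting datasets for visit 9 should
        # trigger the "unexpected region type" warning that
        # test_region_type_warning checks for.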
        for detector in (1, 2, 3, 4):
            lat = detector * 2 - 5
            region = Box.fromDegrees(17.0, lat - 1.0, 19.0, lat + 1.0)
            registry.insertDimensionData(
                "visit_detector_region",
                {
                    "instrument": "DummyCam",
                    "visit": 9,
                    "detector": detector,
                    "region": region,
                },
            )

        # Add a few dataset types.
        storage_class_factory = StorageClassFactory()
        storage_class = storage_class_factory.getStorageClass("StructuredDataDict")

        self.dataset_types: dict[str, DatasetType] = {}

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "exposure"])
        self.dataset_types["raw"] = DatasetType("raw", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["calexp"] = DatasetType("calexp", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector", "visit"])
        self.dataset_types["no_obscore"] = DatasetType("no_obscore", dimensions, storage_class)

        dimensions = registry.dimensions.extract(["instrument", "physical_filter", "detector"])
        self.dataset_types["calib"] = DatasetType("calib", dimensions, storage_class, isCalibration=True)

        for dataset_type in self.dataset_types.values():
            registry.registerDatasetType(dataset_type)

        # Add a few run collections.
        for run in (1, 2, 3, 4, 5, 6):
            registry.registerRun(f"run{run}")

        # Add a few chained collections; run6 is not in any chained collection.
        registry.registerCollection("chain12", CollectionType.CHAINED)
        registry.setCollectionChain("chain12", ("run1", "run2"))
        registry.registerCollection("chain34", CollectionType.CHAINED)
        registry.setCollectionChain("chain34", ("run3", "run4"))
        registry.registerCollection("chain-all", CollectionType.CHAINED)
        registry.setCollectionChain("chain-all", ("chain12", "chain34", "run5"))

        # And a tagged collection.
        registry.registerCollection("tagged", CollectionType.TAGGED)
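        # Resulting collection layout:
        #   chain-all -> (chain12 -> run1, run2), (chain34 -> run3, run4), run5
        #   run6 is standalone; "tagged" is an empty TAGGED collection.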

    def make_obscore_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> Config:
        """Make configuration for obscore manager."""
        obscore_config = Config(os.path.join(TESTDIR, "config", "basic", "obscore.yaml"))
        if collections is not None:
            obscore_config["collections"] = collections
        if collection_type is not None:
            obscore_config["collection_type"] = collection_type
        return obscore_config

    def _insert_dataset(
        self, registry: Registry, run: str, dataset_type: str, do_import: bool = False, **kwargs
    ) -> DatasetRef:
        """Insert or import one dataset into a specified run collection."""
        data_id = {"instrument": "DummyCam", "physical_filter": "d-r"}
        data_id.update(kwargs)
        coordinate = DataCoordinate.standardize(data_id, universe=registry.dimensions)
        if do_import:
            ds_type = self.dataset_types[dataset_type]
            ref = DatasetRef(ds_type, coordinate, run=run)
            [ref] = registry._importDatasets([ref])
        else:
            [ref] = registry.insertDatasets(dataset_type, [data_id], run=run)
        return ref

    def _insert_datasets(self, registry: Registry, do_import: bool = False) -> list[DatasetRef]:
        """Insert a small batch of datasets into every run collection."""
        return [
            self._insert_dataset(registry, "run1", "raw", detector=1, exposure=1, do_import=do_import),
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=2, do_import=do_import),
            self._insert_dataset(registry, "run3", "raw", detector=3, exposure=3, do_import=do_import),
            self._insert_dataset(registry, "run4", "calexp", detector=4, visit=4, do_import=do_import),
            self._insert_dataset(registry, "run5", "calexp", detector=4, visit=4, do_import=do_import),
            # This dataset type is not configured for obscore and will not appear there.
            self._insert_dataset(registry, "run5", "no_obscore", detector=1, visit=1, do_import=do_import),
            self._insert_dataset(registry, "run6", "raw", detector=1, exposure=4, do_import=do_import),
        ]

    def test_config_errors(self):
        """Test handling of various configuration problems."""
        # This raises pydantic ValidationError, which wraps ValueError.
        exception_re = "'collections' must have one element"
        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(None, "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry([], "TAGGED")

        with self.assertRaisesRegex(ValueError, exception_re):
            self.make_registry(["run1", "run2"], "TAGGED")

        # Invalid regex.
        with self.assertRaisesRegex(ValueError, "Failed to compile regex"):
            self.make_registry(["+run"], "RUN")

    def test_schema(self):
        """Check how the obscore schema is constructed."""
        config = ObsCoreConfig(obs_collection="", dataset_types={}, facility_name="FACILITY")
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])

        # Extra columns from the top-level config.
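        # Column types are inferred from the Python type of the configured
        # value (int -> BigInteger, str -> String) or taken from an explicit
        # "type" entry ("float" -> Float), as the assertions below verify.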
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1, "c2": "string", "c3": {"template": "{calib_level}", "type": "float"}},
            dataset_types={},
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Extra columns from per-dataset-type configs.
        config = ObsCoreConfig(
            obs_collection="",
            extra_columns={"c1": 1},
            dataset_types={
                "raw": DatasetTypeConfig(
                    name="raw",
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"c2": "string"},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"c3": 1e10},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(
            list(table_spec.fields.names),
            [col.name for col in _STATIC_COLUMNS] + ["c1", "c2", "c3"],
        )
        self.assertEqual(table_spec.fields["c1"].dtype, sqlalchemy.BigInteger)
        self.assertEqual(table_spec.fields["c2"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["c3"].dtype, sqlalchemy.Float)

        # Extra columns whose names clash with static columns do not override
        # the static column types.
        config = ObsCoreConfig(
            version=0,
            obs_collection="",
            extra_columns={"t_xel": 1e10},
            dataset_types={
                "raw": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=1,
                    extra_columns={"target_name": 1},
                ),
                "calexp": DatasetTypeConfig(
                    dataproduct_type="image",
                    calib_level=2,
                    extra_columns={"em_xel": "string"},
                ),
            },
            facility_name="FACILITY",
        )
        schema = ObsCoreSchema(config, [])
        table_spec = schema.table_spec
        self.assertEqual(list(table_spec.fields.names), [col.name for col in _STATIC_COLUMNS])
        self.assertEqual(table_spec.fields["t_xel"].dtype, sqlalchemy.Integer)
        self.assertEqual(table_spec.fields["target_name"].dtype, sqlalchemy.String)
        self.assertEqual(table_spec.fields["em_xel"].dtype, sqlalchemy.Integer)

    def test_insert_existing_collection(self):
        """Test insert and import registry methods, with various restrictions
        on collection names.
        """
        # First item is the collections, second item is the expected record count.
        test_data = (
            (None, 6),
            (["run1", "run2"], 2),
            (["run[34]"], 2),
            (["[rR]un[^6]"], 5),
        )
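        # Collection names in the obscore config may be exact names or regular
        # expressions: "run[34]" matches run3 and run4, and "[rR]un[^6]"
        # matches run1 through run5 (run5 contributes only its calexp, since
        # "no_obscore" is not configured for obscore).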

        for collections, count in test_data:
            for do_import in (False, True):
                registry = self.make_registry(collections)
                obscore = registry.obsCoreTableManager
                assert obscore is not None
                self._insert_datasets(registry, do_import)

                with obscore.query() as result:
                    rows = list(result)
                    self.assertEqual(len(rows), count)

                # Also check the `query` method with COUNT(*).
                with obscore.query([sqlalchemy.sql.func.count()]) as result:
                    scalar = result.scalar_one()
                    self.assertEqual(scalar, count)

    def test_drop_datasets(self):
        """Test for dropping datasets after obscore insert."""
        collections = None
        registry = self.make_registry(collections)
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        refs = self._insert_datasets(registry)

        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 6)

        # Drop a single dataset.
        registry.removeDatasets(ref for ref in refs if ref.run == "run1")
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 5)

        # Drop a whole run collection.
        registry.removeCollection("run6")
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)

    def test_associate(self):
        """Test for associating datasets to a TAGGED collection."""
        collections = ["tagged"]
        registry = self.make_registry(collections, "TAGGED")
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        refs = self._insert_datasets(registry)

        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 0)
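
        # With a TAGGED collection configured, inserting datasets into RUN
        # collections does not by itself record them in obscore; rows appear
        # only when datasets are associated with the tagged collection.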
        # Associate datasets from run1; they should now appear in obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run1"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        # Associate more datasets that are not yet in obscore.
        registry.associate("tagged", (ref for ref in refs if ref.run == "run3"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 2)

        # Disassociate them.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run3"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        # Disassociating a dataset that was never associated should be a
        # no-op and must not raise.
        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run2"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 1)

        registry.disassociate("tagged", (ref for ref in refs if ref.run == "run1"))
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 0)

    def test_region_type_warning(self) -> None:
        """Test that a non-polygon region generates a warning."""
        collections = None
        registry = self.make_registry(collections)

        with warnings.catch_warnings(record=True) as warning_records:
            self._insert_dataset(registry, "run2", "calexp", detector=2, visit=9)
        self.assertEqual(len(warning_records), 1)
        for record in warning_records:
            self.assertRegex(
                str(record.message),
                "Unexpected region type: .*lsst.sphgeom._sphgeom.Box.*",
            )

    def test_update_exposure_region(self) -> None:
        """Test for the update_exposure_regions method."""
        registry = self.make_registry(["run1"])
        obscore = registry.obsCoreTableManager
        assert obscore is not None

        # Exposure 4 is not associated with any visit.
        for detector in (1, 2, 3, 4):
            self._insert_dataset(registry, "run1", "raw", detector=detector, exposure=4)

        # All spatial columns should be None.
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)
            for row in rows:
                self.assertIsNone(row.s_ra)
                self.assertIsNone(row.s_dec)
                self.assertIsNone(row.s_region)

        # Assign regions from visit 4 to detectors 1 and 2.
        count = obscore.update_exposure_regions(
            "DummyCam", [(4, 1, self.regions[(4, 1)]), (4, 2, self.regions[(4, 2)])]
        )
        self.assertEqual(count, 2)

        with obscore.query(["s_ra", "s_dec", "s_region", "lsst_detector"]) as result:
            rows = list(result)
            self.assertEqual(len(rows), 4)
            for row in rows:
                if row.lsst_detector in (1, 2):
                    self.assertIsNotNone(row.s_ra)
                    self.assertIsNotNone(row.s_dec)
                    self.assertIsNotNone(row.s_region)
                else:
                    self.assertIsNone(row.s_ra)
                    self.assertIsNone(row.s_dec)
                    self.assertIsNone(row.s_region)


class SQLiteObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with SQLite backend."""

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        # Docstring inherited from a base class.
        _, filename = tempfile.mkstemp(dir=self.root, suffix=".sqlite3")
        config = RegistryConfig()
        config["db"] = f"sqlite:///{filename}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresObsCoreTest(ObsCoreTests, unittest.TestCase):
    """Unit test for obscore with PostgreSQL backend."""

    @classmethod
    def _handler(cls, postgresql):
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
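        # Note: btree_gist is created here because the daf_butler PostgreSQL
        # backend presumably relies on it (e.g. for GiST-based timespan
        # constraints); since the initialized database is cached, the
        # extension is available in every database cloned from this template.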

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.server = self.postgresql()
        self.count = 0

    def tearDown(self):
        removeTestTempDir(self.root)
        self.server.stop()

    def make_registry_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> RegistryConfig:
        # Docstring inherited from a base class.
        self.count += 1
        config = RegistryConfig()
        config["db"] = self.server.url()
        # Use a unique namespace for each instance; some tests may use sub-tests.
        config["namespace"] = f"namespace{self.count}"
        config["managers", "obscore"] = {
            "cls": "lsst.daf.butler.registry.obscore.ObsCoreLiveTableManager",
            "config": self.make_obscore_config(collections, collection_type),
        }
        return config


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresPgSphereObsCoreTest(PostgresObsCoreTest):
    """Unit test for obscore with PostgreSQL backend and pgsphere plugin."""

    @classmethod
    def _handler(cls, postgresql):
        super()._handler(postgresql)
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            try:
                connection.execute(sqlalchemy.text("CREATE EXTENSION pg_sphere"))
            except sqlalchemy.exc.DatabaseError as exc:
                raise unittest.SkipTest(f"pg_sphere extension does not exist: {exc}") from None

    def make_obscore_config(
        self, collections: list[str] | None = None, collection_type: str | None = None
    ) -> Config:
        """Make configuration for obscore manager."""
        obscore_config = super().make_obscore_config(collections, collection_type)
        obscore_config["spatial_plugins"] = {
            "pgsphere": {
                "cls": "lsst.daf.butler.registry.obscore.pgsphere.PgSphereObsCorePlugin",
                "config": {
                    "region_column": "pgs_region",
                    "position_column": "pgs_center",
                },
            }
        }
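        # The plugin adds the "pgs_region" and "pgs_center" columns to the
        # obscore table; test_spatial below queries them directly using
        # pg_sphere syntax.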
        return obscore_config

    def test_spatial(self):
        """Test that the pgsphere plugin fills spatial columns."""
        collections = None
        registry = self.make_registry(collections)
        obscore = registry.obsCoreTableManager
        assert obscore is not None
        self._insert_datasets(registry)

        # Select everything.
        with obscore.query() as result:
            rows = list(result)
            self.assertEqual(len(rows), 6)

        db = cast(SqlRegistry, registry)._db
        assert registry.obsCoreTableManager is not None
        table = cast(ObsCoreLiveTableManager, registry.obsCoreTableManager).table

        # It is not easy to generate spatial queries with SQLAlchemy, so use
        # plain-text queries for testing.
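
        # In pg_sphere, "<->" computes the angular distance (in radians)
        # between two points, and "point @ region" tests containment; the 0.1
        # threshold below is therefore about 5.7 degrees. The query points
        # match the region centers set up in initialize_registry: visit 1
        # sits near lon 2d and visit 4 near lon 272d.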

        # Position matching visit=1; there is a single dataset.
        query = f"SELECT * FROM {table.key} WHERE pgs_center <-> '(2d,0d)'::spoint < .1"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 1)

        # Position matching visit=4; there are two datasets.
        query = f"SELECT * FROM {table.key} WHERE pgs_center <-> '(272d,0d)'::spoint < .1"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 2)

        # Position inside a region of visit=1; there is a single dataset.
        query = f"SELECT * FROM {table.key} WHERE '(2d,-3d)'::spoint @ pgs_region"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 1)

        # Position inside a region of visit=4; there are two datasets.
        query = f"SELECT * FROM {table.key} WHERE '(272d,3d)'::spoint @ pgs_region"
        with db.query(sqlalchemy.text(query)) as results:
            self.assertEqual(len(list(results)), 2)


if __name__ == "__main__":
    unittest.main()