# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    MissingCollectionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
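
    # For example (an illustrative sketch; the manager path below is an
    # assumption, not taken from this file), a subclass could swap in a
    # different collections manager by setting the class member:
    #
    #     class SynthIntKeyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.synthIntKey."
    #             "SynthIntKeyCollectionManager"
    #         )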

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
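
    # A concrete subclass might implement the two abstract methods roughly as
    # in this hypothetical sketch (the class name is invented, and the final
    # factory call, which varies between daf_butler versions, is elided):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # sharing is impossible for in-memory SQLite
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return ...  # construct and return a Registry from ``config``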

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
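
    # Typical usage (an illustrative sketch, not taken from this file): pass
    # the lazy results object straight in, e.g.
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected_data_ids,
    #     )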

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite says the limit is 32k, but in practice
        # it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first with
        # duplicates, the second with matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
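        # A minimal sketch of the chunking being exercised above (illustrative
        # only; the real batching lives in the database layer, and the 1k
        # batch size is taken from the comment above):
        #
        #     for start in range(0, len(ids), 1000):
        #         chunk = ids[start : start + 1000]
        #         ...  # issue one IN clause per chunk, then merge the rows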
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)
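
        # The coercion presumably relies on numbers.Integral, per the
        # Records.fromDict comment above; an illustrative sketch of the idea:
        #
        #     import numbers
        #     if isinstance(value, numbers.Integral):
        #         value = int(value)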

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique (reproducible) IDs; they can be re-imported multiple
        # times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)
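                # Version 5 means a name-based (SHA-1) UUID, conceptually like
                # uuid.uuid5(namespace, name) with a name derived from the
                # dataset type, data ID, and (for DATAID_TYPE_RUN) the run.
                # The exact namespace and name format are internal to
                # DatasetIdFactory, so this is only a sketch of the idea.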

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by the registry; this test checks
        that such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found via chain2 as
        # well, since run2 is at the front of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
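
    # Flattening is a recursive expansion of CHAINED children before the
    # chain is stored; conceptually something like this illustrative sketch
    # (not the actual implementation):
    #
    #     def _flatten(registry, names):
    #         for name in names:
    #             if registry.getCollectionType(name) is CollectionType.CHAINED:
    #                 yield from _flatten(registry, registry.getCollectionChain(name))
    #             else:
    #                 yield name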

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the two collections, while
                # 101 has the same dataset in both.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not among the requested dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimension records.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager."""
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 2 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 2 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set the value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update the value of an existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
1342 registry = self.makeRegistry()
1343 self.loadData(registry, "base.yaml")
1344 self.loadData(registry, "datasets.yaml")
1345 self.assertCountEqual(
1346 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1347 [
1348 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1349 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1350 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1351 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1352 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1353 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1354 ],
1355 )
1356 self.assertCountEqual(
1357 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1358 [
1359 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1360 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1361 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1362 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1363 ],
1364 )
1365 self.assertCountEqual(
1366 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1367 [
1368 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1369 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1370 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1371 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1372 ],
1373 )
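# The findFirst behavior checked above can be modeled in plain Python: walk
# the collections in the order given and keep only the first dataset seen for
# each data ID. A minimal sketch of that rule (illustrative only, not the
# registry's actual SQL implementation):
@staticmethod
def _find_first_sketch(collections: list[dict[int, str]]) -> dict[int, str]:
    """Return the first value seen for each key across ordered mappings."""
    result: dict[int, str] = {}
    for datasets_by_detector in collections:
        for detector, ref in datasets_by_detector.items():
            result.setdefault(detector, ref)
    return result
# e.g. with imported_g = {1: "g1", 2: "g2", 3: "g3"} and
# imported_r = {2: "r2", 3: "r3", 4: "r4"}, the find-first result for
# [imported_g, imported_r] keeps g1, g2, g3, and r4, matching the test above.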
1375 def testQueryResults(self):
1376 """Test querying for data IDs and then manipulating the QueryResults
1377 object returned to perform other queries.
1378 """
1379 registry = self.makeRegistry()
1380 self.loadData(registry, "base.yaml")
1381 self.loadData(registry, "datasets.yaml")
1382 bias = registry.getDatasetType("bias")
1383 flat = registry.getDatasetType("flat")
1384 # Obtain expected results from methods other than those we're testing
1385 # here. That includes:
1386 # - the dimensions of the data IDs we want to query:
1387 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1388 # - the dimensions of some other data IDs we'll extract from that:
1389 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1390 # - the data IDs we expect to obtain from the first queries:
1391 expectedDataIds = DataCoordinateSet(
1392 {
1393 DataCoordinate.standardize(
1394 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1395 )
1396 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1397 },
1398 dimensions=expected_dimensions,
1399 hasFull=False,
1400 hasRecords=False,
1401 )
1402 # - the flat datasets we expect to find from those data IDs, in just
1403 # one collection (so deduplication is irrelevant):
1404 expectedFlats = [
1405 registry.findDataset(
1406 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1407 ),
1408 registry.findDataset(
1409 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1410 ),
1411 registry.findDataset(
1412 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1413 ),
1414 ]
1415 # - the data IDs we expect to extract from that:
1416 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1417 # - the bias datasets we expect to find from those data IDs, after we
1418 # subset out the physical_filter dimension, both with duplicates:
1419 expectedAllBiases = [
1420 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1421 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1422 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1423 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1424 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1425 ]
1426 # - ...and without duplicates:
1427 expectedDeduplicatedBiases = [
1428 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1429 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1430 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1431 ]
1432 # Test against those expected results, using a "lazy" query for the
1433 # data IDs (which re-executes that query each time we use it to do
1434 # something new).
1435 dataIds = registry.queryDataIds(
1436 ["detector", "physical_filter"],
1437 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1438 instrument="Cam1",
1439 )
1440 self.assertEqual(dataIds.dimensions, expected_dimensions)
1441 self.assertEqual(dataIds.toSet(), expectedDataIds)
1442 self.assertCountEqual(
1443 list(
1444 dataIds.findDatasets(
1445 flat,
1446 collections=["imported_r"],
1447 )
1448 ),
1449 expectedFlats,
1450 )
1451 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1452 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1453 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1454 self.assertCountEqual(
1455 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1456 expectedAllBiases,
1457 )
1458 self.assertCountEqual(
1459 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1460 expectedDeduplicatedBiases,
1461 )
1463 # Searching for a dataset with dimensions we had projected away
1464 # restores those dimensions.
1465 self.assertCountEqual(
1466 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1467 expectedFlats,
1468 )
1470 # Use a named dataset type that does not exist and a dataset type
1471 # object that does not exist.
1472 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1474 # Test both string name and dataset type object.
1475 test_type: str | DatasetType
1476 for test_type, test_type_name in (
1477 (unknown_type, unknown_type.name),
1478 (unknown_type.name, unknown_type.name),
1479 ):
1480 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1481 list(
1482 subsetDataIds.findDatasets(
1483 test_type, collections=["imported_r", "imported_g"], findFirst=True
1484 )
1485 )
1487 # Materialize the bias dataset queries (only) by putting the results
1488 # into temporary tables, then repeat those tests.
1489 with subsetDataIds.findDatasets(
1490 bias, collections=["imported_r", "imported_g"], findFirst=False
1491 ).materialize() as biases:
1492 self.assertCountEqual(list(biases), expectedAllBiases)
1493 with subsetDataIds.findDatasets(
1494 bias, collections=["imported_r", "imported_g"], findFirst=True
1495 ).materialize() as biases:
1496 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1497 # Materialize the data ID subset query, but not the dataset queries.
1498 with subsetDataIds.materialize() as subsetDataIds:
1499 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1500 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1501 self.assertCountEqual(
1502 list(
1503 subsetDataIds.findDatasets(
1504 bias, collections=["imported_r", "imported_g"], findFirst=False
1505 )
1506 ),
1507 expectedAllBiases,
1508 )
1509 self.assertCountEqual(
1510 list(
1511 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1512 ),
1513 expectedDeduplicatedBiases,
1514 )
1515 # Materialize the dataset queries, too.
1516 with subsetDataIds.findDatasets(
1517 bias, collections=["imported_r", "imported_g"], findFirst=False
1518 ).materialize() as biases:
1519 self.assertCountEqual(list(biases), expectedAllBiases)
1520 with subsetDataIds.findDatasets(
1521 bias, collections=["imported_r", "imported_g"], findFirst=True
1522 ).materialize() as biases:
1523 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1524 # Materialize the original query, but none of the follow-up queries.
1525 with dataIds.materialize() as dataIds:
1526 self.assertEqual(dataIds.dimensions, expected_dimensions)
1527 self.assertEqual(dataIds.toSet(), expectedDataIds)
1528 self.assertCountEqual(
1529 list(
1530 dataIds.findDatasets(
1531 flat,
1532 collections=["imported_r"],
1533 )
1534 ),
1535 expectedFlats,
1536 )
1537 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1538 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1539 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1540 self.assertCountEqual(
1541 list(
1542 subsetDataIds.findDatasets(
1543 bias, collections=["imported_r", "imported_g"], findFirst=False
1544 )
1545 ),
1546 expectedAllBiases,
1547 )
1548 self.assertCountEqual(
1549 list(
1550 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1551 ),
1552 expectedDeduplicatedBiases,
1553 )
1554 # Materialize just the bias dataset queries.
1555 with subsetDataIds.findDatasets(
1556 bias, collections=["imported_r", "imported_g"], findFirst=False
1557 ).materialize() as biases:
1558 self.assertCountEqual(list(biases), expectedAllBiases)
1559 with subsetDataIds.findDatasets(
1560 bias, collections=["imported_r", "imported_g"], findFirst=True
1561 ).materialize() as biases:
1562 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1563 # Materialize the subset data ID query, but not the dataset
1564 # queries.
1565 with subsetDataIds.materialize() as subsetDataIds:
1566 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1567 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1568 self.assertCountEqual(
1569 list(
1570 subsetDataIds.findDatasets(
1571 bias, collections=["imported_r", "imported_g"], findFirst=False
1572 )
1573 ),
1574 expectedAllBiases,
1575 )
1576 self.assertCountEqual(
1577 list(
1578 subsetDataIds.findDatasets(
1579 bias, collections=["imported_r", "imported_g"], findFirst=True
1580 )
1581 ),
1582 expectedDeduplicatedBiases,
1583 )
1584 # Materialize the bias dataset queries, too, so now we're
1585 # materializing every single step.
1586 with subsetDataIds.findDatasets(
1587 bias, collections=["imported_r", "imported_g"], findFirst=False
1588 ).materialize() as biases:
1589 self.assertCountEqual(list(biases), expectedAllBiases)
1590 with subsetDataIds.findDatasets(
1591 bias, collections=["imported_r", "imported_g"], findFirst=True
1592 ).materialize() as biases:
1593 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
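# Note for readers: materialize() is a context manager that writes the current
# results into a temporary table and yields a results object backed by that
# table, so the follow-up queries above stop re-executing the original query.
# (A summary of the behavior exercised above, not new API.)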
1595 def testStorageClassPropagation(self):
1596 """Test that queries for datasets respect the storage class passed in
1597 as part of a full dataset type.
1598 """
1599 registry = self.makeRegistry()
1600 self.loadData(registry, "base.yaml")
1601 dataset_type_in_registry = DatasetType(
1602 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1603 )
1604 registry.registerDatasetType(dataset_type_in_registry)
1605 run = "run1"
1606 registry.registerRun(run)
1607 (inserted_ref,) = registry.insertDatasets(
1608 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1609 )
1610 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1611 query_dataset_type = DatasetType(
1612 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1613 )
1614 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1615 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1616 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1617 (query_datasets_ref,) = query_datasets_result
1618 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1619 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1620 query_dataset_type, collections=[run]
1621 )
1622 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1623 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1624 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1625 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1626 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1627 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1628 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1630 def testEmptyDimensionsQueries(self):
1631 """Test Query and QueryResults objects in the case where there are no
1632 dimensions.
1633 """
1634 # Set up test data: one dataset type, two runs, one dataset in each.
1635 registry = self.makeRegistry()
1636 self.loadData(registry, "base.yaml")
1637 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1638 registry.registerDatasetType(schema)
1639 dataId = DataCoordinate.make_empty(registry.dimensions)
1640 run1 = "run1"
1641 run2 = "run2"
1642 registry.registerRun(run1)
1643 registry.registerRun(run2)
1644 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1645 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1646 # Query directly for both of the datasets, then for each one at a time.
1647 self.checkQueryResults(
1648 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1649 )
1650 self.checkQueryResults(
1651 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1652 [dataset1],
1653 )
1654 self.checkQueryResults(
1655 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1656 [dataset2],
1657 )
1658 # Query for data IDs with no dimensions.
1659 dataIds = registry.queryDataIds([])
1660 self.checkQueryResults(dataIds, [dataId])
1661 # Use queried data IDs to find the datasets.
1662 self.checkQueryResults(
1663 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1664 [dataset1, dataset2],
1665 )
1666 self.checkQueryResults(
1667 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1668 [dataset1],
1669 )
1670 self.checkQueryResults(
1671 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1672 [dataset2],
1673 )
1674 # Now materialize the data ID query results and repeat those tests.
1675 with dataIds.materialize() as dataIds:
1676 self.checkQueryResults(dataIds, [dataId])
1677 self.checkQueryResults(
1678 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1679 [dataset1],
1680 )
1681 self.checkQueryResults(
1682 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1683 [dataset2],
1684 )
1685 # Query for non-empty data IDs, then subset that to get the empty one.
1686 # Repeat the above tests starting from that.
1687 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1688 self.checkQueryResults(dataIds, [dataId])
1689 self.checkQueryResults(
1690 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1691 [dataset1, dataset2],
1692 )
1693 self.checkQueryResults(
1694 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1695 [dataset1],
1696 )
1697 self.checkQueryResults(
1698 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1699 [dataset2],
1700 )
1701 with dataIds.materialize() as dataIds:
1702 self.checkQueryResults(dataIds, [dataId])
1703 self.checkQueryResults(
1704 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1705 [dataset1, dataset2],
1706 )
1707 self.checkQueryResults(
1708 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1709 [dataset1],
1710 )
1711 self.checkQueryResults(
1712 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1713 [dataset2],
1714 )
1715 # Query for non-empty data IDs, then materialize, then subset to get
1716 # the empty one. Repeat again.
1717 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1718 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1719 self.checkQueryResults(dataIds, [dataId])
1720 self.checkQueryResults(
1721 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1722 [dataset1, dataset2],
1723 )
1724 self.checkQueryResults(
1725 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1726 [dataset1],
1727 )
1728 self.checkQueryResults(
1729 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1730 [dataset2],
1731 )
1732 with dataIds.materialize() as dataIds:
1733 self.checkQueryResults(dataIds, [dataId])
1734 self.checkQueryResults(
1735 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1736 [dataset1, dataset2],
1737 )
1738 self.checkQueryResults(
1739 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1740 [dataset1],
1741 )
1742 self.checkQueryResults(
1743 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1744 [dataset2],
1745 )
1746 # Repeat the materialization tests with a dimension element that isn't
1747 # cached, so there's no way we can know when building the query whether
1748 # there are any rows or not (there aren't).
1749 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1750 with dataIds.materialize() as dataIds:
1751 self.checkQueryResults(dataIds, [])
1752 self.checkQueryResults(
1753 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1754 )
1755 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1756 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1757 # Query for non-empty data IDs with a constraint on an empty-data-ID
1758 # dataset that exists.
1759 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1760 self.checkQueryResults(
1761 dataIds.subset(unique=True),
1762 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1763 )
1764 # Again query for non-empty data IDs with a constraint on empty-data-ID
1765 # datasets, but when the datasets don't exist. We delete the existing
1766 # dataset and query just that collection rather than creating a new
1767 # empty collection because this is a bit less likely for our build-time
1768 # logic to shortcut-out (via the collection summaries), and such a
1769 # shortcut would make this test a bit more trivial than we'd like.
1770 registry.removeDatasets([dataset2])
1771 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1772 self.checkQueryResults(dataIds, [])
1774 def testDimensionDataModifications(self):
1775 """Test that modifying dimension records via:
1776 syncDimensionData(..., update=True) and
1777 insertDimensionData(..., replace=True) works as expected, even in the
1778 presence of datasets using those dimensions and spatial overlap
1779 relationships.
1780 """
1782 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1783 """Unpack a sphgeom.RangeSet into the integers it contains."""
1784 for begin, end in ranges:
1785 yield from range(begin, end)
1787 def _range_set_hull(
1788 ranges: lsst.sphgeom.RangeSet,
1789 pixelization: lsst.sphgeom.HtmPixelization,
1790 ) -> lsst.sphgeom.ConvexPolygon:
1791 """Create a ConvexPolygon hull of the region defined by a set of
1792 HTM pixelization index ranges.
1793 """
1794 points = []
1795 for index in _unpack_range_set(ranges):
1796 points.extend(pixelization.triangle(index).getVertices())
1797 return lsst.sphgeom.ConvexPolygon(points)
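# For example, with the htm6 pixelization and index 12288 used below,
# RangeSet(12288).scaled(4) is the half-open range [49152, 49156), i.e. the
# four child trixels of that parent, and _range_set_hull of those ranges is
# the convex hull of their vertices. (Illustrative values only; they follow
# from HTM's four-way subdivision of each trixel.)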
1799 # Use HTM to set up an initial parent region (one arbitrary trixel)
1800 # and four child regions (the trixels within the parent at the next
1801 # level). We'll use the parent as a tract/visit region and the children
1802 # as its patch/visit_detector regions.
1803 registry = self.makeRegistry()
1804 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1805 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1806 index = 12288
1807 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1808 assert htm6.universe().contains(child_ranges_small)
1809 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1810 parent_region_small = lsst.sphgeom.ConvexPolygon(
1811 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1812 )
1813 assert all(parent_region_small.contains(c) for c in child_regions_small)
1814 # Make a larger version of each child region, defined to be the set of
1815 # htm6 trixels that overlap the original's bounding circle. Make a new
1816 # parent that's the convex hull of the new children.
1817 child_regions_large = [
1818 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1819 ]
1820 assert all(
1821 large.contains(small)
1822 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1823 )
1824 parent_region_large = lsst.sphgeom.ConvexPolygon(
1825 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1826 )
1827 assert all(parent_region_large.contains(c) for c in child_regions_large)
1828 assert parent_region_large.contains(parent_region_small)
1829 assert not parent_region_small.contains(parent_region_large)
1830 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1831 # Find some commonSkyPix indices that overlap the large regions but do
1832 # not overlap the small regions. We use commonSkyPix here to make sure the
1833 # real tests later involve what's in the database, not just post-query
1834 # filtering of regions.
1835 child_difference_indices = []
1836 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1837 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1838 assert difference, "if this is empty, we can't test anything useful with these regions"
1839 assert all(
1840 not commonSkyPix.triangle(d).isDisjointFrom(large)
1841 and commonSkyPix.triangle(d).isDisjointFrom(small)
1842 for d in difference
1843 )
1844 child_difference_indices.append(difference)
1845 parent_difference_indices = list(
1846 _unpack_range_set(
1847 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1848 )
1849 )
1850 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1851 assert all(
1852 (
1853 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1854 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1855 )
1856 for d in parent_difference_indices
1857 )
1858 # Now that we've finally got those regions, we'll insert the large ones
1859 # as tract/patch dimension records.
1860 skymap_name = "testing_v1"
1861 registry.insertDimensionData(
1862 "skymap",
1863 {
1864 "name": skymap_name,
1865 "hash": bytes([42]),
1866 "tract_max": 1,
1867 "patch_nx_max": 2,
1868 "patch_ny_max": 2,
1869 },
1870 )
1871 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1872 registry.insertDimensionData(
1873 "patch",
1874 *[
1875 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1876 for n, c in enumerate(child_regions_large)
1877 ],
1878 )
1879 # Add a dataset that uses these dimensions to make sure that modifying
1880 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't
1881 # implement insert with replace=True as delete-then-insert).
1882 dataset_type = DatasetType(
1883 "coadd",
1884 dimensions=["tract", "patch"],
1885 universe=registry.dimensions,
1886 storageClass="Exposure",
1887 )
1888 registry.registerDatasetType(dataset_type)
1889 registry.registerCollection("the_run", CollectionType.RUN)
1890 registry.insertDatasets(
1891 dataset_type,
1892 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1893 run="the_run",
1894 )
1895 # Query for tracts and patches that overlap some "difference"
1896 # commonSkyPix pixels; there should be overlaps, because the database
1897 # has the "large" suite of regions.
1898 self.assertEqual(
1899 {0},
1900 {
1901 data_id["tract"]
1902 for data_id in registry.queryDataIds(
1903 ["tract"],
1904 skymap=skymap_name,
1905 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1906 )
1907 },
1908 )
1909 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1910 self.assertIn(
1911 patch_id,
1912 {
1913 data_id["patch"]
1914 for data_id in registry.queryDataIds(
1915 ["patch"],
1916 skymap=skymap_name,
1917 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1918 )
1919 },
1920 )
1921 # Use sync to update the tract region and insert to update the regions
1922 # of the patches, to the "small" suite.
1923 updated = registry.syncDimensionData(
1924 "tract",
1925 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1926 update=True,
1927 )
1928 self.assertEqual(updated, {"region": parent_region_large})
1929 registry.insertDimensionData(
1930 "patch",
1931 *[
1932 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1933 for n, c in enumerate(child_regions_small)
1934 ],
1935 replace=True,
1936 )
1937 # Query again; there should now be no such overlaps, because the
1938 # database has the "small" suite of regions.
1939 self.assertFalse(
1940 set(
1941 registry.queryDataIds(
1942 ["tract"],
1943 skymap=skymap_name,
1944 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1945 )
1946 )
1947 )
1948 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1949 self.assertNotIn(
1950 patch_id,
1951 {
1952 data_id["patch"]
1953 for data_id in registry.queryDataIds(
1954 ["patch"],
1955 skymap=skymap_name,
1956 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1957 )
1958 },
1959 )
1960 # Update back to the large regions and query one more time.
1961 updated = registry.syncDimensionData(
1962 "tract",
1963 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1964 update=True,
1965 )
1966 self.assertEqual(updated, {"region": parent_region_small})
1967 registry.insertDimensionData(
1968 "patch",
1969 *[
1970 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1971 for n, c in enumerate(child_regions_large)
1972 ],
1973 replace=True,
1974 )
1975 self.assertEqual(
1976 {0},
1977 {
1978 data_id["tract"]
1979 for data_id in registry.queryDataIds(
1980 ["tract"],
1981 skymap=skymap_name,
1982 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1983 )
1984 },
1985 )
1986 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1987 self.assertIn(
1988 patch_id,
1989 {
1990 data_id["patch"]
1991 for data_id in registry.queryDataIds(
1992 ["patch"],
1993 skymap=skymap_name,
1994 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1995 )
1996 },
1997 )
1999 def testCalibrationCollections(self):
2000 """Test operations on `~CollectionType.CALIBRATION` collections,
2001 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2002 `SqlRegistry.findDataset`, and
2003 `DataCoordinateQueryResults.findRelatedDatasets`.
2004 """
2005 # Set up: make a Registry and fill it with some datasets in
2006 # non-calibration collections.
2007 registry = self.makeRegistry()
2008 self.loadData(registry, "base.yaml")
2009 self.loadData(registry, "datasets.yaml")
2010 # Set up some timestamps.
2011 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2012 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2013 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2014 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2015 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2016 allTimespans = [
2017 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2018 ]
2019 # Insert some exposure records with timespans between each sequential
2020 # pair of those.
2021 registry.insertDimensionData(
2022 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2023 )
2024 registry.insertDimensionData(
2025 "group",
2026 {"instrument": "Cam1", "name": "group0"},
2027 {"instrument": "Cam1", "name": "group1"},
2028 {"instrument": "Cam1", "name": "group2"},
2029 {"instrument": "Cam1", "name": "group3"},
2030 )
2031 registry.insertDimensionData(
2032 "exposure",
2033 {
2034 "instrument": "Cam1",
2035 "id": 0,
2036 "group": "group0",
2037 "obs_id": "zero",
2038 "physical_filter": "Cam1-G",
2039 "day_obs": 20200101,
2040 "timespan": Timespan(t1, t2),
2041 },
2042 {
2043 "instrument": "Cam1",
2044 "id": 1,
2045 "group": "group1",
2046 "obs_id": "one",
2047 "physical_filter": "Cam1-G",
2048 "day_obs": 20200101,
2049 "timespan": Timespan(t2, t3),
2050 },
2051 {
2052 "instrument": "Cam1",
2053 "id": 2,
2054 "group": "group2",
2055 "obs_id": "two",
2056 "physical_filter": "Cam1-G",
2057 "day_obs": 20200101,
2058 "timespan": Timespan(t3, t4),
2059 },
2060 {
2061 "instrument": "Cam1",
2062 "id": 3,
2063 "group": "group3",
2064 "obs_id": "three",
2065 "physical_filter": "Cam1-G",
2066 "day_obs": 20200101,
2067 "timespan": Timespan(t4, t5),
2068 },
2069 )
2070 # Get references to some datasets.
2071 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2072 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2073 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2074 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2075 # Register the main calibration collection we'll be working with.
2076 collection = "Cam1/calibs/default"
2077 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2078 # Cannot associate into a calibration collection (no timespan).
2079 with self.assertRaises(CollectionTypeError):
2080 registry.associate(collection, [bias2a])
2081 # Certify 2a dataset with [t2, t4) validity.
2082 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2083 # Test that we can query for this dataset via the new collection, both
2084 # on its own and with a RUN collection.
2085 self.assertEqual(
2086 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2087 {bias2a},
2088 )
2089 self.assertEqual(
2090 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2091 {
2092 bias2a,
2093 bias2b,
2094 bias3b,
2095 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2096 },
2097 )
2098 self.assertEqual(
2099 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2100 {registry.expandDataId(instrument="Cam1", detector=2)},
2101 )
2102 self.assertEqual(
2103 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2104 {
2105 registry.expandDataId(instrument="Cam1", detector=2),
2106 registry.expandDataId(instrument="Cam1", detector=3),
2107 registry.expandDataId(instrument="Cam1", detector=4),
2108 },
2109 )
2110 self.assertEqual(
2111 set(
2112 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2113 "bias", findFirst=True, collections=[collection]
2114 )
2115 ),
2116 {
2117 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2118 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2119 },
2120 )
2121 self.assertEqual(
2122 set(
2123 registry.queryDataIds(
2124 ["exposure", "detector"], instrument="Cam1", detector=2
2125 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2126 ),
2127 {
2128 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2129 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2130 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2131 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2132 },
2133 )
2135 # We should not be able to certify 2b with anything overlapping that
2136 # window.
2137 with self.assertRaises(ConflictingDefinitionError):
2138 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2139 with self.assertRaises(ConflictingDefinitionError):
2140 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2141 with self.assertRaises(ConflictingDefinitionError):
2142 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2143 with self.assertRaises(ConflictingDefinitionError):
2144 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2145 with self.assertRaises(ConflictingDefinitionError):
2146 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2147 with self.assertRaises(ConflictingDefinitionError):
2148 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2149 with self.assertRaises(ConflictingDefinitionError):
2150 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2151 with self.assertRaises(ConflictingDefinitionError):
2152 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2153 # We should be able to certify 3a with a range overlapping that window,
2154 # because it's for a different detector.
2155 # We'll certify 3a over [t1, t3).
2156 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2157 # Now we'll certify 2b and 3b together over [t4, ∞).
2158 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2160 # Fetch all associations and check that they are what we expect.
2161 self.assertCountEqual(
2162 list(
2163 registry.queryDatasetAssociations(
2164 "bias",
2165 collections=[collection, "imported_g", "imported_r"],
2166 )
2167 ),
2168 [
2169 DatasetAssociation(
2170 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2171 collection="imported_g",
2172 timespan=None,
2173 ),
2174 DatasetAssociation(
2175 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2176 collection="imported_r",
2177 timespan=None,
2178 ),
2179 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2180 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2181 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2182 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2183 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2184 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2185 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2186 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2187 ],
2188 )
2190 class Ambiguous:
2191 """Tag class to denote lookups that should be ambiguous."""
2193 pass
2195 def _assertLookup(
2196 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2197 ) -> None:
2198 """Local function that asserts that a bias lookup returns the given
2199 expected result.
2200 """
2201 if expected is Ambiguous:
2202 with self.assertRaises((DatasetTypeError, LookupError)):
2203 registry.findDataset(
2204 "bias",
2205 collections=collection,
2206 instrument="Cam1",
2207 detector=detector,
2208 timespan=timespan,
2209 )
2210 else:
2211 self.assertEqual(
2212 expected,
2213 registry.findDataset(
2214 "bias",
2215 collections=collection,
2216 instrument="Cam1",
2217 detector=detector,
2218 timespan=timespan,
2219 ),
2220 )
2222 # Systematically test lookups against expected results.
2223 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2224 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2225 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2226 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2227 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2228 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2229 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2230 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2231 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2232 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2233 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2234 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2235 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2236 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2237 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2238 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2239 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2240 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2241 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2242 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2243 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2244 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2245 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2246 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2247 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2248 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2249 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2250 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2251 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2252 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2253 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2254 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2255 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2256 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2257 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2258 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2259 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2260 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2261 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2262 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2263 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2264 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2266 # Test lookups via temporal joins to exposures.
2267 self.assertEqual(
2268 set(
2269 registry.queryDataIds(
2270 ["exposure", "detector"], instrument="Cam1", detector=2
2271 ).findRelatedDatasets("bias", collections=[collection])
2272 ),
2273 {
2274 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2275 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2276 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2277 },
2278 )
2279 self.assertEqual(
2280 set(
2281 registry.queryDataIds(
2282 ["exposure", "detector"], instrument="Cam1", detector=3
2283 ).findRelatedDatasets("bias", collections=[collection])
2284 ),
2285 {
2286 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2287 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2288 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2289 },
2290 )
2291 self.assertEqual(
2292 set(
2293 registry.queryDataIds(
2294 ["exposure", "detector"], instrument="Cam1", detector=2
2295 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2296 ),
2297 {
2298 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2299 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2300 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2301 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2302 },
2303 )
2304 self.assertEqual(
2305 set(
2306 registry.queryDataIds(
2307 ["exposure", "detector"], instrument="Cam1", detector=3
2308 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2309 ),
2310 {
2311 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2312 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2313 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2314 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2315 },
2316 )
2318 # Decertify [t3, t5) for all data IDs, and run the test lookups again.
2319 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2320 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2321 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2322 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2323 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2324 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2325 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2326 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2327 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2328 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2329 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2330 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2331 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2332 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2333 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2334 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2335 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2336 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2337 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2338 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2339 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2340 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2341 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2342 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2343 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2344 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2345 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2346 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2347 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2348 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2349 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2350 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2351 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2352 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2353 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2354 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2355 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2356 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2357 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2358 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2359 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2360 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2361 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2362 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2363 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2365 # Decertify everything, this time with explicit data IDs, then check
2366 # that no lookups succeed.
2367 registry.decertify(
2368 collection,
2369 "bias",
2370 Timespan(None, None),
2371 dataIds=[
2372 dict(instrument="Cam1", detector=2),
2373 dict(instrument="Cam1", detector=3),
2374 ],
2375 )
2376 for detector in (2, 3):
2377 for timespan in allTimespans:
2378 _assertLookup(detector=detector, timespan=timespan, expected=None)
2379 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2380 # those.
2381 registry.certify(
2382 collection,
2383 [bias2a, bias3a],
2384 Timespan(None, None),
2385 )
2386 for timespan in allTimespans:
2387 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2388 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2389 # Decertify just bias2 over [t2, t4).
2390 # This should split a single certification row into two (and leave the
2391 # other existing row, for bias3a, alone).
2392 registry.decertify(
2393 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2394 )
2395 for timespan in allTimespans:
2396 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2397 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2398 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2399 if overlapsBefore and overlapsAfter:
2400 expected = Ambiguous
2401 elif overlapsBefore or overlapsAfter:
2402 expected = bias2a
2403 else:
2404 expected = None
2405 _assertLookup(detector=2, timespan=timespan, expected=expected)
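# The last decertify above splits one certification row into two. A minimal
# sketch of that interval arithmetic with plain integer endpoints
# (illustrative only; unbounded endpoints and the registry's actual SQL
# implementation are ignored here):
@staticmethod
def _split_validity(
    certified: tuple[int, int], removed: tuple[int, int]
) -> list[tuple[int, int]]:
    """Return the pieces of ``certified`` left after removing ``removed``."""
    pieces = []
    if removed[0] > certified[0]:
        pieces.append((certified[0], min(removed[0], certified[1])))
    if removed[1] < certified[1]:
        pieces.append((max(removed[1], certified[0]), certified[1]))
    return [p for p in pieces if p[0] < p[1]]
# e.g. _split_validity((0, 10), (2, 4)) == [(0, 2), (4, 10)].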
2407 def testSkipCalibs(self):
2408 """Test how queries handle skipping of calibration collections."""
2409 registry = self.makeRegistry()
2410 self.loadData(registry, "base.yaml")
2411 self.loadData(registry, "datasets.yaml")
2413 coll_calib = "Cam1/calibs/default"
2414 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2416 # Add all biases to the calibration collection.
2417 # Without this, the logic that prunes dataset subqueries based on
2418 # datasetType-collection summary information will fire before the logic
2419 # we want to test below. This is a good thing (it avoids the dreaded
2420 # NotImplementedError a bit more often) everywhere but here.
2421 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2423 coll_list = [coll_calib, "imported_g", "imported_r"]
2424 chain = "Cam1/chain"
2425 registry.registerCollection(chain, type=CollectionType.CHAINED)
2426 registry.setCollectionChain(chain, coll_list)
2428 # An explicit list of collections will raise if findFirst=True or there
2429 # are temporal dimensions involved.
2430 with self.assertRaises(NotImplementedError):
2431 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2432 with self.assertRaises(NotImplementedError):
2433 registry.queryDataIds(
2434 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2435 ).count()
2437 # A chained collection will skip the calibration collection instead.
2438 datasets = list(registry.queryDatasets("bias", collections=chain))
2439 self.assertGreater(len(datasets), 0)
2441 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2442 self.assertGreater(len(dataIds), 0)
2444 # A glob pattern will skip it too.
2445 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2446 self.assertGreater(len(datasets), 0)
2448 # A regular expression will skip it too.
2449 pattern = re.compile(".*")
2450 datasets = list(registry.queryDatasets("bias", collections=pattern))
2451 self.assertGreater(len(datasets), 0)
2453 # Ellipsis should work as usual.
2454 datasets = list(registry.queryDatasets("bias", collections=...))
2455 self.assertGreater(len(datasets), 0)
2457 # A few tests with findFirst.
2458 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2459 self.assertGreater(len(datasets), 0)
2461 def testIngestTimeQuery(self):
2462 registry = self.makeRegistry()
2463 self.loadData(registry, "base.yaml")
2464 dt0 = datetime.datetime.now(datetime.UTC)
2465 self.loadData(registry, "datasets.yaml")
2466 dt1 = datetime.datetime.now(datetime.UTC)
2468 datasets = list(registry.queryDatasets(..., collections=...))
2469 len0 = len(datasets)
2470 self.assertGreater(len0, 0)
2472 where = "ingest_date > T'2000-01-01'"
2473 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2474 len1 = len(datasets)
2475 self.assertEqual(len0, len1)
2477 # no one will ever use this piece of software in 30 years
2478 where = "ingest_date > T'2050-01-01'"
2479 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2480 len2 = len(datasets)
2481 self.assertEqual(len2, 0)
2483 # Check more exact timing to make sure there is no 37-second offset
2484 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2485 # sure that we don't test with higher precision.
2486 tests = [
2487 # format: (timestamp, operator, expected_len)
2488 (dt0 - timedelta(seconds=1), ">", len0),
2489 (dt0 - timedelta(seconds=1), "<", 0),
2490 (dt1 + timedelta(seconds=1), "<", len0),
2491 (dt1 + timedelta(seconds=1), ">", 0),
2492 ]
2493 for dt, op, expect_len in tests:
2494 dt_str = dt.isoformat(sep=" ")
2496 where = f"ingest_date {op} T'{dt_str}'"
2497 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2498 self.assertEqual(len(datasets), expect_len)
2500 # same with bind using datetime or astropy Time
2501 where = f"ingest_date {op} ingest_time"
2502 datasets = list(
2503 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2504 )
2505 self.assertEqual(len(datasets), expect_len)
2507 dt_astropy = astropy.time.Time(dt, format="datetime")
2508 datasets = list(
2509 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2510 )
2511 self.assertEqual(len(datasets), expect_len)
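# Binding timestamps (as in the loop above) avoids string-formatting and
# quoting pitfalls; the two spellings are equivalent:
#
#     where=f"ingest_date > T'{dt.isoformat(sep=' ')}'"
#     where="ingest_date > ingest_time", bind={"ingest_time": dt}
#
# (A restatement of the queries exercised above, not new API.)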
2513 def testTimespanQueries(self):
2514 """Test query expressions involving timespans."""
2515 registry = self.makeRegistry()
2516 self.loadData(registry, "hsc-rc2-subset.yaml")
2517 # All visits in the database; mapping from ID to timespan.
2518 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2519 # Just those IDs, sorted (which is also temporal sorting, because HSC
2520 # visit IDs are monotonically increasing).
2521 ids = sorted(visits.keys())
2522 self.assertGreater(len(ids), 20)
2523 # Pick some quasi-random indexes into `ids` to play with.
2524 i1 = int(len(ids) * 0.1)
2525 i2 = int(len(ids) * 0.3)
2526 i3 = int(len(ids) * 0.6)
2527 i4 = int(len(ids) * 0.8)
2528 # Extract some times from those: just before the beginning of i1 (which
2529 # should be after the end of the previous visit), exactly the
2530 # beginning of i2, just after the beginning of i3 (and before its end),
2531 # and the exact end of i4.
2532 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2533 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2534 t2 = visits[ids[i2]].begin
2535 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2536 self.assertLess(t3, visits[ids[i3]].end)
2537 t4 = visits[ids[i4]].end
2538 # Make sure those are actually in order.
2539 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2541 bind = {
2542 "t1": t1,
2543 "t2": t2,
2544 "t3": t3,
2545 "t4": t4,
2546 "ts23": Timespan(t2, t3),
2547 }
2549 def query(where):
2550 """Return results as a sorted, deduplicated list of visit IDs.
2552 Parameters
2553 ----------
2554 where : `str`
2555 The WHERE clause for the query.
2556 """
2557 return sorted(
2558 {
2559 dataId["visit"]
2560 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2561 }
2562 )
2564 # Try a bunch of timespan queries, mixing up the bounds themselves,
2565 # where they appear in the expression, and how we get the timespan into
2566 # the expression.
2568 # t1 is before the start of i1, so this should not include i1.
2569 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2570 # t2 is exactly at the start of i2, but ends are exclusive, so these
2571 # should not include i2.
2572 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2573 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2574 # t3 is in the middle of i3, so this should include i3.
2575 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2576 # This one should not include i3 by the same reasoning.
2577 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2578 # t4 is exactly at the end of i4, so this should include i4.
2579 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2580 # i4's upper bound of t4 is exclusive, so this should not include i4.
2581 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2583 # Now some timespan vs. time scalar queries.
2584 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2585 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2586 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2587 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2588 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2589 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2591 # Empty timespans should not overlap anything.
2592 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
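# The expectations above all follow from treating timespans as half-open
# intervals [begin, end). A minimal sketch of the overlap rule with plain
# floats (illustrative only; Timespan.overlaps also handles unbounded and
# empty cases):
@staticmethod
def _half_open_overlaps(a: tuple[float, float], b: tuple[float, float]) -> bool:
    """Return True if [a[0], a[1]) and [b[0], b[1]) intersect."""
    return a[0] < b[1] and b[0] < a[1]
# e.g. _half_open_overlaps((1.0, 2.0), (2.0, 3.0)) is False: ends are
# exclusive, which is why (t1, t2) above does not match visit i2.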
2594 def testCollectionSummaries(self):
2595 """Test recording and retrieval of collection summaries."""
2596 self.maxDiff = None
2597 registry = self.makeRegistry()
2598 # Importing datasets from yaml should go through the code path where
2599 # we update collection summaries as we insert datasets.
2600 self.loadData(registry, "base.yaml")
2601 self.loadData(registry, "datasets.yaml")
2602 flat = registry.getDatasetType("flat")
2603 expected1 = CollectionSummary()
2604 expected1.dataset_types.add(registry.getDatasetType("bias"))
2605 expected1.add_data_ids(
2606 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2607 )
2608 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2609 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2610 # Create a chained collection with both of the imported runs; the
2611 # summary should be the same, because it's a union with itself.
2612 chain = "chain"
2613 registry.registerCollection(chain, CollectionType.CHAINED)
2614 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2615 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2616 # Associate flats only into a tagged collection and a calibration
2617 # collection to check summaries of those.
2618 tag = "tag"
2619 registry.registerCollection(tag, CollectionType.TAGGED)
2620 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2621 calibs = "calibs"
2622 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2623 registry.certify(
2624 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2625 )
2626 expected2 = expected1.copy()
2627 expected2.dataset_types.discard("bias")
2628 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2629 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2630 # Explicitly calling SqlRegistry.refresh() should load those same
2631 # summaries, via a totally different code path.
2632 registry.refresh()
2633 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2634 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2635 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2636 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
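# A CollectionSummary records which dataset types (and which governor data ID
# values, such as instrument names) may appear in a collection; queries
# consult it to skip collections that cannot match. (A summary of the
# behavior tested above.)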
2638 def testBindInQueryDatasets(self):
2639 """Test that the bind parameter is correctly forwarded in
2640 queryDatasets recursion.
2641 """
2642 registry = self.makeRegistry()
2643 # Load some records and datasets from yaml so there is something to
2644 # query against.
2645 self.loadData(registry, "base.yaml")
2646 self.loadData(registry, "datasets.yaml")
2647 self.assertEqual(
2648 set(registry.queryDatasets("flat", band="r", collections=...)),
2649 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2650 )
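# A hedged sketch of why `bind` matters beyond the string-valued case above:
# values such as astropy times cannot be inlined into the `where` text, so
# they must be passed by name. Assumes a `registry` populated as in the
# test; the helper name is illustrative.
def _demo_bind_with_time(registry) -> None:
    import astropy.time

    t = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
    data_ids = registry.queryDataIds(
        ["visit"], instrument="Cam1", where="visit.timespan < my_time", bind={"my_time": t}
    )
    for data_id in data_ids:
        print(data_id["visit"])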
2652 def testQueryIntRangeExpressions(self):
2653 """Test integer range expressions in ``where`` arguments.
2655 Note that our expressions use inclusive stop values, unlike Python's.
2656 """
2657 registry = self.makeRegistry()
2658 self.loadData(registry, "base.yaml")
2659 self.assertEqual(
2660 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2661 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2662 )
2663 self.assertEqual(
2664 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2665 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2666 )
2667 self.assertEqual(
2668 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2669 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2670 )
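# A hedged illustration of how the `(start..stop:stride)` syntax above maps
# onto Python ranges: the stop value is inclusive, so the Python equivalent
# adds one. The helper is hypothetical and exists only to document that
# correspondence.
def _expression_range_to_python(start: int, stop: int, stride: int = 1) -> range:
    # "detector IN (1..4:2)" selects detectors 1 and 3, i.e. range(1, 5, 2).
    return range(start, stop + 1, stride)


assert list(_expression_range_to_python(1, 4, 2)) == [1, 3]
assert list(_expression_range_to_python(2, 4, 2)) == [2, 4]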
2672 def testQueryResultSummaries(self):
2673 """Test summary methods like `count`, `any`, and `explain_no_results`
2674 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2675 """
2676 registry = self.makeRegistry()
2677 self.loadData(registry, "base.yaml")
2678 self.loadData(registry, "datasets.yaml")
2679 self.loadData(registry, "spatial.yaml")
2680 # Default test dataset has two collections, each with both flats and
2681 # biases. Add a new collection with only biases.
2682 registry.registerCollection("biases", CollectionType.TAGGED)
2683 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2684 # First query yields two results, and involves no postprocessing.
2685 query1 = registry.queryDataIds(["physical_filter"], band="r")
2686 self.assertTrue(query1.any(execute=False, exact=False))
2687 self.assertTrue(query1.any(execute=True, exact=False))
2688 self.assertTrue(query1.any(execute=True, exact=True))
2689 self.assertEqual(query1.count(exact=False), 2)
2690 self.assertEqual(query1.count(exact=True), 2)
2691 self.assertFalse(list(query1.explain_no_results()))
2692 # Second query should yield no results, which we should see when
2693 # we attempt to expand the data ID.
2694 query2 = registry.queryDataIds(["physical_filter"], band="h")
2695 # There's no execute=False, exact=False test here because the behavior
2696 # is not something we want to guarantee in this case (and exact=False
2697 # says either answer is legal).
2698 self.assertFalse(query2.any(execute=True, exact=False))
2699 self.assertFalse(query2.any(execute=True, exact=True))
2700 self.assertEqual(query2.count(exact=False), 0)
2701 self.assertEqual(query2.count(exact=True), 0)
2702 self.assertTrue(list(query2.explain_no_results()))
2703 # These queries yield no results due to various problems that can be
2704 # spotted prior to execution, yielding helpful diagnostics.
2705 base_query = registry.queryDataIds(["detector", "physical_filter"])
2706 queries_and_snippets = [
2707 (
2708 # Dataset type name doesn't match any existing dataset types.
2709 registry.queryDatasets("nonexistent", collections=...),
2710 ["nonexistent"],
2711 ),
2712 (
2713 # Dataset type object isn't registered.
2714 registry.queryDatasets(
2715 DatasetType(
2716 "nonexistent",
2717 dimensions=["instrument"],
2718 universe=registry.dimensions,
2719 storageClass="Image",
2720 ),
2721 collections=...,
2722 ),
2723 ["nonexistent"],
2724 ),
2725 (
2726 # No datasets of this type in this collection.
2727 registry.queryDatasets("flat", collections=["biases"]),
2728 ["flat", "biases"],
2729 ),
2730 (
2731 # No datasets of this type in this collection.
2732 base_query.findDatasets("flat", collections=["biases"]),
2733 ["flat", "biases"],
2734 ),
2735 (
2736 # No collections matching at all.
2737 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2738 ["potato"],
2739 ),
2740 ]
2741 with self.assertRaises(MissingDatasetTypeError):
2742 # Dataset type name doesn't match any existing dataset types.
2743 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
2744 with self.assertRaises(MissingDatasetTypeError):
2745 # Dataset type name doesn't match any existing dataset types.
2746 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
2747 for query, snippets in queries_and_snippets:
2748 self.assertFalse(query.any(execute=False, exact=False))
2749 self.assertFalse(query.any(execute=True, exact=False))
2750 self.assertFalse(query.any(execute=True, exact=True))
2751 self.assertEqual(query.count(exact=False), 0)
2752 self.assertEqual(query.count(exact=True), 0)
2753 messages = list(query.explain_no_results())
2754 self.assertTrue(messages)
2755 # Want all expected snippets to appear in at least one message.
2756 self.assertTrue(
2757 any(
2758 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2759 ),
2760 messages,
2761 )
2763 # Wildcards on dataset types are not permitted in queryDataIds.
2764 with self.assertRaises(DatasetTypeExpressionError):
2765 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2767 # These queries yield no results due to problems that can be identified
2768 # by cheap follow-up queries, yielding helpful diagnostics.
2769 for query, snippets in [
2770 (
2771 # No records for one of the involved dimensions.
2772 registry.queryDataIds(["subfilter"]),
2773 ["no rows", "subfilter"],
2774 ),
2775 (
2776 # No records for one of the involved dimensions.
2777 registry.queryDimensionRecords("subfilter"),
2778 ["no rows", "subfilter"],
2779 ),
2780 ]:
2781 self.assertFalse(query.any(execute=True, exact=False))
2782 self.assertFalse(query.any(execute=True, exact=True))
2783 self.assertEqual(query.count(exact=True), 0)
2784 messages = list(query.explain_no_results())
2785 self.assertTrue(messages)
2786 # Want all expected snippets to appear in at least one message.
2787 self.assertTrue(
2788 any(
2789 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2790 ),
2791 messages,
2792 )
2794 # This query yields four overlaps in the database, but one is filtered
2795 # out in postprocessing. The count queries aren't accurate because
2796 # they don't account for duplication that happens due to an internal
2797 # join against commonSkyPix.
2798 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2799 self.assertEqual(
2800 {
2801 DataCoordinate.standardize(
2802 instrument="Cam1",
2803 skymap="SkyMap1",
2804 visit=v,
2805 tract=t,
2806 universe=registry.dimensions,
2807 )
2808 for v, t in [(1, 0), (2, 0), (2, 1)]
2809 },
2810 set(query3),
2811 )
2812 self.assertTrue(query3.any(execute=False, exact=False))
2813 self.assertTrue(query3.any(execute=True, exact=False))
2814 self.assertTrue(query3.any(execute=True, exact=True))
2815 self.assertGreaterEqual(query3.count(exact=False), 4)
2816 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2817 self.assertFalse(list(query3.explain_no_results()))
2818 # This query yields overlaps in the database, but all are filtered
2819 # out in postprocessing. The count queries again aren't very useful.
2820 # We have to use `where=` here to avoid an optimization that
2821 # (currently) skips the spatial postprocess-filtering because it
2822 # recognizes that no spatial join is necessary. That's not ideal, but
2823 # fixing it is out of scope for this ticket.
2824 query4 = registry.queryDataIds(
2825 ["visit", "tract"],
2826 instrument="Cam1",
2827 skymap="SkyMap1",
2828 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2829 )
2830 self.assertFalse(set(query4))
2831 self.assertTrue(query4.any(execute=False, exact=False))
2832 self.assertTrue(query4.any(execute=True, exact=False))
2833 self.assertFalse(query4.any(execute=True, exact=True))
2834 self.assertGreaterEqual(query4.count(exact=False), 1)
2835 self.assertEqual(query4.count(exact=True, discard=True), 0)
2836 messages = query4.explain_no_results()
2837 self.assertTrue(messages)
2838 self.assertTrue(any("overlap" in message for message in messages))
2839 # This query should yield results from one dataset type but not the
2840 # other, which is not registered.
2841 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2842 self.assertTrue(set(query5))
2843 self.assertTrue(query5.any(execute=False, exact=False))
2844 self.assertTrue(query5.any(execute=True, exact=False))
2845 self.assertTrue(query5.any(execute=True, exact=True))
2846 self.assertGreaterEqual(query5.count(exact=False), 1)
2847 self.assertGreaterEqual(query5.count(exact=True), 1)
2848 self.assertFalse(list(query5.explain_no_results()))
2849 # This query applies a selection that yields no results, fully in the
2850 # database. Explaining why it fails involves traversing the relation
2851 # tree and running a LIMIT 1 query at each level that has the potential
2852 # to remove rows.
2853 query6 = registry.queryDimensionRecords(
2854 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2855 )
2856 self.assertEqual(query6.count(exact=True), 0)
2857 messages = query6.explain_no_results()
2858 self.assertTrue(messages)
2859 self.assertTrue(any("no-purpose" in message for message in messages))
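# A hedged sketch of the triage pattern exercised above: any(execute=False,
# exact=False) is a cheap pre-execution check, count(exact=True) forces
# execution, and explain_no_results() yields human-readable diagnostics.
# `results` stands for any of the query-result objects used in this test;
# the helper name is illustrative.
def _summarize_query(results) -> str:
    if not results.any(execute=False, exact=False):
        # Doomed before execution; the diagnostics explain why.
        return "; ".join(results.explain_no_results())
    n = results.count(exact=True)
    return f"{n} rows" if n else "; ".join(results.explain_no_results())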
2861 def testQueryDataIdsExpressionError(self):
2862 """Test error checking of 'where' expressions in queryDataIds."""
2863 registry = self.makeRegistry()
2864 self.loadData(registry, "base.yaml")
2865 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2866 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2867 registry.queryDataIds(["detector"], where="foo.bar = 12")
2868 with self.assertRaisesRegex(
2869 LookupError, "Dimension element name cannot be inferred in this context."
2870 ):
2871 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2873 def testQueryDataIdsOrderBy(self):
2874 """Test order_by and limit on result returned by queryDataIds()."""
2875 registry = self.makeRegistry()
2876 self.loadData(registry, "base.yaml")
2877 self.loadData(registry, "datasets.yaml")
2878 self.loadData(registry, "spatial.yaml")
2880 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2881 return registry.queryDataIds(
2882 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2883 )
2885 Test = namedtuple(
2886 "testQueryDataIdsOrderByTest",
2887 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2888 defaults=(None, None, None),
2889 )
2891 test_data = (
2892 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2893 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2894 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2895 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2896 Test(
2897 "tract.id,visit.id",
2898 "tract,visit",
2899 ((0, 1), (0, 1), (0, 2)),
2900 limit=(3,),
2901 ),
2902 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2903 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2904 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2905 Test(
2906 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2907 ),
2908 Test(
2909 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2910 ),
2911 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2912 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2913 Test(
2914 "tract,-visit.timespan.begin,visit.timespan.end",
2915 "tract,visit",
2916 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2917 ),
2918 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2919 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2920 Test(
2921 "tract,detector",
2922 "tract,detector",
2923 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2924 datasets="flat",
2925 collections="imported_r",
2926 ),
2927 Test(
2928 "tract,detector.full_name",
2929 "tract,detector",
2930 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2931 datasets="flat",
2932 collections="imported_r",
2933 ),
2934 Test(
2935 "tract,detector.raft,detector.name_in_raft",
2936 "tract,detector",
2937 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2938 datasets="flat",
2939 collections="imported_r",
2940 ),
2941 )
2943 for test in test_data:
2944 order_by = test.order_by.split(",")
2945 keys = test.keys.split(",")
2946 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2947 if test.limit is not None:
2948 query = query.limit(*test.limit)
2949 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2950 self.assertEqual(dataIds, test.result)
2952 # Materializing a query with ORDER BY (and LIMIT) should raise.
2953 query = do_query(keys).order_by(*order_by)
2954 if test.limit is not None:
2955 query = query.limit(*test.limit)
2956 with self.assertRaises(RelationalAlgebraError):
2957 with query.materialize():
2958 pass
2960 # errors in a name
2961 for order_by in ("", "-"):
2962 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2963 list(do_query().order_by(order_by))
2965 for order_by in ("undimension.name", "-undimension.name"):
2966 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2967 list(do_query().order_by(order_by))
2969 for order_by in ("attract", "-attract"):
2970 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2971 list(do_query().order_by(order_by))
2973 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2974 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2976 with self.assertRaisesRegex(
2977 ValueError,
2978 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
2979 r"qualify timespan with specific dimension name\.",
2980 ):
2981 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2983 with self.assertRaisesRegex(
2984 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2985 ):
2986 list(do_query("tract").order_by("timespan.begin"))
2988 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2989 list(do_query("tract").order_by("tract.timespan.begin"))
2991 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2992 list(do_query("tract").order_by("tract.name"))
2994 with self.assertRaisesRegex(
2995 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
2996 ):
2997 list(do_query("visit").order_by("timestamp.begin"))
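# A minimal usage sketch of the chaining validated above: order_by() accepts
# dimension names and qualified metadata fields, with a leading '-' for
# descending order, and limit() takes a row limit plus an optional offset.
# Assumes a `registry` populated as in the test; the helper name is
# illustrative.
def _demo_order_by_and_limit(registry) -> None:
    data_ids = (
        registry.queryDataIds(["tract", "visit"], instrument="Cam1", skymap="SkyMap1")
        .order_by("tract", "-visit.timespan.begin")
        .limit(3, 1)  # at most three rows, skipping the first
    )
    for data_id in data_ids:
        print(data_id["tract"], data_id["visit"])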
2999 def testQueryDataIdsGovernorExceptions(self):
3000 """Test exceptions raised by queryDataIds() for incorrect governors."""
3001 registry = self.makeRegistry()
3002 self.loadData(registry, "base.yaml")
3003 self.loadData(registry, "datasets.yaml")
3004 self.loadData(registry, "spatial.yaml")
3006 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3007 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3009 Test = namedtuple(
3010 "testQueryDataIdExceptionsTest",
3011 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3012 defaults=(None, None, None, {}, None, 0),
3013 )
3015 test_data = (
3016 Test("tract,visit", count=6),
3017 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3018 Test(
3019 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3020 ),
3021 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3022 Test(
3023 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3024 ),
3025 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3026 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3027 Test(
3028 "tract,visit",
3029 where="instrument=cam AND skymap=map",
3030 bind={"cam": "Cam1", "map": "SkyMap1"},
3031 count=6,
3032 ),
3033 Test(
3034 "tract,visit",
3035 where="instrument=cam AND skymap=map",
3036 bind={"cam": "Cam", "map": "SkyMap"},
3037 exception=DataIdValueError,
3038 ),
3039 )
3041 for test in test_data:
3042 dimensions = test.dimensions.split(",")
3043 if test.exception:
3044 with self.assertRaises(test.exception):
3045 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3046 else:
3047 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3048 self.assertEqual(query.count(discard=True), test.count)
3050 # And repeat the same checks with a materialized query.
3051 if test.exception:
3052 with self.assertRaises(test.exception):
3053 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3054 with query.materialize() as materialized:
3055 materialized.count(discard=True)
3056 else:
3057 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3058 with query.materialize() as materialized:
3059 self.assertEqual(materialized.count(discard=True), test.count)
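# A hedged sketch of the behavior tabulated above: an unknown governor value
# (e.g. an instrument not present in the repository) raises DataIdValueError
# instead of producing an empty result, whether it arrives via kwargs,
# dataId, where, or bind. Assumes the populated `registry` from the test;
# the helper name is illustrative.
def _count_or_none(registry) -> int | None:
    try:
        query = registry.queryDataIds(["tract", "visit"], instrument="Cam2", skymap="SkyMap1")
        return query.count(discard=True)
    except DataIdValueError:
        return None  # governor validation failed; nothing was executed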
3061 def testQueryDimensionRecordsOrderBy(self):
3062 """Test order_by and limit on result returned by
3063 queryDimensionRecords().
3064 """
3065 registry = self.makeRegistry()
3066 self.loadData(registry, "base.yaml")
3067 self.loadData(registry, "datasets.yaml")
3068 self.loadData(registry, "spatial.yaml")
3070 def do_query(element, datasets=None, collections=None):
3071 return registry.queryDimensionRecords(
3072 element, instrument="Cam1", datasets=datasets, collections=collections
3073 )
3075 query = do_query("detector")
3076 self.assertEqual(len(list(query)), 4)
3078 Test = namedtuple(
3079 "testQueryDataIdsOrderByTest",
3080 ("element", "order_by", "result", "limit", "datasets", "collections"),
3081 defaults=(None, None, None),
3082 )
3084 test_data = (
3085 Test("detector", "detector", (1, 2, 3, 4)),
3086 Test("detector", "-detector", (4, 3, 2, 1)),
3087 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3088 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3089 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3090 Test("visit", "visit", (1, 2)),
3091 Test("visit", "-visit.id", (2, 1)),
3092 Test("visit", "zenith_angle", (1, 2)),
3093 Test("visit", "-visit.name", (2, 1)),
3094 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3095 )
3097 for test in test_data:
3098 order_by = test.order_by.split(",")
3099 query = do_query(test.element).order_by(*order_by)
3100 if test.limit is not None:
3101 query = query.limit(*test.limit)
3102 dataIds = tuple(rec.id for rec in query)
3103 self.assertEqual(dataIds, test.result)
3105 # errors in a name
3106 for order_by in ("", "-"):
3107 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3108 list(do_query("detector").order_by(order_by))
3110 for order_by in ("undimension.name", "-undimension.name"):
3111 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3112 list(do_query("detector").order_by(order_by))
3114 for order_by in ("attract", "-attract"):
3115 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3116 list(do_query("detector").order_by(order_by))
3118 for order_by in ("timestamp.begin", "-timestamp.begin"):
3119 with self.assertRaisesRegex(
3120 ValueError,
3121 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3122 r"perhaps you meant 'timespan.begin'\?",
3123 ):
3124 list(do_query("visit").order_by(order_by))
3126 def testQueryDimensionRecordsExceptions(self):
3127 """Test exceptions raised by queryDimensionRecords()."""
3128 registry = self.makeRegistry()
3129 self.loadData(registry, "base.yaml")
3130 self.loadData(registry, "datasets.yaml")
3131 self.loadData(registry, "spatial.yaml")
3133 result = registry.queryDimensionRecords("detector")
3134 self.assertEqual(result.count(), 4)
3135 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3136 self.assertEqual(result.count(), 4)
3137 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3138 self.assertEqual(result.count(), 4)
3139 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3140 self.assertEqual(result.count(), 4)
3141 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3142 self.assertEqual(result.count(), 4)
3144 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3145 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3146 result.count()
3148 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3149 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3150 result.count()
3152 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3153 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3154 result.count()
3156 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3157 result = registry.queryDimensionRecords(
3158 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3159 )
3160 result.count()
3162 def testDatasetConstrainedDimensionRecordQueries(self):
3163 """Test that queryDimensionRecords works even when given a dataset
3164 constraint whose dimensions extend beyond the requested dimension
3165 element's.
3166 """
3167 registry = self.makeRegistry()
3168 self.loadData(registry, "base.yaml")
3169 self.loadData(registry, "datasets.yaml")
3170 # Query for physical_filter dimension records, using a dataset type
3171 # whose dimensions extend beyond physical_filter's.
3172 records = registry.queryDimensionRecords(
3173 "physical_filter",
3174 datasets=["flat"],
3175 collections="imported_r",
3176 )
3177 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3178 # Trying to constrain by all dataset types is an error.
3179 with self.assertRaises(TypeError):
3180 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3182 def testSkyPixDatasetQueries(self):
3183 """Test that we can build queries involving skypix dimensions as long
3184 as a dataset type that uses those dimensions is included.
3185 """
3186 registry = self.makeRegistry()
3187 self.loadData(registry, "base.yaml")
3188 dataset_type = DatasetType(
3189 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3190 )
3191 registry.registerDatasetType(dataset_type)
3192 run = "r"
3193 registry.registerRun(run)
3194 # First try queries where there are no datasets; the concern is whether
3195 # we can even build and execute these queries without raising, even
3196 # when "doomed" query shortcuts are in play.
3197 self.assertFalse(
3198 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3199 )
3200 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3201 # Now add a dataset and see that we can get it back.
3202 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3203 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3204 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3205 self.assertEqual(
3206 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3207 {data_id},
3208 )
3209 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3211 def testDatasetIdFactory(self):
3212 """Simple test for DatasetIdFactory, mostly to catch potential changes
3213 in its API.
3214 """
3215 registry = self.makeRegistry()
3216 factory = DatasetIdFactory()
3217 dataset_type = DatasetType(
3218 "datasetType",
3219 dimensions=["detector", "instrument"],
3220 universe=registry.dimensions,
3221 storageClass="int",
3222 )
3223 run = "run"
3224 data_id = DataCoordinate.standardize(
3225 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3226 )
3228 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3229 self.assertIsInstance(datasetId, uuid.UUID)
3230 self.assertEqual(datasetId.version, 4)
3232 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3233 self.assertIsInstance(datasetId, uuid.UUID)
3234 self.assertEqual(datasetId.version, 5)
3236 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3237 self.assertIsInstance(datasetId, uuid.UUID)
3238 self.assertEqual(datasetId.version, 5)
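# A hedged sketch of why the version checks above hold: UNIQUE ids are
# random (UUID4), while the DATAID_TYPE* modes hash their inputs (UUID5),
# so repeated calls with identical inputs must agree. The namespace and
# name below are purely illustrative, not what DatasetIdFactory uses.
def _demo_dataset_id_versions() -> None:
    import uuid

    example_ns = uuid.UUID("00000000-0000-0000-0000-000000000000")
    name = "run/datasetType/Cam1/1"  # run + dataset type + data ID, flattened
    assert uuid.uuid5(example_ns, name) == uuid.uuid5(example_ns, name)
    assert uuid.uuid5(example_ns, name).version == 5
    assert uuid.uuid4().version == 4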
3240 def testExposureQueries(self):
3241 """Test query methods using arguments sourced from the exposure log
3242 service.
3244 The most complete test dataset currently available to daf_butler tests
3245 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3246 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3247 dimension records as it was focused on providing nontrivial spatial
3248 overlaps between visit+detector and tract+patch. So in this test we
3249 need to translate queries that originally used the exposure dimension
3250 to use the (very similar) visit dimension instead.
3251 """
3252 registry = self.makeRegistry()
3253 self.loadData(registry, "hsc-rc2-subset.yaml")
3254 self.assertEqual(
3255 [
3256 record.id
3257 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3258 .order_by("id")
3259 .limit(5)
3260 ],
3261 [318, 322, 326, 330, 332],
3262 )
3263 self.assertEqual(
3264 [
3265 data_id["visit"]
3266 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
3267 ],
3268 [318, 322, 326, 330, 332],
3269 )
3270 self.assertEqual(
3271 [
3272 record.id
3273 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3274 .order_by("full_name")
3275 .limit(5)
3276 ],
3277 [73, 72, 71, 70, 65],
3278 )
3279 self.assertEqual(
3280 [
3281 data_id["detector"]
3282 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3283 .order_by("full_name")
3284 .limit(5)
3285 ],
3286 [73, 72, 71, 70, 65],
3287 )
3289 def test_long_query_names(self) -> None:
3290 """Test that queries involving very long names are handled correctly.
3292 This is especially important for PostgreSQL, which truncates identifiers
3293 longer than 63 characters, but it's worth testing for all DBs.
3294 """
3295 registry = self.makeRegistry()
3296 name = "abcd" * 17
3297 registry.registerDatasetType(
3298 DatasetType(
3299 name,
3300 dimensions=(),
3301 storageClass="Exposure",
3302 universe=registry.dimensions,
3303 )
3304 )
3305 # Need to search more than one collection actually containing a
3306 # matching dataset; otherwise an optimization makes findFirst=True a
3307 # no-op and would sidestep any bugs caused by truncation.
3308 run1 = "run1"
3309 registry.registerRun(run1)
3310 run2 = "run2"
3311 registry.registerRun(run2)
3312 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3313 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3314 self.assertEqual(
3315 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3316 {ref1},
3317 )
3319 def test_skypix_constraint_queries(self) -> None:
3320 """Test queries spatially constrained by a skypix data ID."""
3321 registry = self.makeRegistry()
3322 self.loadData(registry, "hsc-rc2-subset.yaml")
3323 patch_regions = {
3324 (data_id["tract"], data_id["patch"]): data_id.region
3325 for data_id in registry.queryDataIds(["patch"]).expanded()
3326 }
3327 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3328 # This check ensures the test doesn't become trivial due to a config
3329 # change; if it does, just pick a different HTM level.
3330 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3331 # Gather all skypix IDs that definitely overlap at least one of these
3332 # patches.
3333 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3334 for patch_region in patch_regions.values():
3335 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3336 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3337 # and does not overlap at least one other patch.
3338 for skypix_id in itertools.chain.from_iterable(
3339 range(begin, end) for begin, end in relevant_skypix_ids
3340 ):
3341 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3342 overlapping_patches = {
3343 patch_key
3344 for patch_key, patch_region in patch_regions.items()
3345 if not patch_region.isDisjointFrom(skypix_region)
3346 }
3347 if overlapping_patches and overlapping_patches != patch_regions.keys():
3348 break
3349 else:
3350 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3351 self.assertEqual(
3352 {
3353 (data_id["tract"], data_id["patch"])
3354 for data_id in registry.queryDataIds(
3355 ["patch"],
3356 dataId={skypix_dimension.name: skypix_id},
3357 )
3358 },
3359 overlapping_patches,
3360 )
3361 # Test that a three-way join that includes the common skypix system in
3362 # the dimensions doesn't generate redundant join terms in the query.
3363 full_data_ids = set(
3364 registry.queryDataIds(
3365 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3366 ).expanded()
3367 )
3368 self.assertGreater(len(full_data_ids), 0)
3369 for data_id in full_data_ids:
3370 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3371 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3373 def test_spatial_constraint_queries(self) -> None:
3374 """Test queries in which one spatial dimension in the constraint (data
3375 ID or ``where`` string) constrains a different spatial dimension in the
3376 query result columns.
3377 """
3378 registry = self.makeRegistry()
3379 self.loadData(registry, "hsc-rc2-subset.yaml")
3380 patch_regions = {
3381 (data_id["tract"], data_id["patch"]): data_id.region
3382 for data_id in registry.queryDataIds(["patch"]).expanded()
3383 }
3384 observation_regions = {
3385 (data_id["visit"], data_id["detector"]): data_id.region
3386 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3387 }
3388 all_combos = {
3389 (patch_key, observation_key)
3390 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3391 }
3392 overlapping_combos = {
3393 (patch_key, observation_key)
3394 for patch_key, observation_key in all_combos
3395 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3396 }
3397 # Check a direct spatial join with no constraint first.
3398 self.assertEqual(
3399 {
3400 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3401 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3402 },
3403 overlapping_combos,
3404 )
3405 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3406 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3407 for patch_key, observation_key in overlapping_combos:
3408 overlaps_by_patch[patch_key].add(observation_key)
3409 overlaps_by_observation[observation_key].add(patch_key)
3410 # Find patches and observations that each overlap at least one region
3411 # of the other kind, but not all of them.
3412 nontrivial_patch = next(
3413 iter(
3414 patch_key
3415 for patch_key, observation_keys in overlaps_by_patch.items()
3416 if observation_keys and observation_keys != observation_regions.keys()
3417 )
3418 )
3419 nontrivial_observation = next(
3420 iter(
3421 observation_key
3422 for observation_key, patch_keys in overlaps_by_observation.items()
3423 if patch_keys and patch_keys != patch_regions.keys()
3424 )
3425 )
3426 # Use the nontrivial patches and observations as constraints on the
3427 # other dimensions in various ways, first via a 'where' expression.
3428 # It's better in general to use 'bind' instead of f-strings, but these
3429 # are all integers, so there are no quoting concerns.
3430 self.assertEqual(
3431 {
3432 (data_id["visit"], data_id["detector"])
3433 for data_id in registry.queryDataIds(
3434 ["visit", "detector"],
3435 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3436 skymap="hsc_rings_v1",
3437 )
3438 },
3439 overlaps_by_patch[nontrivial_patch],
3440 )
3441 self.assertEqual(
3442 {
3443 (data_id["tract"], data_id["patch"])
3444 for data_id in registry.queryDataIds(
3445 ["patch"],
3446 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3447 instrument="HSC",
3448 )
3449 },
3450 overlaps_by_observation[nontrivial_observation],
3451 )
3452 # and then via the dataId argument.
3453 self.assertEqual(
3454 {
3455 (data_id["visit"], data_id["detector"])
3456 for data_id in registry.queryDataIds(
3457 ["visit", "detector"],
3458 dataId={
3459 "tract": nontrivial_patch[0],
3460 "patch": nontrivial_patch[1],
3461 },
3462 skymap="hsc_rings_v1",
3463 )
3464 },
3465 overlaps_by_patch[nontrivial_patch],
3466 )
3467 self.assertEqual(
3468 {
3469 (data_id["tract"], data_id["patch"])
3470 for data_id in registry.queryDataIds(
3471 ["patch"],
3472 dataId={
3473 "visit": nontrivial_observation[0],
3474 "detector": nontrivial_observation[1],
3475 },
3476 instrument="HSC",
3477 )
3478 },
3479 overlaps_by_observation[nontrivial_observation],
3480 )
3482 def test_query_projection_drop_postprocessing(self) -> None:
3483 """Test that projections and deduplications on query objects can
3484 drop post-query region filtering to ensure the query remains in
3485 the SQL engine.
3486 """
3487 registry = self.makeRegistry()
3488 self.loadData(registry, "base.yaml")
3489 self.loadData(registry, "spatial.yaml")
3491 def pop_transfer(tree: Relation) -> Relation:
3492 """If a relation tree terminates with a transfer to a new engine,
3493 return the relation prior to that transfer. If not, return the
3494 original relation.
3496 Parameters
3497 ----------
3498 tree : `Relation`
3499 The relation tree to modify.
3500 """
3501 match tree:
3502 case Transfer(target=target):
3503 return target
3504 case _:
3505 return tree
3507 # There's no public way to get a Query object yet, so we get one from a
3508 # DataCoordinateQueryResults private attribute. When a public API is
3509 # available this test should use it.
3510 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3511 # We expect this query to terminate in the iteration engine originally,
3512 # because region-filtering is necessary.
3513 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3514 # If we deduplicate, we usually have to do that downstream of the
3515 # filtering. That means the deduplication has to happen in the
3516 # iteration engine.
3517 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3518 # If we pass drop_postprocessing, we instead drop the region filtering
3519 # so the deduplication can happen in SQL (though there might still be
3520 # transfer to iteration at the tail of the tree that we can ignore;
3521 # that's what the pop_transfer takes care of here).
3522 self.assertIsInstance(
3523 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3524 sql.Engine,
3525 )
3527 def test_query_find_datasets_drop_postprocessing(self) -> None:
3528 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3529 problems with the FindFirstDataset relation operation.
3530 """
3531 # Setup: load some visit, tract, and patch records, and insert two
3532 # datasets with dimensions {visit, patch}, with one in each of two
3533 # RUN collections.
3534 registry = self.makeRegistry()
3535 self.loadData(registry, "base.yaml")
3536 self.loadData(registry, "spatial.yaml")
3537 storage_class = StorageClass("Warpy")
3538 registry.storageClasses.registerStorageClass(storage_class)
3539 dataset_type = DatasetType(
3540 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3541 )
3542 registry.registerDatasetType(dataset_type)
3543 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3544 registry.registerRun("run1")
3545 registry.registerRun("run2")
3546 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3547 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3548 # Query for the dataset using queryDataIds(...).findDatasets(...)
3549 # against only one of the two collections. This should work even
3550 # though the relation returned by queryDataIds ends with
3551 # iteration-engine region-filtering, because we can recognize before
3552 # running the query that there is only one collection to search and
3553 # hence the (default) findFirst=True is irrelevant, and joining in the
3554 # dataset query commutes past the iteration-engine postprocessing.
3555 query1 = registry.queryDataIds(
3556 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3557 )
3558 self.assertEqual(
3559 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3560 {ref1},
3561 )
3562 # Query for the dataset using queryDataIds(...).findDatasets(...)
3563 # against both collections. This can only work if the FindFirstDataset
3564 # operation can be commuted past the iteration-engine operations into SQL.
3565 query2 = registry.queryDataIds(
3566 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3567 )
3568 self.assertEqual(
3569 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3570 {ref2},
3571 )
3573 def test_query_empty_collections(self) -> None:
3574 """Test for registry query methods with empty collections. The methods
3575 should return empty result set (or None when applicable) and provide
3576 "doomed" diagnostics.
3577 """
3578 registry = self.makeRegistry()
3579 self.loadData(registry, "base.yaml")
3580 self.loadData(registry, "datasets.yaml")
3582 # Tests for registry.findDataset()
3583 with self.assertRaises(NoDefaultCollectionError):
3584 registry.findDataset("bias", instrument="Cam1", detector=1)
3585 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3586 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3588 # Tests for registry.queryDatasets()
3589 with self.assertRaises(NoDefaultCollectionError):
3590 registry.queryDatasets("bias")
3591 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3593 result = registry.queryDatasets("bias", collections=[])
3594 self.assertEqual(len(list(result)), 0)
3595 messages = list(result.explain_no_results())
3596 self.assertTrue(messages)
3597 self.assertTrue(any("because collection list is empty" in message for message in messages))
3599 # Tests for registry.queryDataIds()
3600 with self.assertRaises(NoDefaultCollectionError):
3601 registry.queryDataIds("detector", datasets="bias")
3602 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3604 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3605 self.assertEqual(len(list(result)), 0)
3606 messages = list(result.explain_no_results())
3607 self.assertTrue(messages)
3608 self.assertTrue(any("because collection list is empty" in message for message in messages))
3610 # Tests for registry.queryDimensionRecords()
3611 with self.assertRaises(NoDefaultCollectionError):
3612 registry.queryDimensionRecords("detector", datasets="bias")
3613 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3615 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3616 self.assertEqual(len(list(result)), 0)
3617 messages = list(result.explain_no_results())
3618 self.assertTrue(messages)
3619 self.assertTrue(any("because collection list is empty" in message for message in messages))
3621 def test_dataset_followup_spatial_joins(self) -> None:
3622 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3623 is involved.
3624 """
3625 registry = self.makeRegistry()
3626 self.loadData(registry, "base.yaml")
3627 self.loadData(registry, "spatial.yaml")
3628 pvi_dataset_type = DatasetType(
3629 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3630 )
3631 registry.registerDatasetType(pvi_dataset_type)
3632 collection = "datasets"
3633 registry.registerRun(collection)
3634 (pvi1,) = registry.insertDatasets(
3635 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3636 )
3637 (pvi2,) = registry.insertDatasets(
3638 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3639 )
3640 (pvi3,) = registry.insertDatasets(
3641 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3642 )
3643 self.assertEqual(
3644 set(
3645 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3646 .expanded()
3647 .findRelatedDatasets("pvi", [collection])
3648 ),
3649 {
3650 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3651 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3652 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3653 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3654 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3655 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3656 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3657 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3658 },
3659 )