# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None
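# numpy is optional for these tests; testNumpyDataId below is skipped when it
# is not available.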

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import MissingCollectionError, MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need entirely default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``, which
        should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
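        # Constraints on multiple columns are ANDed together: of the three
        # rows, only id=1 satisfies both the id and name lists below.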
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
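        # visit 1 is defined from exposure 1 alone, so pairing it with
        # exposure 2 in a single data ID is inconsistent.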
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
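        # bias2 is now certified in Cam1/calib for this timespan, so when a
        # timespan is given the calibration collection participates in the
        # search and its position in the collection list matters.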
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that `SqlRegistry.removeDatasetType` works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that `SqlRegistry.removeDatasetType` raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
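        # Each id-generation mode below consumes a fresh pair of the runs
        # registered above: run{run} first, then run{run+1}.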
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry; this test checks that
        such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Searching for a single component dataset with findDataset should
        # raise.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter. It is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )
1302 def testAttributeManager(self):
1303 """Test basic functionality of attribute manager."""
1304 # number of attributes with schema versions in a fresh database,
1305 # 6 managers with 2 records per manager, plus config for dimensions
1306 VERSION_COUNT = 6 * 2 + 1
1308 registry = self.makeRegistry()
1309 attributes = registry._managers.attributes
1311 # check what get() returns for non-existing key
1312 self.assertIsNone(attributes.get("attr"))
1313 self.assertEqual(attributes.get("attr", ""), "")
1314 self.assertEqual(attributes.get("attr", "Value"), "Value")
1315 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1317 # cannot store empty key or value
1318 with self.assertRaises(ValueError):
1319 attributes.set("", "value")
1320 with self.assertRaises(ValueError):
1321 attributes.set("attr", "")
1323 # set value of non-existing key
1324 attributes.set("attr", "value")
1325 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1326 self.assertEqual(attributes.get("attr"), "value")
1328 # update value of existing key
1329 with self.assertRaises(ButlerAttributeExistsError):
1330 attributes.set("attr", "value2")
1332 attributes.set("attr", "value2", force=True)
1333 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1334 self.assertEqual(attributes.get("attr"), "value2")
1336 # delete existing key
1337 self.assertTrue(attributes.delete("attr"))
1338 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1340 # delete non-existing key
1341 self.assertFalse(attributes.delete("non-attr"))
1343 # store bunch of keys and get the list back
1344 data = [
1345 ("version.core", "1.2.3"),
1346 ("version.dimensions", "3.2.1"),
1347 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1348 ]
1349 for key, value in data:
1350 attributes.set(key, value)
1351 items = dict(attributes.items())
1352 for key, value in data:
1353 self.assertEqual(items[key], value)
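# A hedged usage sketch of the attribute-manager semantics exercised above,
# reusing the `attributes` object from this test; the key name is illustrative.
attributes.set("config.example", "A")  # plain set on a fresh key succeeds
try:
    attributes.set("config.example", "B")  # re-setting without force raises
except ButlerAttributeExistsError:
    attributes.set("config.example", "B", force=True)  # explicit overwrite
assert attributes.get("config.example") == "B"
assert attributes.delete("config.example")  # True when the key existed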
1355 def testQueryDatasetsDeduplication(self):
1356 """Test that the findFirst option to queryDatasets selects datasets
1357 from collections in the order given.
1358 """
1359 registry = self.makeRegistry()
1360 self.loadData(registry, "base.yaml")
1361 self.loadData(registry, "datasets.yaml")
1362 self.assertCountEqual(
1363 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1364 [
1365 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1366 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1367 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1368 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1369 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1370 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1371 ],
1372 )
1373 self.assertCountEqual(
1374 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1375 [
1376 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1377 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1378 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1379 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1380 ],
1381 )
1382 self.assertCountEqual(
1383 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1384 [
1385 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1386 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1387 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1388 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1389 ],
1390 )
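# A sketch (assuming the `registry` and collections loaded above) of the
# findFirst rule these assertions encode: for each data ID, the match from the
# earliest collection in the search path shadows matches from later ones.
winners = {
    ref.dataId["detector"]: ref.run
    for ref in registry.queryDatasets(
        "bias", collections=["imported_g", "imported_r"], findFirst=True
    )
}
# Detectors 1-3 resolve to "imported_g"; detector 4 exists only in "imported_r".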
1392 def testQueryResults(self):
1393 """Test querying for data IDs and then manipulating the QueryResults
1394 object returned to perform other queries.
1395 """
1396 registry = self.makeRegistry()
1397 self.loadData(registry, "base.yaml")
1398 self.loadData(registry, "datasets.yaml")
1399 bias = registry.getDatasetType("bias")
1400 flat = registry.getDatasetType("flat")
1401 # Obtain expected results from methods other than those we're testing
1402 # here. That includes:
1403 # - the dimensions of the data IDs we want to query:
1404 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1405 # - the dimensions of some other data IDs we'll extract from that:
1406 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1407 # - the data IDs we expect to obtain from the first queries:
1408 expectedDataIds = DataCoordinateSet(
1409 {
1410 DataCoordinate.standardize(
1411 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1412 )
1413 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1414 },
1415 dimensions=expected_dimensions,
1416 hasFull=False,
1417 hasRecords=False,
1418 )
1419 # - the flat datasets we expect to find from those data IDs, in just
1420 # one collection (so deduplication is irrelevant):
1421 expectedFlats = [
1422 registry.findDataset(
1423 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1424 ),
1425 registry.findDataset(
1426 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1427 ),
1428 registry.findDataset(
1429 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1430 ),
1431 ]
1432 # - the data IDs we expect to extract from that:
1433 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1434 # - the bias datasets we expect to find from those data IDs, after we
1435 # subset out the physical_filter dimension, first with duplicates:
1436 expectedAllBiases = [
1437 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1438 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1439 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1440 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1441 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1442 ]
1443 # - ...and without duplicates:
1444 expectedDeduplicatedBiases = [
1445 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1446 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1447 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1448 ]
1449 # Test against those expected results, using a "lazy" query for the
1450 # data IDs (which re-executes that query each time we use it to do
1451 # something new).
1452 dataIds = registry.queryDataIds(
1453 ["detector", "physical_filter"],
1454 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1455 instrument="Cam1",
1456 )
1457 self.assertEqual(dataIds.dimensions, expected_dimensions)
1458 self.assertEqual(dataIds.toSet(), expectedDataIds)
1459 self.assertCountEqual(
1460 list(
1461 dataIds.findDatasets(
1462 flat,
1463 collections=["imported_r"],
1464 )
1465 ),
1466 expectedFlats,
1467 )
1468 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1469 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1470 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1471 self.assertCountEqual(
1472 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1473 expectedAllBiases,
1474 )
1475 self.assertCountEqual(
1476 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1477 expectedDeduplicatedBiases,
1478 )
1480 # Searching for a dataset with dimensions we had projected away
1481 # restores those dimensions.
1482 self.assertCountEqual(
1483 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1484 expectedFlats,
1485 )
1487 # Use a dataset type name that is not registered and a DatasetType
1488 # object that is not registered.
1489 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1491 # Test both string name and dataset type object.
1492 test_type: str | DatasetType
1493 for test_type, test_type_name in (
1494 (unknown_type, unknown_type.name),
1495 (unknown_type.name, unknown_type.name),
1496 ):
1497 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1498 list(
1499 subsetDataIds.findDatasets(
1500 test_type, collections=["imported_r", "imported_g"], findFirst=True
1501 )
1502 )
1504 # Materialize the bias dataset queries (only) by putting the results
1505 # into temporary tables, then repeat those tests.
1506 with subsetDataIds.findDatasets(
1507 bias, collections=["imported_r", "imported_g"], findFirst=False
1508 ).materialize() as biases:
1509 self.assertCountEqual(list(biases), expectedAllBiases)
1510 with subsetDataIds.findDatasets(
1511 bias, collections=["imported_r", "imported_g"], findFirst=True
1512 ).materialize() as biases:
1513 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1514 # Materialize the data ID subset query, but not the dataset queries.
1515 with subsetDataIds.materialize() as subsetDataIds:
1516 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1517 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1518 self.assertCountEqual(
1519 list(
1520 subsetDataIds.findDatasets(
1521 bias, collections=["imported_r", "imported_g"], findFirst=False
1522 )
1523 ),
1524 expectedAllBiases,
1525 )
1526 self.assertCountEqual(
1527 list(
1528 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1529 ),
1530 expectedDeduplicatedBiases,
1531 )
1532 # Materialize the dataset queries, too.
1533 with subsetDataIds.findDatasets(
1534 bias, collections=["imported_r", "imported_g"], findFirst=False
1535 ).materialize() as biases:
1536 self.assertCountEqual(list(biases), expectedAllBiases)
1537 with subsetDataIds.findDatasets(
1538 bias, collections=["imported_r", "imported_g"], findFirst=True
1539 ).materialize() as biases:
1540 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1541 # Materialize the original query, but none of the follow-up queries.
1542 with dataIds.materialize() as dataIds:
1543 self.assertEqual(dataIds.dimensions, expected_dimensions)
1544 self.assertEqual(dataIds.toSet(), expectedDataIds)
1545 self.assertCountEqual(
1546 list(
1547 dataIds.findDatasets(
1548 flat,
1549 collections=["imported_r"],
1550 )
1551 ),
1552 expectedFlats,
1553 )
1554 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1555 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1556 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1557 self.assertCountEqual(
1558 list(
1559 subsetDataIds.findDatasets(
1560 bias, collections=["imported_r", "imported_g"], findFirst=False
1561 )
1562 ),
1563 expectedAllBiases,
1564 )
1565 self.assertCountEqual(
1566 list(
1567 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1568 ),
1569 expectedDeduplicatedBiases,
1570 )
1571 # Materialize just the bias dataset queries.
1572 with subsetDataIds.findDatasets(
1573 bias, collections=["imported_r", "imported_g"], findFirst=False
1574 ).materialize() as biases:
1575 self.assertCountEqual(list(biases), expectedAllBiases)
1576 with subsetDataIds.findDatasets(
1577 bias, collections=["imported_r", "imported_g"], findFirst=True
1578 ).materialize() as biases:
1579 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1580 # Materialize the subset data ID query, but not the dataset
1581 # queries.
1582 with subsetDataIds.materialize() as subsetDataIds:
1583 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1584 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1585 self.assertCountEqual(
1586 list(
1587 subsetDataIds.findDatasets(
1588 bias, collections=["imported_r", "imported_g"], findFirst=False
1589 )
1590 ),
1591 expectedAllBiases,
1592 )
1593 self.assertCountEqual(
1594 list(
1595 subsetDataIds.findDatasets(
1596 bias, collections=["imported_r", "imported_g"], findFirst=True
1597 )
1598 ),
1599 expectedDeduplicatedBiases,
1600 )
1601 # Materialize the bias dataset queries, too, so now we're
1602 # materializing every single step.
1603 with subsetDataIds.findDatasets(
1604 bias, collections=["imported_r", "imported_g"], findFirst=False
1605 ).materialize() as biases:
1606 self.assertCountEqual(list(biases), expectedAllBiases)
1607 with subsetDataIds.findDatasets(
1608 bias, collections=["imported_r", "imported_g"], findFirst=True
1609 ).materialize() as biases:
1610 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
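# A hedged sketch of the materialize() pattern repeated throughout this test:
# inside the `with` block the results live in a temporary table, so follow-up
# queries reuse that table instead of re-executing the original query. Names
# reuse this test's registry and `bias` dataset type.
with registry.queryDataIds(["detector"], instrument="Cam1").materialize() as ids:
    n = ids.count(exact=True)  # served from the temporary table
    refs = list(ids.findDatasets(bias, collections=["imported_g"], findFirst=True))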
1612 def testStorageClassPropagation(self):
1613 """Test that queries for datasets respect the storage class passed in
1614 as part of a full dataset type.
1615 """
1616 registry = self.makeRegistry()
1617 self.loadData(registry, "base.yaml")
1618 dataset_type_in_registry = DatasetType(
1619 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1620 )
1621 registry.registerDatasetType(dataset_type_in_registry)
1622 run = "run1"
1623 registry.registerRun(run)
1624 (inserted_ref,) = registry.insertDatasets(
1625 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1626 )
1627 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1628 query_dataset_type = DatasetType(
1629 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1630 )
1631 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1632 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1633 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1634 (query_datasets_ref,) = query_datasets_result
1635 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1636 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1637 query_dataset_type, collections=[run]
1638 )
1639 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1640 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1641 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1642 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1643 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1644 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1645 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
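# A sketch of the propagation rule tested above (assumes the same registry
# state): a query-side DatasetType may differ from the registered one only in
# storage class, and the refs returned carry the query's definition.
override = DatasetType(
    "tbl",
    dimensions=["instrument"],
    storageClass="StructuredDataDict",
    universe=registry.dimensions,
)
(ref,) = registry.queryDatasets(override, collections=["run1"])
assert ref.datasetType == override  # not the "Packages" type in the registry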
1647 def testEmptyDimensionsQueries(self):
1648 """Test Query and QueryResults objects in the case where there are no
1649 dimensions.
1650 """
1651 # Set up test data: one dataset type, two runs, one dataset in each.
1652 registry = self.makeRegistry()
1653 self.loadData(registry, "base.yaml")
1654 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1655 registry.registerDatasetType(schema)
1656 dataId = DataCoordinate.make_empty(registry.dimensions)
1657 run1 = "run1"
1658 run2 = "run2"
1659 registry.registerRun(run1)
1660 registry.registerRun(run2)
1661 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1662 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1663 # Query directly for both of the datasets, and each one, one at a time.
1664 self.checkQueryResults(
1665 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1666 )
1667 self.checkQueryResults(
1668 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1669 [dataset1],
1670 )
1671 self.checkQueryResults(
1672 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1673 [dataset2],
1674 )
1675 # Query for data IDs with no dimensions.
1676 dataIds = registry.queryDataIds([])
1677 self.checkQueryResults(dataIds, [dataId])
1678 # Use queried data IDs to find the datasets.
1679 self.checkQueryResults(
1680 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1681 [dataset1, dataset2],
1682 )
1683 self.checkQueryResults(
1684 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1685 [dataset1],
1686 )
1687 self.checkQueryResults(
1688 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1689 [dataset2],
1690 )
1691 # Now materialize the data ID query results and repeat those tests.
1692 with dataIds.materialize() as dataIds:
1693 self.checkQueryResults(dataIds, [dataId])
1694 self.checkQueryResults(
1695 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1696 [dataset1],
1697 )
1698 self.checkQueryResults(
1699 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1700 [dataset2],
1701 )
1702 # Query for non-empty data IDs, then subset that to get the empty one.
1703 # Repeat the above tests starting from that.
1704 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1705 self.checkQueryResults(dataIds, [dataId])
1706 self.checkQueryResults(
1707 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1708 [dataset1, dataset2],
1709 )
1710 self.checkQueryResults(
1711 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1712 [dataset1],
1713 )
1714 self.checkQueryResults(
1715 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1716 [dataset2],
1717 )
1718 with dataIds.materialize() as dataIds:
1719 self.checkQueryResults(dataIds, [dataId])
1720 self.checkQueryResults(
1721 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1722 [dataset1, dataset2],
1723 )
1724 self.checkQueryResults(
1725 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1726 [dataset1],
1727 )
1728 self.checkQueryResults(
1729 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1730 [dataset2],
1731 )
1732 # Query for non-empty data IDs, then materialize, then subset to get
1733 # the empty one. Repeat again.
1734 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1735 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1736 self.checkQueryResults(dataIds, [dataId])
1737 self.checkQueryResults(
1738 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1739 [dataset1, dataset2],
1740 )
1741 self.checkQueryResults(
1742 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1743 [dataset1],
1744 )
1745 self.checkQueryResults(
1746 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1747 [dataset2],
1748 )
1749 with dataIds.materialize() as dataIds:
1750 self.checkQueryResults(dataIds, [dataId])
1751 self.checkQueryResults(
1752 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1753 [dataset1, dataset2],
1754 )
1755 self.checkQueryResults(
1756 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1757 [dataset1],
1758 )
1759 self.checkQueryResults(
1760 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1761 [dataset2],
1762 )
1763 # Repeat the materialization tests with a dimension element that isn't
1764 # cached, so there's no way we can know when building the query whether
1765 # there are any rows or not (there aren't).
1766 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1767 with dataIds.materialize() as dataIds:
1768 self.checkQueryResults(dataIds, [])
1769 self.checkQueryResults(
1770 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1771 )
1772 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1773 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1774 # Query for non-empty data IDs with a constraint on an empty-data-ID
1775 # dataset that exists.
1776 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1777 self.checkQueryResults(
1778 dataIds.subset(unique=True),
1779 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1780 )
1781 # Again query for non-empty data IDs with a constraint on empty-data-ID
1782 # datasets, but when the datasets don't exist. We delete the existing
1783 # dataset and query just that collection rather than creating a new
1784 # empty collection because this is a bit less likely for our build-time
1785 # logic to shortcut-out (via the collection summaries), and such a
1786 # shortcut would make this test a bit more trivial than we'd like.
1787 registry.removeDatasets([dataset2])
1788 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1789 self.checkQueryResults(dataIds, [])
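# A minimal sketch of the empty-dimensions invariant behind this test: there
# is exactly one empty data ID per universe, and it can be constructed
# directly rather than queried.
empty = DataCoordinate.make_empty(registry.dimensions)
assert list(registry.queryDataIds([])) == [empty]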
1791 def testDimensionDataModifications(self):
1792 """Test that modifying dimension records via:
1793 syncDimensionData(..., update=True) and
1794 insertDimensionData(..., replace=True) works as expected, even in the
1795 presence of datasets using those dimensions and spatial overlap
1796 relationships.
1797 """
1799 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1800 """Unpack a sphgeom.RangeSet into the integers it contains."""
1801 for begin, end in ranges:
1802 yield from range(begin, end)
1804 def _range_set_hull(
1805 ranges: lsst.sphgeom.RangeSet,
1806 pixelization: lsst.sphgeom.HtmPixelization,
1807 ) -> lsst.sphgeom.ConvexPolygon:
1808 """Create a ConvexPolygon hull of the region defined by a set of
1809 HTM pixelization index ranges.
1810 """
1811 points = []
1812 for index in _unpack_range_set(ranges):
1813 points.extend(pixelization.triangle(index).getVertices())
1814 return lsst.sphgeom.ConvexPolygon(points)
1816 # Use HTM to set up an initial parent region (one arbitrary trixel)
1817 # and four child regions (the trixels within the parent at the next
1818 # level). We'll use the parent as a tract/visit region and the children
1819 # as its patch/visit_detector regions.
1820 registry = self.makeRegistry()
1821 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1822 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1823 index = 12288
1824 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1825 assert htm6.universe().contains(child_ranges_small)
1826 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1827 parent_region_small = lsst.sphgeom.ConvexPolygon(
1828 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1829 )
1830 assert all(parent_region_small.contains(c) for c in child_regions_small)
1831 # Make a larger version of each child region, defined to be the set of
1832 # htm6 trixels that overlap the original's bounding circle. Make a new
1833 # parent that's the convex hull of the new children.
1834 child_regions_large = [
1835 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1836 ]
1837 assert all(
1838 large.contains(small)
1839 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1840 )
1841 parent_region_large = lsst.sphgeom.ConvexPolygon(
1842 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1843 )
1844 assert all(parent_region_large.contains(c) for c in child_regions_large)
1845 assert parent_region_large.contains(parent_region_small)
1846 assert not parent_region_small.contains(parent_region_large)
1847 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1848 # Find some commonSkyPix indices that overlap the large regions but do
1849 # not overlap the small regions. We use commonSkyPix here to make sure the
1850 # real tests later involve what's in the database, not just post-query
1851 # filtering of regions.
1852 child_difference_indices = []
1853 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1854 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1855 assert difference, "if this is empty, we can't test anything useful with these regions"
1856 assert all(
1857 not commonSkyPix.triangle(d).isDisjointFrom(large)
1858 and commonSkyPix.triangle(d).isDisjointFrom(small)
1859 for d in difference
1860 )
1861 child_difference_indices.append(difference)
1862 parent_difference_indices = list(
1863 _unpack_range_set(
1864 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1865 )
1866 )
1867 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1868 assert all(
1869 (
1870 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1871 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1872 )
1873 for d in parent_difference_indices
1874 )
1875 # Now that we've finally got those regions, we'll insert the large ones
1876 # as tract/patch dimension records.
1877 skymap_name = "testing_v1"
1878 registry.insertDimensionData(
1879 "skymap",
1880 {
1881 "name": skymap_name,
1882 "hash": bytes([42]),
1883 "tract_max": 1,
1884 "patch_nx_max": 2,
1885 "patch_ny_max": 2,
1886 },
1887 )
1888 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1889 registry.insertDimensionData(
1890 "patch",
1891 *[
1892 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1893 for n, c in enumerate(child_regions_large)
1894 ],
1895 )
1896 # Add a dataset that uses these dimensions to make sure that modifying
1897 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1898 # implement insert with replace=True as delete-then-insert).
1899 dataset_type = DatasetType(
1900 "coadd",
1901 dimensions=["tract", "patch"],
1902 universe=registry.dimensions,
1903 storageClass="Exposure",
1904 )
1905 registry.registerDatasetType(dataset_type)
1906 registry.registerCollection("the_run", CollectionType.RUN)
1907 registry.insertDatasets(
1908 dataset_type,
1909 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1910 run="the_run",
1911 )
1912 # Query for tracts and patches that overlap some "difference"
1913 # commonSkyPix pixels; there should be overlaps, because the database has
1914 # the "large" suite of regions.
1915 self.assertEqual(
1916 {0},
1917 {
1918 data_id["tract"]
1919 for data_id in registry.queryDataIds(
1920 ["tract"],
1921 skymap=skymap_name,
1922 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1923 )
1924 },
1925 )
1926 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1927 self.assertIn(
1928 patch_id,
1929 {
1930 data_id["patch"]
1931 for data_id in registry.queryDataIds(
1932 ["patch"],
1933 skymap=skymap_name,
1934 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1935 )
1936 },
1937 )
1938 # Use sync to update the tract region and insert to update the regions
1939 # of the patches, to the "small" suite.
1940 updated = registry.syncDimensionData(
1941 "tract",
1942 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1943 update=True,
1944 )
1945 self.assertEqual(updated, {"region": parent_region_large})
1946 registry.insertDimensionData(
1947 "patch",
1948 *[
1949 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1950 for n, c in enumerate(child_regions_small)
1951 ],
1952 replace=True,
1953 )
1954 # Query again; there now should be no such overlaps, because the
1955 # database has the "small" suite of regions.
1956 self.assertFalse(
1957 set(
1958 registry.queryDataIds(
1959 ["tract"],
1960 skymap=skymap_name,
1961 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1962 )
1963 )
1964 )
1965 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1966 self.assertNotIn(
1967 patch_id,
1968 {
1969 data_id["patch"]
1970 for data_id in registry.queryDataIds(
1971 ["patch"],
1972 skymap=skymap_name,
1973 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1974 )
1975 },
1976 )
1977 # Update back to the large regions and query one more time.
1978 updated = registry.syncDimensionData(
1979 "tract",
1980 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1981 update=True,
1982 )
1983 self.assertEqual(updated, {"region": parent_region_small})
1984 registry.insertDimensionData(
1985 "patch",
1986 *[
1987 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1988 for n, c in enumerate(child_regions_large)
1989 ],
1990 replace=True,
1991 )
1992 self.assertEqual(
1993 {0},
1994 {
1995 data_id["tract"]
1996 for data_id in registry.queryDataIds(
1997 ["tract"],
1998 skymap=skymap_name,
1999 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2000 )
2001 },
2002 )
2003 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2004 self.assertIn(
2005 patch_id,
2006 {
2007 data_id["patch"]
2008 for data_id in registry.queryDataIds(
2009 ["patch"],
2010 skymap=skymap_name,
2011 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2012 )
2013 },
2014 )
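# A hedged summary of the two update paths exercised above (record values are
# this test's): syncDimensionData(..., update=True) updates a single existing
# row and reports the old values of the fields it changed, while
# insertDimensionData(..., replace=True) overwrites matching rows in bulk.
changed = registry.syncDimensionData(
    "tract", {"skymap": skymap_name, "id": 0, "region": parent_region_small}, update=True
)
# When an update occurred, `changed` maps each replaced field to its previous
# value, e.g. {"region": <old region>}, as the assertions above rely on.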
2016 def testCalibrationCollections(self):
2017 """Test operations on `~CollectionType.CALIBRATION` collections,
2018 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2019 `SqlRegistry.findDataset`, and
2020 `DataCoordinateQueryResults.findRelatedDatasets`.
2021 """
2022 # Setup - make a Registry, fill it with some datasets in
2023 # non-calibration collections.
2024 registry = self.makeRegistry()
2025 self.loadData(registry, "base.yaml")
2026 self.loadData(registry, "datasets.yaml")
2027 # Set up some timestamps.
2028 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2029 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2030 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2031 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2032 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2033 allTimespans = [
2034 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2035 ]
2036 # Insert some exposure records with timespans between each sequential
2037 # pair of those.
2038 registry.insertDimensionData(
2039 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2040 )
2041 registry.insertDimensionData(
2042 "group",
2043 {"instrument": "Cam1", "name": "group0"},
2044 {"instrument": "Cam1", "name": "group1"},
2045 {"instrument": "Cam1", "name": "group2"},
2046 {"instrument": "Cam1", "name": "group3"},
2047 )
2048 registry.insertDimensionData(
2049 "exposure",
2050 {
2051 "instrument": "Cam1",
2052 "id": 0,
2053 "group": "group0",
2054 "obs_id": "zero",
2055 "physical_filter": "Cam1-G",
2056 "day_obs": 20200101,
2057 "timespan": Timespan(t1, t2),
2058 },
2059 {
2060 "instrument": "Cam1",
2061 "id": 1,
2062 "group": "group1",
2063 "obs_id": "one",
2064 "physical_filter": "Cam1-G",
2065 "day_obs": 20200101,
2066 "timespan": Timespan(t2, t3),
2067 },
2068 {
2069 "instrument": "Cam1",
2070 "id": 2,
2071 "group": "group2",
2072 "obs_id": "two",
2073 "physical_filter": "Cam1-G",
2074 "day_obs": 20200101,
2075 "timespan": Timespan(t3, t4),
2076 },
2077 {
2078 "instrument": "Cam1",
2079 "id": 3,
2080 "group": "group3",
2081 "obs_id": "three",
2082 "physical_filter": "Cam1-G",
2083 "day_obs": 20200101,
2084 "timespan": Timespan(t4, t5),
2085 },
2086 )
2087 # Get references to some datasets.
2088 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2089 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2090 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2091 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2092 # Register the main calibration collection we'll be working with.
2093 collection = "Cam1/calibs/default"
2094 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2095 # Cannot associate into a calibration collection (no timespan).
2096 with self.assertRaises(CollectionTypeError):
2097 registry.associate(collection, [bias2a])
2098 # Certify 2a dataset with [t2, t4) validity.
2099 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2100 # Test that we can query for this dataset via the new collection, both
2101 # on its own and with a RUN collection.
2102 self.assertEqual(
2103 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2104 {bias2a},
2105 )
2106 self.assertEqual(
2107 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2108 {
2109 bias2a,
2110 bias2b,
2111 bias3b,
2112 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2113 },
2114 )
2115 self.assertEqual(
2116 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2117 {registry.expandDataId(instrument="Cam1", detector=2)},
2118 )
2119 self.assertEqual(
2120 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2121 {
2122 registry.expandDataId(instrument="Cam1", detector=2),
2123 registry.expandDataId(instrument="Cam1", detector=3),
2124 registry.expandDataId(instrument="Cam1", detector=4),
2125 },
2126 )
2127 self.assertEqual(
2128 set(
2129 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2130 "bias", findFirst=True, collections=[collection]
2131 )
2132 ),
2133 {
2134 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2135 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2136 },
2137 )
2138 self.assertEqual(
2139 set(
2140 registry.queryDataIds(
2141 ["exposure", "detector"], instrument="Cam1", detector=2
2142 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2143 ),
2144 {
2145 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2146 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2147 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2148 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2149 },
2150 )
2152 # We should not be able to certify 2b with anything overlapping that
2153 # window.
2154 with self.assertRaises(ConflictingDefinitionError):
2155 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2156 with self.assertRaises(ConflictingDefinitionError):
2157 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2158 with self.assertRaises(ConflictingDefinitionError):
2159 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2160 with self.assertRaises(ConflictingDefinitionError):
2161 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2162 with self.assertRaises(ConflictingDefinitionError):
2163 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2164 with self.assertRaises(ConflictingDefinitionError):
2165 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2166 with self.assertRaises(ConflictingDefinitionError):
2167 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2168 with self.assertRaises(ConflictingDefinitionError):
2169 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2170 # We should be able to certify 3a with a range overlapping that window,
2171 # because it's for a different detector.
2172 # We'll certify 3a over [t1, t3).
2173 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2174 # Now we'll certify 2b and 3b together over [t4, ∞).
2175 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2177 # Fetch all associations and check that they are what we expect.
2178 self.assertCountEqual(
2179 list(
2180 registry.queryDatasetAssociations(
2181 "bias",
2182 collections=[collection, "imported_g", "imported_r"],
2183 )
2184 ),
2185 [
2186 DatasetAssociation(
2187 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2188 collection="imported_g",
2189 timespan=None,
2190 ),
2191 DatasetAssociation(
2192 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2193 collection="imported_r",
2194 timespan=None,
2195 ),
2196 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2197 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2198 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2199 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2200 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2201 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2202 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2203 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2204 ],
2205 )
2207 class Ambiguous:
2208 """Tag class to denote lookups that should be ambiguous."""
2210 pass
2212 def _assertLookup(
2213 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2214 ) -> None:
2215 """Local function that asserts that a bias lookup returns the given
2216 expected result.
2217 """
2218 if expected is Ambiguous:
2219 with self.assertRaises((DatasetTypeError, LookupError)):
2220 registry.findDataset(
2221 "bias",
2222 collections=collection,
2223 instrument="Cam1",
2224 detector=detector,
2225 timespan=timespan,
2226 )
2227 else:
2228 self.assertEqual(
2229 expected,
2230 registry.findDataset(
2231 "bias",
2232 collections=collection,
2233 instrument="Cam1",
2234 detector=detector,
2235 timespan=timespan,
2236 ),
2237 )
2239 # Systematically test lookups against expected results.
2240 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2241 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2242 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2243 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2244 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2245 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2246 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2247 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2248 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2249 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2250 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2251 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2252 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2253 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2254 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2255 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2256 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2257 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2258 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2259 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2260 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2261 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2262 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2263 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2264 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2265 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2266 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2267 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2268 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2269 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2270 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2271 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2272 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2273 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2274 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2275 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2276 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2277 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2278 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2279 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2280 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2281 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2283 # Test lookups via temporal joins to exposures.
2284 self.assertEqual(
2285 set(
2286 registry.queryDataIds(
2287 ["exposure", "detector"], instrument="Cam1", detector=2
2288 ).findRelatedDatasets("bias", collections=[collection])
2289 ),
2290 {
2291 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2292 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2293 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2294 },
2295 )
2296 self.assertEqual(
2297 set(
2298 registry.queryDataIds(
2299 ["exposure", "detector"], instrument="Cam1", detector=3
2300 ).findRelatedDatasets("bias", collections=[collection])
2301 ),
2302 {
2303 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2304 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2305 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2306 },
2307 )
2308 self.assertEqual(
2309 set(
2310 registry.queryDataIds(
2311 ["exposure", "detector"], instrument="Cam1", detector=2
2312 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2313 ),
2314 {
2315 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2316 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2317 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2318 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2319 },
2320 )
2321 self.assertEqual(
2322 set(
2323 registry.queryDataIds(
2324 ["exposure", "detector"], instrument="Cam1", detector=3
2325 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2326 ),
2327 {
2328 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2329 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2330 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2331 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2332 },
2333 )
2335 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2336 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2337 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2338 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2339 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2340 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2341 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2342 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2343 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2344 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2345 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2346 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2347 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2348 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2349 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2350 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2351 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2352 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2353 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2354 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2355 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2356 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2357 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2358 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2359 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2360 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2361 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2362 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2363 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2364 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2365 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2366 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2367 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2368 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2369 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2370 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2371 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2372 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2373 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2374 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2375 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2376 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2377 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2378 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2379 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2380 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2382 # Decertify everything, this time with explicit data IDs, then check
2383 # that no lookups succeed.
2384 registry.decertify(
2385 collection,
2386 "bias",
2387 Timespan(None, None),
2388 dataIds=[
2389 dict(instrument="Cam1", detector=2),
2390 dict(instrument="Cam1", detector=3),
2391 ],
2392 )
2393 for detector in (2, 3):
2394 for timespan in allTimespans:
2395 _assertLookup(detector=detector, timespan=timespan, expected=None)
2396 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2397 # those.
2398 registry.certify(
2399 collection,
2400 [bias2a, bias3a],
2401 Timespan(None, None),
2402 )
2403 for timespan in allTimespans:
2404 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2405 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2406 # Decertify just bias2a over [t2, t4).
2407 # This should split a single certification row into two (and leave the
2408 # other existing row, for bias3a, alone).
2409 registry.decertify(
2410 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2411 )
2412 for timespan in allTimespans:
2413 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2414 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2415 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2416 if overlapsBefore and overlapsAfter:
2417 expected = Ambiguous
2418 elif overlapsBefore or overlapsAfter:
2419 expected = bias2a
2420 else:
2421 expected = None
2422 _assertLookup(detector=2, timespan=timespan, expected=expected)
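# A short sketch of the calibration-lookup contract tested exhaustively above
# (reusing this test's collection, timestamps, and refs): findDataset with a
# timespan returns the one dataset whose validity range overlaps it, None when
# nothing overlaps, and raises when more than one certification overlaps.
ref = registry.findDataset(
    "bias",
    collections=collection,
    instrument="Cam1",
    detector=3,
    timespan=Timespan(t1, t2),
)
assert ref == bias3a  # after the edits above, only bias3a covers detector 3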
2424 def testSkipCalibs(self):
2425 """Test how queries handle skipping of calibration collections."""
2426 registry = self.makeRegistry()
2427 self.loadData(registry, "base.yaml")
2428 self.loadData(registry, "datasets.yaml")
2430 coll_calib = "Cam1/calibs/default"
2431 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2433 # Add all biases to the calibration collection.
2434 # Without this, the logic that prunes dataset subqueries based on
2435 # datasetType-collection summary information will fire before the logic
2436 # we want to test below. This is a good thing (it avoids the dreaded
2437 # NotImplementedError a bit more often) everywhere but here.
2438 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2440 coll_list = [coll_calib, "imported_g", "imported_r"]
2441 chain = "Cam1/chain"
2442 registry.registerCollection(chain, type=CollectionType.CHAINED)
2443 registry.setCollectionChain(chain, coll_list)
2445 # An explicit list will raise if findFirst=True or there are temporal
2446 # dimensions.
2447 with self.assertRaises(NotImplementedError):
2448 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2449 with self.assertRaises(NotImplementedError):
2450 registry.queryDataIds(
2451 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2452 ).count()
2454 # A chained collection will skip the calibration collection.
2455 datasets = list(registry.queryDatasets("bias", collections=chain))
2456 self.assertGreater(len(datasets), 0)
2458 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2459 self.assertGreater(len(dataIds), 0)
2461 # A glob pattern will skip it too.
2462 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2463 self.assertGreater(len(datasets), 0)
2465 # A regular expression will skip it too.
2466 pattern = re.compile(".*")
2467 datasets = list(registry.queryDatasets("bias", collections=pattern))
2468 self.assertGreater(len(datasets), 0)
2470 # Ellipsis should work as usual.
2471 datasets = list(registry.queryDatasets("bias", collections=...))
2472 self.assertGreater(len(datasets), 0)
2474 # A few tests with findFirst.
2475 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2476 self.assertGreater(len(datasets), 0)
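# A hedged sketch of the chain behavior relied on above: a CHAINED collection
# searches its children in order, and children that cannot support the query
# (here, the calibration collection in a non-temporal search) are skipped
# rather than raising. The chain name below is illustrative; other names reuse
# this test's.
registry.registerCollection("Cam1/chain2", type=CollectionType.CHAINED)
registry.setCollectionChain("Cam1/chain2", [coll_calib, "imported_g"])
refs = list(registry.queryDatasets("bias", collections="Cam1/chain2"))  # no raise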
2478 def testIngestTimeQuery(self):
2479 registry = self.makeRegistry()
2480 self.loadData(registry, "base.yaml")
2481 dt0 = datetime.datetime.now(datetime.UTC)
2482 self.loadData(registry, "datasets.yaml")
2483 dt1 = datetime.datetime.now(datetime.UTC)
2485 datasets = list(registry.queryDatasets(..., collections=...))
2486 len0 = len(datasets)
2487 self.assertGreater(len0, 0)
2489 where = "ingest_date > T'2000-01-01'"
2490 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2491 len1 = len(datasets)
2492 self.assertEqual(len0, len1)
2494 # no one will ever use this piece of software in 30 years
2495 where = "ingest_date > T'2050-01-01'"
2496 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2497 len2 = len(datasets)
2498 self.assertEqual(len2, 0)
2500 # Check more exact timing to make sure there is no 37-second offset
2501 # (after fixing DM-30124). SQLite time precision is 1 second, make
2502 # sure that we don't test with higher precision.
2503 tests = [
2504 # format: (timestamp, operator, expected_len)
2505 (dt0 - timedelta(seconds=1), ">", len0),
2506 (dt0 - timedelta(seconds=1), "<", 0),
2507 (dt1 + timedelta(seconds=1), "<", len0),
2508 (dt1 + timedelta(seconds=1), ">", 0),
2509 ]
2510 for dt, op, expect_len in tests:
2511 dt_str = dt.isoformat(sep=" ")
2513 where = f"ingest_date {op} T'{dt_str}'"
2514 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2515 self.assertEqual(len(datasets), expect_len)
2517 # same with bind using datetime or astropy Time
2518 where = f"ingest_date {op} ingest_time"
2519 datasets = list(
2520 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2521 )
2522 self.assertEqual(len(datasets), expect_len)
2524 dt_astropy = astropy.time.Time(dt, format="datetime")
2525 datasets = list(
2526 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2527 )
2528 self.assertEqual(len(datasets), expect_len)
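# A sketch of the two equivalent ways to constrain on ingest_date shown above:
# an inline T'...' literal in the expression, or a bind value (a datetime or
# an astropy Time). The cutoff below is illustrative.
cutoff = datetime.datetime.now(datetime.UTC) - timedelta(days=1)
inline = registry.queryDatasets(
    ..., collections=..., where=f"ingest_date > T'{cutoff.isoformat(sep=' ')}'"
)
bound = registry.queryDatasets(
    ..., collections=..., where="ingest_date > cutoff", bind={"cutoff": cutoff}
)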
2530 def testTimespanQueries(self):
2531 """Test query expressions involving timespans."""
2532 registry = self.makeRegistry()
2533 self.loadData(registry, "hsc-rc2-subset.yaml")
2534 # All visits in the database; mapping from ID to timespan.
2535 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2536 # Just those IDs, sorted (which is also temporal sorting, because HSC
2537 # visit IDs are monotonically increasing).
2538 ids = sorted(visits.keys())
2539 self.assertGreater(len(ids), 20)
2540 # Pick some quasi-random indexes into `ids` to play with.
2541 i1 = int(len(ids) * 0.1)
2542 i2 = int(len(ids) * 0.3)
2543 i3 = int(len(ids) * 0.6)
2544 i4 = int(len(ids) * 0.8)
2545 # Extract some times from those: just before the beginning of i1 (which
2546 # should be after the end of the previous visit), exactly the
2547 # beginning of i2, just after the beginning of i3 (and before its end),
2548 # and the exact end of i4.
2549 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2550 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2551 t2 = visits[ids[i2]].begin
2552 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2553 self.assertLess(t3, visits[ids[i3]].end)
2554 t4 = visits[ids[i4]].end
2555 # Make sure those are actually in order.
2556 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2558 bind = {
2559 "t1": t1,
2560 "t2": t2,
2561 "t3": t3,
2562 "t4": t4,
2563 "ts23": Timespan(t2, t3),
2564 }
2566 def query(where):
2567 """Return results as a sorted, deduplicated list of visit IDs.
2569 Parameters
2570 ----------
2571 where : `str`
2572 The WHERE clause for the query.
2573 """
2574 return sorted(
2575 {
2576 dataId["visit"]
2577 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2578 }
2579 )
2581 # Try a bunch of timespan queries, mixing up the bounds themselves,
2582 # where they appear in the expression, and how we get the timespan into
2583 # the expression.
2585 # t1 is before the start of i1, so this should not include i1.
2586 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2587 # t2 is exactly at the start of i2, but ends are exclusive, so these
2588 # should not include i2.
2589 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2590 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2591 # t3 is in the middle of i3, so this should include i3.
2592 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2593 # This one should not include t3 by the same reasoning.
2594 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2595 # t4 is exactly at the end of i4, so this should include i4.
2596 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2597 # i4's upper bound of t4 is exclusive, so this should not include i4.
2598 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2600 # Now some timespan vs. time scalar queries.
2601 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2602 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2603 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2604 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2605 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2606 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2608 # Empty timespans should not overlap anything.
2609 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
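# A minimal sketch of the half-open convention these assertions depend on:
# Timespan bounds are [begin, end), so spans that share only an endpoint do
# not overlap. Times reuse this test's t1 < t2 < t3.
a = Timespan(t1, t2)
b = Timespan(t2, t3)
assert not a.overlaps(b)  # adjacent, not overlapping
assert Timespan(t1, t3).overlaps(b)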
2611 def testCollectionSummaries(self):
2612 """Test recording and retrieval of collection summaries."""
2613 self.maxDiff = None
2614 registry = self.makeRegistry()
2615 # Importing datasets from yaml should go through the code path where
2616 # we update collection summaries as we insert datasets.
2617 self.loadData(registry, "base.yaml")
2618 self.loadData(registry, "datasets.yaml")
2619 flat = registry.getDatasetType("flat")
2620 expected1 = CollectionSummary()
2621 expected1.dataset_types.add(registry.getDatasetType("bias"))
2622 expected1.add_data_ids(
2623 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2624 )
2625 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2626 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2627 # Create a chained collection with both of the imported runs; the
2628 # summary should be the same, because it's a union with itself.
2629 chain = "chain"
2630 registry.registerCollection(chain, CollectionType.CHAINED)
2631 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2632 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2633 # Associate flats only into a tagged collection and a calibration
2634 # collection to check summaries of those.
2635 tag = "tag"
2636 registry.registerCollection(tag, CollectionType.TAGGED)
2637 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2638 calibs = "calibs"
2639 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2640 registry.certify(
2641 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2642 )
2643 expected2 = expected1.copy()
2644 expected2.dataset_types.discard("bias")
2645 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2646 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2647 # Explicitly calling SqlRegistry.refresh() should load those same
2648 # summaries, via a totally different code path.
2649 registry.refresh()
2650 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2651 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2652 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2653 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2655 def testBindInQueryDatasets(self):
2656 """Test that the bind parameter is correctly forwarded in
2657 queryDatasets recursion.
2658 """
2659 registry = self.makeRegistry()
2660 # Load some datasets for the bind-forwarding queries below to match
2661 # against.
2662 self.loadData(registry, "base.yaml")
2663 self.loadData(registry, "datasets.yaml")
2664 self.assertEqual(
2665 set(registry.queryDatasets("flat", band="r", collections=...)),
2666 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2667 )
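# A minimal usage sketch of the bind mechanism (assuming a populated
# registry like the one above; the helper itself is hypothetical, not
# part of the test suite): names in the `where` string are resolved
# from the bind mapping, so values need no quoting or escaping.
def _query_flats_for_band(registry, band):
    return set(
        registry.queryDatasets(
            "flat", where="band = my_band", bind={"my_band": band}, collections=...
        )
    )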
2669 def testQueryIntRangeExpressions(self):
2670 """Test integer range expressions in ``where`` arguments.
2672 Note that our expressions use inclusive stop values, unlike Python's.
2673 """
2674 registry = self.makeRegistry()
2675 self.loadData(registry, "base.yaml")
2676 self.assertEqual(
2677 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2678 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2679 )
2680 self.assertEqual(
2681 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2682 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2683 )
2684 self.assertEqual(
2685 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2686 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2687 )
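# For reference, how the `(start..stop:stride)` syntax maps onto Python
# ranges: the stop value is inclusive, so the Python equivalent needs
# stop + 1. `_expand_range` is a local illustrative helper, not butler
# API.
def _expand_range(start, stop, stride=1):
    return set(range(start, stop + 1, stride))

assert _expand_range(1, 2) == {1, 2}
assert _expand_range(1, 4, 2) == {1, 3}
assert _expand_range(2, 4, 2) == {2, 4}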
2689 def testQueryResultSummaries(self):
2690 """Test summary methods like `count`, `any`, and `explain_no_results`
2691 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2692 """
2693 registry = self.makeRegistry()
2694 self.loadData(registry, "base.yaml")
2695 self.loadData(registry, "datasets.yaml")
2696 self.loadData(registry, "spatial.yaml")
2697 # Default test dataset has two collections, each with both flats and
2698 # biases. Add a new collection with only biases.
2699 registry.registerCollection("biases", CollectionType.TAGGED)
2700 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2701 # First query yields two results, and involves no postprocessing.
2702 query1 = registry.queryDataIds(["physical_filter"], band="r")
2703 self.assertTrue(query1.any(execute=False, exact=False))
2704 self.assertTrue(query1.any(execute=True, exact=False))
2705 self.assertTrue(query1.any(execute=True, exact=True))
2706 self.assertEqual(query1.count(exact=False), 2)
2707 self.assertEqual(query1.count(exact=True), 2)
2708 self.assertFalse(list(query1.explain_no_results()))
2709 # Second query should yield no results, which we should see when
2710 # we attempt to expand the data ID.
2711 query2 = registry.queryDataIds(["physical_filter"], band="h")
2712 # There's no execute=False, exact=False test here because the behavior
2713 # is not something we want to guarantee in this case (and exact=False
2714 # says either answer is legal).
2715 self.assertFalse(query2.any(execute=True, exact=False))
2716 self.assertFalse(query2.any(execute=True, exact=True))
2717 self.assertEqual(query2.count(exact=False), 0)
2718 self.assertEqual(query2.count(exact=True), 0)
2719 self.assertTrue(list(query2.explain_no_results()))
2720 # These queries yield no results due to various problems that can be
2721 # spotted prior to execution, yielding helpful diagnostics.
2722 base_query = registry.queryDataIds(["detector", "physical_filter"])
2723 queries_and_snippets = [
2724 (
2725 # Dataset type name doesn't match any existing dataset types.
2726 registry.queryDatasets("nonexistent", collections=...),
2727 ["nonexistent"],
2728 ),
2729 (
2730 # Dataset type object isn't registered.
2731 registry.queryDatasets(
2732 DatasetType(
2733 "nonexistent",
2734 dimensions=["instrument"],
2735 universe=registry.dimensions,
2736 storageClass="Image",
2737 ),
2738 collections=...,
2739 ),
2740 ["nonexistent"],
2741 ),
2742 (
2743 # No datasets of this type in this collection.
2744 registry.queryDatasets("flat", collections=["biases"]),
2745 ["flat", "biases"],
2746 ),
2747 (
2748 # No datasets of this type in this collection.
2749 base_query.findDatasets("flat", collections=["biases"]),
2750 ["flat", "biases"],
2751 ),
2752 (
2753 # No collections matching at all.
2754 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2755 ["potato"],
2756 ),
2757 ]
2758 with self.assertRaises(MissingDatasetTypeError):
2759 # Dataset type name doesn't match any existing dataset types.
2760 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
2761 with self.assertRaises(MissingDatasetTypeError):
2762 # Dataset type name doesn't match any existing dataset types.
2763 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
2764 for query, snippets in queries_and_snippets:
2765 self.assertFalse(query.any(execute=False, exact=False))
2766 self.assertFalse(query.any(execute=True, exact=False))
2767 self.assertFalse(query.any(execute=True, exact=True))
2768 self.assertEqual(query.count(exact=False), 0)
2769 self.assertEqual(query.count(exact=True), 0)
2770 messages = list(query.explain_no_results())
2771 self.assertTrue(messages)
2772 # Want all expected snippets to appear in at least one message.
2773 self.assertTrue(
2774 any(
2775 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2776 ),
2777 messages,
2778 )
2780 # Wildcards on dataset types are not permitted in queryDataIds.
2781 with self.assertRaises(DatasetTypeExpressionError):
2782 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2784 # These queries yield no results due to problems that can be identified
2785 # by cheap follow-up queries, yielding helpful diagnostics.
2786 for query, snippets in [
2787 (
2788 # No records for one of the involved dimensions.
2789 registry.queryDataIds(["subfilter"]),
2790 ["no rows", "subfilter"],
2791 ),
2792 (
2793 # No records for one of the involved dimensions.
2794 registry.queryDimensionRecords("subfilter"),
2795 ["no rows", "subfilter"],
2796 ),
2797 ]:
2798 self.assertFalse(query.any(execute=True, exact=False))
2799 self.assertFalse(query.any(execute=True, exact=True))
2800 self.assertEqual(query.count(exact=True), 0)
2801 messages = list(query.explain_no_results())
2802 self.assertTrue(messages)
2803 # Want all expected snippets to appear in at least one message.
2804 self.assertTrue(
2805 any(
2806 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2807 ),
2808 messages,
2809 )
2811 # This query yields four overlaps in the database, but one is filtered
2812 # out in postprocessing. The count queries aren't accurate because
2813 # they don't account for duplication that happens due to an internal
2814 # join against commonSkyPix.
2815 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2816 self.assertEqual(
2817 {
2818 DataCoordinate.standardize(
2819 instrument="Cam1",
2820 skymap="SkyMap1",
2821 visit=v,
2822 tract=t,
2823 universe=registry.dimensions,
2824 )
2825 for v, t in [(1, 0), (2, 0), (2, 1)]
2826 },
2827 set(query3),
2828 )
2829 self.assertTrue(query3.any(execute=False, exact=False))
2830 self.assertTrue(query3.any(execute=True, exact=False))
2831 self.assertTrue(query3.any(execute=True, exact=True))
2832 self.assertGreaterEqual(query3.count(exact=False), 4)
2833 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2834 self.assertFalse(list(query3.explain_no_results()))
2835 # This query yields overlaps in the database, but all are filtered
2836 # out in postprocessing. The count queries again aren't very useful.
2837 # We have to use `where=` here to avoid an optimization that
2838 # (currently) skips the spatial postprocess-filtering because it
2839 # recognizes that no spatial join is necessary. That's not ideal, but
2840 # fixing it is out of scope for this ticket.
2841 query4 = registry.queryDataIds(
2842 ["visit", "tract"],
2843 instrument="Cam1",
2844 skymap="SkyMap1",
2845 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2846 )
2847 self.assertFalse(set(query4))
2848 self.assertTrue(query4.any(execute=False, exact=False))
2849 self.assertTrue(query4.any(execute=True, exact=False))
2850 self.assertFalse(query4.any(execute=True, exact=True))
2851 self.assertGreaterEqual(query4.count(exact=False), 1)
2852 self.assertEqual(query4.count(exact=True, discard=True), 0)
2853 messages = query4.explain_no_results()
2854 self.assertTrue(messages)
2855 self.assertTrue(any("overlap" in message for message in messages))
2856 # This query should yield results from one dataset type but not the
2857 # other, which is not registered.
2858 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2859 self.assertTrue(set(query5))
2860 self.assertTrue(query5.any(execute=False, exact=False))
2861 self.assertTrue(query5.any(execute=True, exact=False))
2862 self.assertTrue(query5.any(execute=True, exact=True))
2863 self.assertGreaterEqual(query5.count(exact=False), 1)
2864 self.assertGreaterEqual(query5.count(exact=True), 1)
2865 self.assertFalse(list(query5.explain_no_results()))
2866 # This query applies a selection that yields no results, fully in the
2867 # database. Explaining why it fails involves traversing the relation
2868 # tree and running a LIMIT 1 query at each level that has the potential
2869 # to remove rows.
2870 query6 = registry.queryDimensionRecords(
2871 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2872 )
2873 self.assertEqual(query6.count(exact=True), 0)
2874 messages = query6.explain_no_results()
2875 self.assertTrue(messages)
2876 self.assertTrue(any("no-purpose" in message for message in messages))
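# A sketch of the summary-method contract exercised above, written as a
# hypothetical helper (not part of the test suite) over any result
# object with this interface: any(execute=False, exact=False) may
# answer optimistically without running the query; any(execute=True,
# exact=True) is authoritative, including postprocessing; and
# count(exact=False) is only an upper bound when postprocessing can
# drop rows.
def _summarize(result):
    return {
        "maybe_any": result.any(execute=False, exact=False),
        "definitely_any": result.any(execute=True, exact=True),
        "count_upper_bound": result.count(exact=False),
        "count_exact": result.count(exact=True, discard=True),
    }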
2878 def testQueryDataIdsExpressionError(self):
2879 """Test error checking of 'where' expressions in queryDataIds."""
2880 registry = self.makeRegistry()
2881 self.loadData(registry, "base.yaml")
2882 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2883 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2884 registry.queryDataIds(["detector"], where="foo.bar = 12")
2885 with self.assertRaisesRegex(
2886 LookupError, "Dimension element name cannot be inferred in this context."
2887 ):
2888 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2890 def testQueryDataIdsOrderBy(self):
2891 """Test order_by and limit on result returned by queryDataIds()."""
2892 registry = self.makeRegistry()
2893 self.loadData(registry, "base.yaml")
2894 self.loadData(registry, "datasets.yaml")
2895 self.loadData(registry, "spatial.yaml")
2897 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2898 return registry.queryDataIds(
2899 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2900 )
2902 Test = namedtuple(
2903 "testQueryDataIdsOrderByTest",
2904 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2905 defaults=(None, None, None),
2906 )
2908 test_data = (
2909 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2910 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2911 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2912 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2913 Test(
2914 "tract.id,visit.id",
2915 "tract,visit",
2916 ((0, 1), (0, 1), (0, 2)),
2917 limit=(3,),
2918 ),
2919 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2920 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2921 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2922 Test(
2923 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2924 ),
2925 Test(
2926 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2927 ),
2928 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2929 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2930 Test(
2931 "tract,-visit.timespan.begin,visit.timespan.end",
2932 "tract,visit",
2933 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2934 ),
2935 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2936 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2937 Test(
2938 "tract,detector",
2939 "tract,detector",
2940 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2941 datasets="flat",
2942 collections="imported_r",
2943 ),
2944 Test(
2945 "tract,detector.full_name",
2946 "tract,detector",
2947 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2948 datasets="flat",
2949 collections="imported_r",
2950 ),
2951 Test(
2952 "tract,detector.raft,detector.name_in_raft",
2953 "tract,detector",
2954 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2955 datasets="flat",
2956 collections="imported_r",
2957 ),
2958 )
2960 for test in test_data:
2961 order_by = test.order_by.split(",")
2962 keys = test.keys.split(",")
2963 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2964 if test.limit is not None:
2965 query = query.limit(*test.limit)
2966 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2967 self.assertEqual(dataIds, test.result)
2969 # Materializing an ordered query is not supported and should raise.
2970 query = do_query(keys).order_by(*order_by)
2971 if test.limit is not None:
2972 query = query.limit(*test.limit)
2973 with self.assertRaises(RelationalAlgebraError):
2974 with query.materialize():
2975 pass
2977 # errors in a name
2978 for order_by in ("", "-"):
2979 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2980 list(do_query().order_by(order_by))
2982 for order_by in ("undimension.name", "-undimension.name"):
2983 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2984 list(do_query().order_by(order_by))
2986 for order_by in ("attract", "-attract"):
2987 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2988 list(do_query().order_by(order_by))
2990 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2991 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2993 with self.assertRaisesRegex(
2994 ValueError,
2995 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
2996 r"qualify timespan with specific dimension name\.",
2997 ):
2998 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3000 with self.assertRaisesRegex(
3001 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3002 ):
3003 list(do_query("tract").order_by("timespan.begin"))
3005 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3006 list(do_query("tract").order_by("tract.timespan.begin"))
3008 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3009 list(do_query("tract").order_by("tract.name"))
3011 with self.assertRaisesRegex(
3012 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3013 ):
3014 list(do_query("visit").order_by("timestamp.begin"))
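# A compact usage sketch of the ordering API tested above (assuming a
# registry populated as in this test; the helper is hypothetical): a
# leading '-' reverses a sort term, dotted names qualify a field by
# dimension element, and limit(n, offset) applies after ordering.
def _latest_visits_per_tract(registry, n=3):
    query = registry.queryDataIds(
        ["tract", "visit"], instrument="Cam1", skymap="SkyMap1"
    )
    return list(query.order_by("tract", "-visit.timespan.begin").limit(n))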
3016 def testQueryDataIdsGovernorExceptions(self):
3017 """Test exceptions raised by queryDataIds() for incorrect governors."""
3018 registry = self.makeRegistry()
3019 self.loadData(registry, "base.yaml")
3020 self.loadData(registry, "datasets.yaml")
3021 self.loadData(registry, "spatial.yaml")
3023 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3024 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3026 Test = namedtuple(
3027 "testQueryDataIdExceptionsTest",
3028 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3029 defaults=(None, None, None, {}, None, 0),
3030 )
3032 test_data = (
3033 Test("tract,visit", count=6),
3034 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3035 Test(
3036 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3037 ),
3038 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3039 Test(
3040 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3041 ),
3042 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3043 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3044 Test(
3045 "tract,visit",
3046 where="instrument=cam AND skymap=map",
3047 bind={"cam": "Cam1", "map": "SkyMap1"},
3048 count=6,
3049 ),
3050 Test(
3051 "tract,visit",
3052 where="instrument=cam AND skymap=map",
3053 bind={"cam": "Cam", "map": "SkyMap"},
3054 exception=DataIdValueError,
3055 ),
3056 )
3058 for test in test_data:
3059 dimensions = test.dimensions.split(",")
3060 if test.exception:
3061 with self.assertRaises(test.exception):
3062 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3063 else:
3064 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3065 self.assertEqual(query.count(discard=True), test.count)
3067 # and materialize
3068 if test.exception:
3069 with self.assertRaises(test.exception):
3070 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3071 with query.materialize() as materialized:
3072 materialized.count(discard=True)
3073 else:
3074 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3075 with query.materialize() as materialized:
3076 self.assertEqual(materialized.count(discard=True), test.count)
3078 def testQueryDimensionRecordsOrderBy(self):
3079 """Test order_by and limit on result returned by
3080 queryDimensionRecords().
3081 """
3082 registry = self.makeRegistry()
3083 self.loadData(registry, "base.yaml")
3084 self.loadData(registry, "datasets.yaml")
3085 self.loadData(registry, "spatial.yaml")
3087 def do_query(element, datasets=None, collections=None):
3088 return registry.queryDimensionRecords(
3089 element, instrument="Cam1", datasets=datasets, collections=collections
3090 )
3092 query = do_query("detector")
3093 self.assertEqual(len(list(query)), 4)
3095 Test = namedtuple(
3096 "testQueryDataIdsOrderByTest",
3097 ("element", "order_by", "result", "limit", "datasets", "collections"),
3098 defaults=(None, None, None),
3099 )
3101 test_data = (
3102 Test("detector", "detector", (1, 2, 3, 4)),
3103 Test("detector", "-detector", (4, 3, 2, 1)),
3104 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3105 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3106 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3107 Test("visit", "visit", (1, 2)),
3108 Test("visit", "-visit.id", (2, 1)),
3109 Test("visit", "zenith_angle", (1, 2)),
3110 Test("visit", "-visit.name", (2, 1)),
3111 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3112 )
3114 for test in test_data:
3115 order_by = test.order_by.split(",")
3116 query = do_query(test.element).order_by(*order_by)
3117 if test.limit is not None:
3118 query = query.limit(*test.limit)
3119 dataIds = tuple(rec.id for rec in query)
3120 self.assertEqual(dataIds, test.result)
3122 # errors in a name
3123 for order_by in ("", "-"):
3124 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3125 list(do_query("detector").order_by(order_by))
3127 for order_by in ("undimension.name", "-undimension.name"):
3128 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3129 list(do_query("detector").order_by(order_by))
3131 for order_by in ("attract", "-attract"):
3132 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3133 list(do_query("detector").order_by(order_by))
3135 for order_by in ("timestamp.begin", "-timestamp.begin"):
3136 with self.assertRaisesRegex(
3137 ValueError,
3138 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3139 r"perhaps you meant 'timespan.begin'\?",
3140 ):
3141 list(do_query("visit").order_by(order_by))
3143 def testQueryDimensionRecordsExceptions(self):
3144 """Test exceptions raised by queryDimensionRecords()."""
3145 registry = self.makeRegistry()
3146 self.loadData(registry, "base.yaml")
3147 self.loadData(registry, "datasets.yaml")
3148 self.loadData(registry, "spatial.yaml")
3150 result = registry.queryDimensionRecords("detector")
3151 self.assertEqual(result.count(), 4)
3152 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3153 self.assertEqual(result.count(), 4)
3154 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3155 self.assertEqual(result.count(), 4)
3156 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3157 self.assertEqual(result.count(), 4)
3158 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3159 self.assertEqual(result.count(), 4)
3161 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3162 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3163 result.count()
3165 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3166 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3167 result.count()
3169 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3170 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3171 result.count()
3173 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3174 result = registry.queryDimensionRecords(
3175 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3176 )
3177 result.count()
3179 def testDatasetConstrainedDimensionRecordQueries(self):
3180 """Test that queryDimensionRecords works even when given a dataset
3181 constraint whose dimensions extend beyond the requested dimension
3182 element's.
3183 """
3184 registry = self.makeRegistry()
3185 self.loadData(registry, "base.yaml")
3186 self.loadData(registry, "datasets.yaml")
3187 # Query for physical_filter dimension records, using a dataset that
3188 # has both physical_filter and detector dimensions.
3189 records = registry.queryDimensionRecords(
3190 "physical_filter",
3191 datasets=["flat"],
3192 collections="imported_r",
3193 )
3194 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3195 # Trying to constrain by all dataset types is an error.
3196 with self.assertRaises(TypeError):
3197 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3199 def testSkyPixDatasetQueries(self):
3200 """Test that we can build queries involving skypix dimensions as long
3201 as a dataset type that uses those dimensions is included.
3202 """
3203 registry = self.makeRegistry()
3204 self.loadData(registry, "base.yaml")
3205 dataset_type = DatasetType(
3206 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3207 )
3208 registry.registerDatasetType(dataset_type)
3209 run = "r"
3210 registry.registerRun(run)
3211 # First try queries where there are no datasets; the concern is whether
3212 # we can even build and execute these queries without raising, even
3213 # when "doomed" query shortcuts are in play.
3214 self.assertFalse(
3215 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3216 )
3217 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3218 # Now add a dataset and see that we can get it back.
3219 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3220 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3221 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3222 self.assertEqual(
3223 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3224 {data_id},
3225 )
3226 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3228 def testDatasetIdFactory(self):
3229 """Simple test for DatasetIdFactory, mostly to catch potential changes
3230 in its API.
3231 """
3232 registry = self.makeRegistry()
3233 factory = DatasetIdFactory()
3234 dataset_type = DatasetType(
3235 "datasetType",
3236 dimensions=["detector", "instrument"],
3237 universe=registry.dimensions,
3238 storageClass="int",
3239 )
3240 run = "run"
3241 data_id = DataCoordinate.standardize(
3242 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3243 )
3245 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3246 self.assertIsInstance(datasetId, uuid.UUID)
3247 self.assertEqual(datasetId.version, 4)
3249 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3250 self.assertIsInstance(datasetId, uuid.UUID)
3251 self.assertEqual(datasetId.version, 5)
3253 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3254 self.assertIsInstance(datasetId, uuid.UUID)
3255 self.assertEqual(datasetId.version, 5)
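# Background for the version assertions above: UUID4 is random, while
# UUID5 is a deterministic hash of a namespace plus a name, which is
# what makes the DATAID_TYPE* modes reproducible. The namespace below
# is illustrative only, not the one DatasetIdFactory actually uses.
import uuid as _uuid

_NS = _uuid.UUID("00000000-0000-0000-0000-000000000000")
assert _uuid.uuid4().version == 4
assert _uuid.uuid5(_NS, "run/datasetType/Cam1/1").version == 5
# Determinism: identical inputs always yield the identical UUID5.
assert _uuid.uuid5(_NS, "x") == _uuid.uuid5(_NS, "x")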
3257 def testExposureQueries(self):
3258 """Test query methods using arguments sourced from the exposure log
3259 service.
3261 The most complete test dataset currently available to daf_butler tests
3262 is the hsc-rc2-subset.yaml export (which is unfortunately distinct
3263 from the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3264 dimension records as it was focused on providing nontrivial spatial
3265 overlaps between visit+detector and tract+patch. So in this test we
3266 need to translate queries that originally used the exposure dimension
3267 to use the (very similar) visit dimension instead.
3268 """
3269 registry = self.makeRegistry()
3270 self.loadData(registry, "hsc-rc2-subset.yaml")
3271 self.assertEqual(
3272 [
3273 record.id
3274 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3275 .order_by("id")
3276 .limit(5)
3277 ],
3278 [318, 322, 326, 330, 332],
3279 )
3280 self.assertEqual(
3281 [
3282 data_id["visit"]
3283 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
3284 ],
3285 [318, 322, 326, 330, 332],
3286 )
3287 self.assertEqual(
3288 [
3289 record.id
3290 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3291 .order_by("full_name")
3292 .limit(5)
3293 ],
3294 [73, 72, 71, 70, 65],
3295 )
3296 self.assertEqual(
3297 [
3298 data_id["detector"]
3299 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3300 .order_by("full_name")
3301 .limit(5)
3302 ],
3303 [73, 72, 71, 70, 65],
3304 )
3306 def test_long_query_names(self) -> None:
3307 """Test that queries involving very long names are handled correctly.
3309 This is especially important for PostgreSQL, which truncates
3310 identifiers longer than 63 characters, but it's worth testing for all DBs.
3311 """
3312 registry = self.makeRegistry()
3313 name = "abcd" * 17
3314 registry.registerDatasetType(
3315 DatasetType(
3316 name,
3317 dimensions=(),
3318 storageClass="Exposure",
3319 universe=registry.dimensions,
3320 )
3321 )
3322 # Need to search more than one collection actually containing a
3323 # matching dataset to avoid optimizations that sidestep bugs due to
3324 # truncation by making findFirst=True a no-op.
3325 run1 = "run1"
3326 registry.registerRun(run1)
3327 run2 = "run2"
3328 registry.registerRun(run2)
3329 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3330 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3331 self.assertEqual(
3332 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3333 {ref1},
3334 )
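# The arithmetic behind the name choice above: "abcd" * 17 is 68
# characters, safely past PostgreSQL's default 63-character identifier
# limit (NAMEDATALEN - 1), so symbols derived from it get truncated.
assert len("abcd" * 17) == 68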
3336 def test_skypix_constraint_queries(self) -> None:
3337 """Test queries spatially constrained by a skypix data ID."""
3338 registry = self.makeRegistry()
3339 self.loadData(registry, "hsc-rc2-subset.yaml")
3340 patch_regions = {
3341 (data_id["tract"], data_id["patch"]): data_id.region
3342 for data_id in registry.queryDataIds(["patch"]).expanded()
3343 }
3344 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3345 # This check ensures the test doesn't become trivial due to a config
3346 # change; if it does, just pick a different HTM level.
3347 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3348 # Gather all skypix IDs that definitely overlap at least one of these
3349 # patches.
3350 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3351 for patch_region in patch_regions.values():
3352 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3353 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3354 # and does not overlap at least one other patch.
3355 for skypix_id in itertools.chain.from_iterable(
3356 range(begin, end) for begin, end in relevant_skypix_ids
3357 ):
3358 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3359 overlapping_patches = {
3360 patch_key
3361 for patch_key, patch_region in patch_regions.items()
3362 if not patch_region.isDisjointFrom(skypix_region)
3363 }
3364 if overlapping_patches and overlapping_patches != patch_regions.keys():
3365 break
3366 else:
3367 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3368 self.assertEqual(
3369 {
3370 (data_id["tract"], data_id["patch"])
3371 for data_id in registry.queryDataIds(
3372 ["patch"],
3373 dataId={skypix_dimension.name: skypix_id},
3374 )
3375 },
3376 overlapping_patches,
3377 )
3378 # Test that a three-way join that includes the common skypix system in
3379 # the dimensions doesn't generate redundant join terms in the query.
3380 full_data_ids = set(
3381 registry.queryDataIds(
3382 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3383 ).expanded()
3384 )
3385 self.assertGreater(len(full_data_ids), 0)
3386 for data_id in full_data_ids:
3387 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3388 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
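# A standalone sketch of the pixelization calls used above, assuming
# only the public lsst.sphgeom API: interior(region) returns pixel
# ranges certainly inside a region (no false positives), while
# envelope(region) returns every pixel that might intersect it (no
# false negatives), so the interior is always a subset of the envelope.
import lsst.sphgeom

_pixelization = lsst.sphgeom.HtmPixelization(7)
_circle = lsst.sphgeom.Circle(
    lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0),
    lsst.sphgeom.Angle.fromDegrees(1.0),
)
_interior = _pixelization.interior(_circle)
_envelope = _pixelization.envelope(_circle)
# RangeSet supports set algebra (this file already relies on |=).
assert (_envelope | _interior) == _envelope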
3390 def test_spatial_constraint_queries(self) -> None:
3391 """Test queries in which one spatial dimension in the constraint (data
3392 ID or ``where`` string) constrains a different spatial dimension in the
3393 query result columns.
3394 """
3395 registry = self.makeRegistry()
3396 self.loadData(registry, "hsc-rc2-subset.yaml")
3397 patch_regions = {
3398 (data_id["tract"], data_id["patch"]): data_id.region
3399 for data_id in registry.queryDataIds(["patch"]).expanded()
3400 }
3401 observation_regions = {
3402 (data_id["visit"], data_id["detector"]): data_id.region
3403 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3404 }
3405 all_combos = {
3406 (patch_key, observation_key)
3407 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3408 }
3409 overlapping_combos = {
3410 (patch_key, observation_key)
3411 for patch_key, observation_key in all_combos
3412 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3413 }
3414 # Check a direct spatial join with no constraint first.
3415 self.assertEqual(
3416 {
3417 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3418 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3419 },
3420 overlapping_combos,
3421 )
3422 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3423 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3424 for patch_key, observation_key in overlapping_combos:
3425 overlaps_by_patch[patch_key].add(observation_key)
3426 overlaps_by_observation[observation_key].add(patch_key)
3427 # Find a patch and an observation that each overlap at least one, but
3428 # not all, of the other kind.
3429 nontrivial_patch = next(
3430 iter(
3431 patch_key
3432 for patch_key, observation_keys in overlaps_by_patch.items()
3433 if observation_keys and observation_keys != observation_regions.keys()
3434 )
3435 )
3436 nontrivial_observation = next(
3437 iter(
3438 observation_key
3439 for observation_key, patch_keys in overlaps_by_observation.items()
3440 if patch_keys and patch_keys != patch_regions.keys()
3441 )
3442 )
3443 # Use the nontrivial patches and observations as constraints on the
3444 # other dimensions in various ways, first via a 'where' expression.
3445 # It's better in general to use 'bind' instead of f-strings, but these
3446 # are all integers, so there are no quoting concerns.
3447 self.assertEqual(
3448 {
3449 (data_id["visit"], data_id["detector"])
3450 for data_id in registry.queryDataIds(
3451 ["visit", "detector"],
3452 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3453 skymap="hsc_rings_v1",
3454 )
3455 },
3456 overlaps_by_patch[nontrivial_patch],
3457 )
3458 self.assertEqual(
3459 {
3460 (data_id["tract"], data_id["patch"])
3461 for data_id in registry.queryDataIds(
3462 ["patch"],
3463 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3464 instrument="HSC",
3465 )
3466 },
3467 overlaps_by_observation[nontrivial_observation],
3468 )
3469 # and then via the dataId argument.
3470 self.assertEqual(
3471 {
3472 (data_id["visit"], data_id["detector"])
3473 for data_id in registry.queryDataIds(
3474 ["visit", "detector"],
3475 dataId={
3476 "tract": nontrivial_patch[0],
3477 "patch": nontrivial_patch[1],
3478 },
3479 skymap="hsc_rings_v1",
3480 )
3481 },
3482 overlaps_by_patch[nontrivial_patch],
3483 )
3484 self.assertEqual(
3485 {
3486 (data_id["tract"], data_id["patch"])
3487 for data_id in registry.queryDataIds(
3488 ["patch"],
3489 dataId={
3490 "visit": nontrivial_observation[0],
3491 "detector": nontrivial_observation[1],
3492 },
3493 instrument="HSC",
3494 )
3495 },
3496 overlaps_by_observation[nontrivial_observation],
3497 )
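# The overlap bookkeeping above reduces to pairwise isDisjointFrom
# checks on sphgeom regions; a minimal standalone sketch:
import lsst.sphgeom

_a = lsst.sphgeom.Circle(
    lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0), lsst.sphgeom.Angle.fromDegrees(1.0)
)
_b = lsst.sphgeom.Circle(
    lsst.sphgeom.UnitVector3d(0.0, 1.0, 0.0), lsst.sphgeom.Angle.fromDegrees(1.0)
)
# Two one-degree circles ninety degrees apart cannot touch.
assert _a.isDisjointFrom(_b)
assert not _a.isDisjointFrom(_a)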
3499 def test_query_projection_drop_postprocessing(self) -> None:
3500 """Test that projections and deduplications on query objects can
3501 drop post-query region filtering to ensure the query remains in
3502 the SQL engine.
3503 """
3504 registry = self.makeRegistry()
3505 self.loadData(registry, "base.yaml")
3506 self.loadData(registry, "spatial.yaml")
3508 def pop_transfer(tree: Relation) -> Relation:
3509 """If a relation tree terminates with a transfer to a new engine,
3510 return the relation prior to that transfer. If not, return the
3511 original relation.
3513 Parameters
3514 ----------
3515 tree : `Relation`
3516 The relation tree to inspect.
3517 """
3518 match tree:
3519 case Transfer(target=target):
3520 return target
3521 case _:
3522 return tree
3524 # There's no public way to get a Query object yet, so we get one from a
3525 # DataCoordinateQueryResults private attribute. When a public API is
3526 # available this test should use it.
3527 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3528 # We expect this query to terminate in the iteration engine originally,
3529 # because region-filtering is necessary.
3530 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3531 # If we deduplicate, we usually have to do that downstream of the
3532 # filtering. That means the deduplication has to happen in the
3533 # iteration engine.
3534 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3535 # If we pass drop_postprocessing, we instead drop the region filtering
3536 # so the deduplication can happen in SQL (though there might still be
3537 # transfer to iteration at the tail of the tree that we can ignore;
3538 # that's what the pop_transfer takes care of here).
3539 self.assertIsInstance(
3540 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3541 sql.Engine,
3542 )
3544 def test_query_find_datasets_drop_postprocessing(self) -> None:
3545 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3546 problems with the FindFirstDataset relation operation.
3547 """
3548 # Setup: load some visit, tract, and patch records, and insert two
3549 # datasets with dimensions {visit, patch}, with one in each of two
3550 # RUN collections.
3551 registry = self.makeRegistry()
3552 self.loadData(registry, "base.yaml")
3553 self.loadData(registry, "spatial.yaml")
3554 storage_class = StorageClass("Warpy")
3555 registry.storageClasses.registerStorageClass(storage_class)
3556 dataset_type = DatasetType(
3557 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3558 )
3559 registry.registerDatasetType(dataset_type)
3560 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3561 registry.registerRun("run1")
3562 registry.registerRun("run2")
3563 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3564 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3565 # Query for the dataset using queryDataIds(...).findDatasets(...)
3566 # against only one of the two collections. This should work even
3567 # though the relation returned by queryDataIds ends with
3568 # iteration-engine region-filtering, because we can recognize before
3569 # running the query that there is only one collection to search and
3570 # hence the (default) findFirst=True is irrelevant, and joining in the
3571 # dataset query commutes past the iteration-engine postprocessing.
3572 query1 = registry.queryDataIds(
3573 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3574 )
3575 self.assertEqual(
3576 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3577 {ref1},
3578 )
3579 # Query for the dataset using queryDataIds(...).findDatasets(...)
3580 # against both collections. This can only work if the FindFirstDataset
3581 # operation can be commuted past the iteration-engine options into SQL.
3582 query2 = registry.queryDataIds(
3583 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3584 )
3585 self.assertEqual(
3586 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3587 {ref2},
3588 )
3590 def test_query_empty_collections(self) -> None:
3591 """Test for registry query methods with empty collections. The methods
3592 should return an empty result set (or None when applicable) and provide
3593 "doomed" diagnostics.
3594 """
3595 registry = self.makeRegistry()
3596 self.loadData(registry, "base.yaml")
3597 self.loadData(registry, "datasets.yaml")
3599 # Tests for registry.findDataset()
3600 with self.assertRaises(NoDefaultCollectionError):
3601 registry.findDataset("bias", instrument="Cam1", detector=1)
3602 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3603 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3605 # Tests for registry.queryDatasets()
3606 with self.assertRaises(NoDefaultCollectionError):
3607 registry.queryDatasets("bias")
3608 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3610 result = registry.queryDatasets("bias", collections=[])
3611 self.assertEqual(len(list(result)), 0)
3612 messages = list(result.explain_no_results())
3613 self.assertTrue(messages)
3614 self.assertTrue(any("because collection list is empty" in message for message in messages))
3616 # Tests for registry.queryDataIds()
3617 with self.assertRaises(NoDefaultCollectionError):
3618 registry.queryDataIds("detector", datasets="bias")
3619 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3621 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3622 self.assertEqual(len(list(result)), 0)
3623 messages = list(result.explain_no_results())
3624 self.assertTrue(messages)
3625 self.assertTrue(any("because collection list is empty" in message for message in messages))
3627 # Tests for registry.queryDimensionRecords()
3628 with self.assertRaises(NoDefaultCollectionError):
3629 registry.queryDimensionRecords("detector", datasets="bias")
3630 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3632 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3633 self.assertEqual(len(list(result)), 0)
3634 messages = list(result.explain_no_results())
3635 self.assertTrue(messages)
3636 self.assertTrue(any("because collection list is empty" in message for message in messages))
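# Usage sketch for the "doomed query" diagnostics pattern tested above
# (the helper is hypothetical, not part of the test suite): callers can
# distinguish an empty-but-valid result from a query that could never
# succeed by consulting explain_no_results().
def _diagnose(result):
    rows = list(result)
    if not rows:
        for message in result.explain_no_results():
            print(message)
    return rows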
3638 def test_dataset_followup_spatial_joins(self) -> None:
3639 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3640 is involved.
3641 """
3642 registry = self.makeRegistry()
3643 self.loadData(registry, "base.yaml")
3644 self.loadData(registry, "spatial.yaml")
3645 pvi_dataset_type = DatasetType(
3646 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3647 )
3648 registry.registerDatasetType(pvi_dataset_type)
3649 collection = "datasets"
3650 registry.registerRun(collection)
3651 (pvi1,) = registry.insertDatasets(
3652 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3653 )
3654 (pvi2,) = registry.insertDatasets(
3655 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3656 )
3657 (pvi3,) = registry.insertDatasets(
3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3659 )
3660 self.assertEqual(
3661 set(
3662 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3663 .expanded()
3664 .findRelatedDatasets("pvi", [collection])
3665 ),
3666 {
3667 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3668 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3669 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3670 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3671 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3672 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3675 },
3676 )