# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None
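
# numpy is optional: the numpy-specific test below (testNumpyDataId) is
# skipped when it is unavailable.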

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
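
    # For illustration only (hypothetical subclass and manager path): a
    # concrete test case might pin a specific manager implementation, e.g.
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = "my_package.managers.MyCollectionManager"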

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
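        # ``datastore=None`` imports registry content only; no datastore
        # artifacts are involved.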
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, of which two have the right
        # dataset type and one does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that `SqlRegistry.removeDatasetType` works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that `SqlRegistry.removeDatasetType` raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes:
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique ID generation modes; such refs can be re-imported
        # multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
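                # Version 5 UUIDs are name-based (RFC 4122), so IDs generated
                # in these modes are deterministic rather than random.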
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by the registry; this test checks
        that such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
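        """Test that a collection chain can be modified while the collection
        cache is active (regression test for DM-43750).
        """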
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but
            # before inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB.
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section
                # and is waiting for us to release it.  Start the other
                # thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not of the original insertion in the
                    # outer block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # Dataset types.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter: it is not in `dimensions`, but it is
        # part of the full expression, so this should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types.
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)
1352 # Overlap DatabaseDimensionElements with each other.
1353 for family1, family2 in itertools.combinations(families, 2):
1354 for element1, element2 in itertools.product(families[family1], families[family2]):
1355 dimensions = element1.minimal_group | element2.minimal_group
1356 # Construct expected set of overlapping data IDs via a
1357 # brute-force comparison of the regions we've already fetched.
1358 expected = {
1359 DataCoordinate.standardize(
1360 {**dataId1.required, **dataId2.required}, dimensions=dimensions
1361 )
1362 for (dataId1, region1), (dataId2, region2) in itertools.product(
1363 regions[element1.name].items(), regions[element2.name].items()
1364 )
1365 if not region1.isDisjointFrom(region2)
1366 }
1367 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1368 queried = set(registry.queryDataIds(dimensions))
1369 self.assertEqual(expected, queried)
1371 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1372 commonSkyPix = registry.dimensions.commonSkyPix
1373 for elementName, these_regions in regions.items():
1374 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
1375 expected = set()
1376 for dataId, region in these_regions.items():
1377 for begin, end in commonSkyPix.pixelization.envelope(region):
1378 expected.update(
1379 DataCoordinate.standardize(
1380 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
1381 )
1382 for index in range(begin, end)
1383 )
1384 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1385 queried = set(registry.queryDataIds(dimensions))
1386 self.assertEqual(expected, queried)
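    # A self-contained sketch of the overlap primitive that the brute-force
    # comparison above is built on: two regions "overlap" exactly when
    # ``isDisjointFrom`` returns False. The indices are the first two
    # level-6 children of level-5 trixel 12288 (the same parent reused in
    # testDimensionDataModifications below); sibling trixels touch, so this
    # is expected to return True.
    @staticmethod
    def _sketch_region_overlap() -> bool:
        htm6 = lsst.sphgeom.HtmPixelization(6)
        a = htm6.triangle(12288 * 4)  # first child of level-5 trixel 12288
        b = htm6.triangle(12288 * 4 + 1)  # one of its siblings
        return not a.isDisjointFrom(b)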
1388 def testAbstractQuery(self):
1389 """Test that we can run a query that just lists the known
1390 bands. This is tricky because band is
1391 backed by a query against physical_filter.
1392 """
1393 registry = self.makeRegistry()
1394 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1395 registry.insertDimensionData(
1396 "physical_filter",
1397 dict(instrument="DummyCam", name="dummy_i", band="i"),
1398 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1399 dict(instrument="DummyCam", name="dummy_r", band="r"),
1400 )
1401 rows = registry.queryDataIds(["band"]).toSet()
1402 self.assertCountEqual(
1403 rows,
1404 [
1405 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1406 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1407 ],
1408 )
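    # A pure-Python sketch of why the query above returns exactly two rows:
    # projecting the physical_filter records onto "band" and deduplicating
    # leaves one data ID per distinct band.
    @staticmethod
    def _sketch_band_projection() -> set[str]:
        physical_filters = [
            {"instrument": "DummyCam", "name": "dummy_i", "band": "i"},
            {"instrument": "DummyCam", "name": "dummy_i2", "band": "i"},
            {"instrument": "DummyCam", "name": "dummy_r", "band": "r"},
        ]
        return {rec["band"] for rec in physical_filters}  # {"i", "r"}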
1410 def testAttributeManager(self):
1411 """Test basic functionality of attribute manager."""
1412 # Number of attributes with schema versions in a fresh database:
1413 # 6 managers with 2 records per manager, plus the dimensions config.
1414 VERSION_COUNT = 6 * 2 + 1
1416 registry = self.makeRegistry()
1417 attributes = registry._managers.attributes
1419 # check what get() returns for a non-existing key
1420 self.assertIsNone(attributes.get("attr"))
1421 self.assertEqual(attributes.get("attr", ""), "")
1422 self.assertEqual(attributes.get("attr", "Value"), "Value")
1423 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1425 # cannot store empty key or value
1426 with self.assertRaises(ValueError):
1427 attributes.set("", "value")
1428 with self.assertRaises(ValueError):
1429 attributes.set("attr", "")
1431 # set value of non-existing key
1432 attributes.set("attr", "value")
1433 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1434 self.assertEqual(attributes.get("attr"), "value")
1436 # update value of existing key
1437 with self.assertRaises(ButlerAttributeExistsError):
1438 attributes.set("attr", "value2")
1440 attributes.set("attr", "value2", force=True)
1441 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1442 self.assertEqual(attributes.get("attr"), "value2")
1444 # delete existing key
1445 self.assertTrue(attributes.delete("attr"))
1446 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1448 # delete non-existing key
1449 self.assertFalse(attributes.delete("non-attr"))
1451 # store a bunch of keys and get the list back
1452 data = [
1453 ("version.core", "1.2.3"),
1454 ("version.dimensions", "3.2.1"),
1455 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1456 ]
1457 for key, value in data:
1458 attributes.set(key, value)
1459 items = dict(attributes.items())
1460 for key, value in data:
1461 self.assertEqual(items[key], value)
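    # A pure-Python model (illustration only) of the attribute-manager
    # contract exercised above: empty keys or values are rejected, set()
    # refuses to overwrite unless force=True, and delete() reports whether
    # the key existed. The real manager raises ButlerAttributeExistsError
    # and is backed by a database table.
    @staticmethod
    def _sketch_attribute_semantics() -> None:
        store: dict[str, str] = {}

        def set_attr(key: str, value: str, force: bool = False) -> None:
            if not key or not value:
                raise ValueError("empty key or value is not allowed")
            if key in store and not force:
                raise RuntimeError(f"attribute {key!r} already exists")
            store[key] = value

        def delete_attr(key: str) -> bool:
            return store.pop(key, None) is not None

        set_attr("attr", "value")
        set_attr("attr", "value2", force=True)  # overwrite needs force=True
        assert store["attr"] == "value2"
        assert delete_attr("attr") and not delete_attr("attr")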
1463 def testQueryDatasetsDeduplication(self):
1464 """Test that the findFirst option to queryDatasets selects datasets
1465 from collections in the order given.
1466 """
1467 registry = self.makeRegistry()
1468 self.loadData(registry, "base.yaml")
1469 self.loadData(registry, "datasets.yaml")
1470 self.assertCountEqual(
1471 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1472 [
1473 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1474 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1475 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1476 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1477 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1478 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1479 ],
1480 )
1481 self.assertCountEqual(
1482 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1483 [
1484 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1485 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1486 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1487 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1488 ],
1489 )
1490 self.assertCountEqual(
1491 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1492 [
1493 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1494 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1495 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1496 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1497 ],
1498 )
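    # A pure-Python sketch of findFirst semantics: for each data ID, keep
    # the dataset from the first collection in the search order that
    # contains one. The holdings mirror datasets.yaml (imported_g has
    # detectors 1-3, imported_r has 2-4), so searching
    # ["imported_g", "imported_r"] resolves detectors 2 and 3 to
    # imported_g, while the reversed order resolves them to imported_r.
    @staticmethod
    def _sketch_find_first(collections: list[str]) -> dict[int, str]:
        holdings = {
            "imported_g": {1, 2, 3},
            "imported_r": {2, 3, 4},
        }
        result: dict[int, str] = {}
        for collection in collections:
            for detector in holdings[collection]:
                result.setdefault(detector, collection)
        return result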
1500 def testQueryResults(self):
1501 """Test querying for data IDs and then manipulating the QueryResults
1502 object returned to perform other queries.
1503 """
1504 registry = self.makeRegistry()
1505 self.loadData(registry, "base.yaml")
1506 self.loadData(registry, "datasets.yaml")
1507 bias = registry.getDatasetType("bias")
1508 flat = registry.getDatasetType("flat")
1509 # Obtain expected results from methods other than those we're testing
1510 # here. That includes:
1511 # - the dimensions of the data IDs we want to query:
1512 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1513 # - the dimensions of some other data IDs we'll extract from that:
1514 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1515 # - the data IDs we expect to obtain from the first queries:
1516 expectedDataIds = DataCoordinateSet(
1517 {
1518 DataCoordinate.standardize(
1519 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1520 )
1521 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1522 },
1523 dimensions=expected_dimensions,
1524 hasFull=False,
1525 hasRecords=False,
1526 )
1527 # - the flat datasets we expect to find from those data IDs, in just
1528 # one collection (so deduplication is irrelevant):
1529 expectedFlats = [
1530 registry.findDataset(
1531 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1532 ),
1533 registry.findDataset(
1534 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1535 ),
1536 registry.findDataset(
1537 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1538 ),
1539 ]
1540 # - the data IDs we expect to extract from that:
1541 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1542 # - the bias datasets we expect to find from those data IDs, after we
1543 # subset-out the physical_filter dimension, both with duplicates:
1544 expectedAllBiases = [
1545 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1546 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1547 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1548 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1549 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1550 ]
1551 # - ...and without duplicates:
1552 expectedDeduplicatedBiases = [
1553 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1554 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1555 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1556 ]
1557 # Test against those expected results, using a "lazy" query for the
1558 # data IDs (which re-executes that query each time we use it to do
1559 # something new).
1560 dataIds = registry.queryDataIds(
1561 ["detector", "physical_filter"],
1562 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1563 instrument="Cam1",
1564 )
1565 self.assertEqual(dataIds.dimensions, expected_dimensions)
1566 self.assertEqual(dataIds.toSet(), expectedDataIds)
1567 self.assertCountEqual(
1568 list(
1569 dataIds.findDatasets(
1570 flat,
1571 collections=["imported_r"],
1572 )
1573 ),
1574 expectedFlats,
1575 )
1576 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1577 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1578 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1579 self.assertCountEqual(
1580 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1581 expectedAllBiases,
1582 )
1583 self.assertCountEqual(
1584 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1585 expectedDeduplicatedBiases,
1586 )
1588 # Searching for a dataset with dimensions we had projected away
1589 # restores those dimensions.
1590 self.assertCountEqual(
1591 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1592 expectedFlats,
1593 )
1595 # Use a named dataset type that does not exist and a dataset type
1596 # object that does not exist.
1597 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1599 # Test both string name and dataset type object.
1600 test_type: str | DatasetType
1601 for test_type, test_type_name in (
1602 (unknown_type, unknown_type.name),
1603 (unknown_type.name, unknown_type.name),
1604 ):
1605 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1606 list(
1607 subsetDataIds.findDatasets(
1608 test_type, collections=["imported_r", "imported_g"], findFirst=True
1609 )
1610 )
1612 # Materialize the bias dataset queries (only) by putting the results
1613 # into temporary tables, then repeat those tests.
1614 with subsetDataIds.findDatasets(
1615 bias, collections=["imported_r", "imported_g"], findFirst=False
1616 ).materialize() as biases:
1617 self.assertCountEqual(list(biases), expectedAllBiases)
1618 with subsetDataIds.findDatasets(
1619 bias, collections=["imported_r", "imported_g"], findFirst=True
1620 ).materialize() as biases:
1621 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1622 # Materialize the data ID subset query, but not the dataset queries.
1623 with subsetDataIds.materialize() as subsetDataIds:
1624 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1625 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1626 self.assertCountEqual(
1627 list(
1628 subsetDataIds.findDatasets(
1629 bias, collections=["imported_r", "imported_g"], findFirst=False
1630 )
1631 ),
1632 expectedAllBiases,
1633 )
1634 self.assertCountEqual(
1635 list(
1636 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1637 ),
1638 expectedDeduplicatedBiases,
1639 )
1640 # Materialize the dataset queries, too.
1641 with subsetDataIds.findDatasets(
1642 bias, collections=["imported_r", "imported_g"], findFirst=False
1643 ).materialize() as biases:
1644 self.assertCountEqual(list(biases), expectedAllBiases)
1645 with subsetDataIds.findDatasets(
1646 bias, collections=["imported_r", "imported_g"], findFirst=True
1647 ).materialize() as biases:
1648 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1649 # Materialize the original query, but none of the follow-up queries.
1650 with dataIds.materialize() as dataIds:
1651 self.assertEqual(dataIds.dimensions, expected_dimensions)
1652 self.assertEqual(dataIds.toSet(), expectedDataIds)
1653 self.assertCountEqual(
1654 list(
1655 dataIds.findDatasets(
1656 flat,
1657 collections=["imported_r"],
1658 )
1659 ),
1660 expectedFlats,
1661 )
1662 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1663 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1664 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1665 self.assertCountEqual(
1666 list(
1667 subsetDataIds.findDatasets(
1668 bias, collections=["imported_r", "imported_g"], findFirst=False
1669 )
1670 ),
1671 expectedAllBiases,
1672 )
1673 self.assertCountEqual(
1674 list(
1675 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1676 ),
1677 expectedDeduplicatedBiases,
1678 )
1679 # Materialize just the bias dataset queries.
1680 with subsetDataIds.findDatasets(
1681 bias, collections=["imported_r", "imported_g"], findFirst=False
1682 ).materialize() as biases:
1683 self.assertCountEqual(list(biases), expectedAllBiases)
1684 with subsetDataIds.findDatasets(
1685 bias, collections=["imported_r", "imported_g"], findFirst=True
1686 ).materialize() as biases:
1687 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1688 # Materialize the subset data ID query, but not the dataset
1689 # queries.
1690 with subsetDataIds.materialize() as subsetDataIds:
1691 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1692 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1693 self.assertCountEqual(
1694 list(
1695 subsetDataIds.findDatasets(
1696 bias, collections=["imported_r", "imported_g"], findFirst=False
1697 )
1698 ),
1699 expectedAllBiases,
1700 )
1701 self.assertCountEqual(
1702 list(
1703 subsetDataIds.findDatasets(
1704 bias, collections=["imported_r", "imported_g"], findFirst=True
1705 )
1706 ),
1707 expectedDeduplicatedBiases,
1708 )
1709 # Materialize the bias dataset queries, too, so now we're
1710 # materializing every single step.
1711 with subsetDataIds.findDatasets(
1712 bias, collections=["imported_r", "imported_g"], findFirst=False
1713 ).materialize() as biases:
1714 self.assertCountEqual(list(biases), expectedAllBiases)
1715 with subsetDataIds.findDatasets(
1716 bias, collections=["imported_r", "imported_g"], findFirst=True
1717 ).materialize() as biases:
1718 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
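    # A pure-Python sketch of the lazy-vs-materialized distinction tested
    # above: a lazy result re-executes its query every time it is used,
    # while materialize() runs it once (into a temporary table in the real
    # registry) and serves all later uses from that snapshot.
    @staticmethod
    def _sketch_materialize() -> None:
        from contextlib import contextmanager

        calls = 0

        def run_query() -> list[int]:
            nonlocal calls
            calls += 1
            return [1, 2, 3]

        @contextmanager
        def materialize():
            rows = run_query()  # executed exactly once
            yield rows

        run_query()
        run_query()
        assert calls == 2  # lazy: one execution per use
        with materialize() as rows:
            assert rows == [1, 2, 3]
            assert rows == [1, 2, 3]
        assert calls == 3  # materialized: many uses, one execution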
1720 def testStorageClassPropagation(self):
1721 """Test that queries for datasets respect the storage class passed in
1722 as part of a full dataset type.
1723 """
1724 registry = self.makeRegistry()
1725 self.loadData(registry, "base.yaml")
1726 dataset_type_in_registry = DatasetType(
1727 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1728 )
1729 registry.registerDatasetType(dataset_type_in_registry)
1730 run = "run1"
1731 registry.registerRun(run)
1732 (inserted_ref,) = registry.insertDatasets(
1733 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1734 )
1735 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1736 query_dataset_type = DatasetType(
1737 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1738 )
1739 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1740 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1741 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1742 (query_datasets_ref,) = query_datasets_result
1743 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1744 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1745 query_dataset_type, collections=[run]
1746 )
1747 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1748 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1749 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1750 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1751 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1752 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1753 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1755 def testEmptyDimensionsQueries(self):
1756 """Test Query and QueryResults objects in the case where there are no
1757 dimensions.
1758 """
1759 # Set up test data: one dataset type, two runs, one dataset in each.
1760 registry = self.makeRegistry()
1761 self.loadData(registry, "base.yaml")
1762 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1763 registry.registerDatasetType(schema)
1764 dataId = DataCoordinate.make_empty(registry.dimensions)
1765 run1 = "run1"
1766 run2 = "run2"
1767 registry.registerRun(run1)
1768 registry.registerRun(run2)
1769 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1770 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1771 # Query directly for both of the datasets, and then for each one individually.
1772 self.checkQueryResults(
1773 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1774 )
1775 self.checkQueryResults(
1776 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1777 [dataset1],
1778 )
1779 self.checkQueryResults(
1780 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1781 [dataset2],
1782 )
1783 # Query for data IDs with no dimensions.
1784 dataIds = registry.queryDataIds([])
1785 self.checkQueryResults(dataIds, [dataId])
1786 # Use queried data IDs to find the datasets.
1787 self.checkQueryResults(
1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1789 [dataset1, dataset2],
1790 )
1791 self.checkQueryResults(
1792 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1793 [dataset1],
1794 )
1795 self.checkQueryResults(
1796 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1797 [dataset2],
1798 )
1799 # Now materialize the data ID query results and repeat those tests.
1800 with dataIds.materialize() as dataIds:
1801 self.checkQueryResults(dataIds, [dataId])
1802 self.checkQueryResults(
1803 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1804 [dataset1],
1805 )
1806 self.checkQueryResults(
1807 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1808 [dataset2],
1809 )
1810 # Query for non-empty data IDs, then subset that to get the empty one.
1811 # Repeat the above tests starting from that.
1812 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1813 self.checkQueryResults(dataIds, [dataId])
1814 self.checkQueryResults(
1815 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1816 [dataset1, dataset2],
1817 )
1818 self.checkQueryResults(
1819 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1820 [dataset1],
1821 )
1822 self.checkQueryResults(
1823 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1824 [dataset2],
1825 )
1826 with dataIds.materialize() as dataIds:
1827 self.checkQueryResults(dataIds, [dataId])
1828 self.checkQueryResults(
1829 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1830 [dataset1, dataset2],
1831 )
1832 self.checkQueryResults(
1833 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1834 [dataset1],
1835 )
1836 self.checkQueryResults(
1837 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1838 [dataset2],
1839 )
1840 # Query for non-empty data IDs, then materialize, then subset to get
1841 # the empty one. Repeat again.
1842 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1843 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1844 self.checkQueryResults(dataIds, [dataId])
1845 self.checkQueryResults(
1846 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1847 [dataset1, dataset2],
1848 )
1849 self.checkQueryResults(
1850 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1851 [dataset1],
1852 )
1853 self.checkQueryResults(
1854 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1855 [dataset2],
1856 )
1857 with dataIds.materialize() as dataIds:
1858 self.checkQueryResults(dataIds, [dataId])
1859 self.checkQueryResults(
1860 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1861 [dataset1, dataset2],
1862 )
1863 self.checkQueryResults(
1864 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1865 [dataset1],
1866 )
1867 self.checkQueryResults(
1868 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1869 [dataset2],
1870 )
1871 # Repeat the materialization tests with a dimension element that isn't
1872 # cached, so there's no way we can know when building the query whether
1873 # there are any rows or not (there aren't).
1874 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1875 with dataIds.materialize() as dataIds:
1876 self.checkQueryResults(dataIds, [])
1877 self.checkQueryResults(
1878 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1879 )
1880 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1881 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1882 # Query for non-empty data IDs with a constraint on an empty-data-ID
1883 # dataset that exists.
1884 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1885 self.checkQueryResults(
1886 dataIds.subset(unique=True),
1887 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1888 )
1889 # Again query for non-empty data IDs with a constraint on empty-data-ID
1890 # datasets, but when the datasets don't exist. We delete the existing
1891 # dataset and query just that collection rather than creating a new
1892 # empty collection because this is a bit less likely for our build-time
1893 # logic to shortcut-out (via the collection summaries), and such a
1894 # shortcut would make this test a bit more trivial than we'd like.
1895 registry.removeDatasets([dataset2])
1896 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1897 self.checkQueryResults(dataIds, [])
1899 def testDimensionDataModifications(self):
1900 """Test that modifying dimension records via:
1901 syncDimensionData(..., update=True) and
1902 insertDimensionData(..., replace=True) works as expected, even in the
1903 presence of datasets using those dimensions and spatial overlap
1904 relationships.
1905 """
1907 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1908 """Unpack a sphgeom.RangeSet into the integers it contains."""
1909 for begin, end in ranges:
1910 yield from range(begin, end)
1912 def _range_set_hull(
1913 ranges: lsst.sphgeom.RangeSet,
1914 pixelization: lsst.sphgeom.HtmPixelization,
1915 ) -> lsst.sphgeom.ConvexPolygon:
1916 """Create a ConvexPolygon hull of the region defined by a set of
1917 HTM pixelization index ranges.
1918 """
1919 points = []
1920 for index in _unpack_range_set(ranges):
1921 points.extend(pixelization.triangle(index).getVertices())
1922 return lsst.sphgeom.ConvexPolygon(points)
1924 # Use HTM to set up an initial parent region (one arbitrary trixel)
1925 # and four child regions (the trixels within the parent at the next
1926 # level). We'll use the parent as a tract/visit region and the children
1927 # as its patch/visit_detector regions.
1928 registry = self.makeRegistry()
1929 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1930 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1931 index = 12288
1932 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1933 assert htm6.universe().contains(child_ranges_small)
1934 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1935 parent_region_small = lsst.sphgeom.ConvexPolygon(
1936 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1937 )
1938 assert all(parent_region_small.contains(c) for c in child_regions_small)
1939 # Make a larger version of each child region: the convex hull of the
1940 # htm6 trixels that overlap the original's bounding circle. Make a new
1941 # parent that's the convex hull of the new children.
1942 child_regions_large = [
1943 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1944 ]
1945 assert all(
1946 large.contains(small)
1947 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1948 )
1949 parent_region_large = lsst.sphgeom.ConvexPolygon(
1950 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1951 )
1952 assert all(parent_region_large.contains(c) for c in child_regions_large)
1953 assert parent_region_large.contains(parent_region_small)
1954 assert not parent_region_small.contains(parent_region_large)
1955 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1956 # Find some commonSkyPix indices that overlap the large regions but do
1957 # not overlap the small regions. We use commonSkyPix here to make sure the
1958 # real tests later involve what's in the database, not just post-query
1959 # filtering of regions.
1960 child_difference_indices = []
1961 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1962 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1963 assert difference, "if this is empty, we can't test anything useful with these regions"
1964 assert all(
1965 not commonSkyPix.triangle(d).isDisjointFrom(large)
1966 and commonSkyPix.triangle(d).isDisjointFrom(small)
1967 for d in difference
1968 )
1969 child_difference_indices.append(difference)
1970 parent_difference_indices = list(
1971 _unpack_range_set(
1972 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1973 )
1974 )
1975 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1976 assert all(
1977 (
1978 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1979 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1980 )
1981 for d in parent_difference_indices
1982 )
1983 # Now that we've finally got those regions, we'll insert the large ones
1984 # as tract/patch dimension records.
1985 skymap_name = "testing_v1"
1986 registry.insertDimensionData(
1987 "skymap",
1988 {
1989 "name": skymap_name,
1990 "hash": bytes([42]),
1991 "tract_max": 1,
1992 "patch_nx_max": 2,
1993 "patch_ny_max": 2,
1994 },
1995 )
1996 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1997 registry.insertDimensionData(
1998 "patch",
1999 *[
2000 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2001 for n, c in enumerate(child_regions_large)
2002 ],
2003 )
2004 # Add a dataset that uses these dimensions to make sure that modifying
2005 # them doesn't disrupt foreign keys (need to make sure DB doesn't
2006 # implement insert with replace=True as delete-then-insert).
2007 dataset_type = DatasetType(
2008 "coadd",
2009 dimensions=["tract", "patch"],
2010 universe=registry.dimensions,
2011 storageClass="Exposure",
2012 )
2013 registry.registerDatasetType(dataset_type)
2014 registry.registerCollection("the_run", CollectionType.RUN)
2015 registry.insertDatasets(
2016 dataset_type,
2017 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
2018 run="the_run",
2019 )
2020 # Query for tracts and patches that overlap some "difference"
2021 # commonSkyPix pixels; there should be overlaps, because the database has
2022 # the "large" suite of regions.
2023 self.assertEqual(
2024 {0},
2025 {
2026 data_id["tract"]
2027 for data_id in registry.queryDataIds(
2028 ["tract"],
2029 skymap=skymap_name,
2030 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2031 )
2032 },
2033 )
2034 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2035 self.assertIn(
2036 patch_id,
2037 {
2038 data_id["patch"]
2039 for data_id in registry.queryDataIds(
2040 ["patch"],
2041 skymap=skymap_name,
2042 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2043 )
2044 },
2045 )
2046 # Use sync to update the tract region and insert to update the regions
2047 # of the patches, to the "small" suite.
2048 updated = registry.syncDimensionData(
2049 "tract",
2050 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
2051 update=True,
2052 )
2053 self.assertEqual(updated, {"region": parent_region_large})
2054 registry.insertDimensionData(
2055 "patch",
2056 *[
2057 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2058 for n, c in enumerate(child_regions_small)
2059 ],
2060 replace=True,
2061 )
2062 # Query again; there now should be no such overlaps, because the
2063 # database has the "small" suite of regions.
2064 self.assertFalse(
2065 set(
2066 registry.queryDataIds(
2067 ["tract"],
2068 skymap=skymap_name,
2069 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2070 )
2071 )
2072 )
2073 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2074 self.assertNotIn(
2075 patch_id,
2076 {
2077 data_id["patch"]
2078 for data_id in registry.queryDataIds(
2079 ["patch"],
2080 skymap=skymap_name,
2081 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2082 )
2083 },
2084 )
2085 # Update back to the large regions and query one more time.
2086 updated = registry.syncDimensionData(
2087 "tract",
2088 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2089 update=True,
2090 )
2091 self.assertEqual(updated, {"region": parent_region_small})
2092 registry.insertDimensionData(
2093 "patch",
2094 *[
2095 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2096 for n, c in enumerate(child_regions_large)
2097 ],
2098 replace=True,
2099 )
2100 self.assertEqual(
2101 {0},
2102 {
2103 data_id["tract"]
2104 for data_id in registry.queryDataIds(
2105 ["tract"],
2106 skymap=skymap_name,
2107 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2108 )
2109 },
2110 )
2111 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2112 self.assertIn(
2113 patch_id,
2114 {
2115 data_id["patch"]
2116 for data_id in registry.queryDataIds(
2117 ["patch"],
2118 skymap=skymap_name,
2119 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2120 )
2121 },
2122 )
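    # A pure-Python sketch of the sync-with-update contract relied on
    # above: syncDimensionData(..., update=True) compares the new record
    # with the stored one and returns the *old* values of any fields it
    # changed (an empty dict would mean nothing changed).
    @staticmethod
    def _sketch_sync_update() -> None:
        stored = {"skymap": "testing_v1", "id": 0, "region": "large"}
        new = {"skymap": "testing_v1", "id": 0, "region": "small"}
        changed = {k: stored[k] for k, v in new.items() if stored[k] != v}
        stored.update(new)
        assert changed == {"region": "large"}  # old value, as asserted above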
2124 def testCalibrationCollections(self):
2125 """Test operations on `~CollectionType.CALIBRATION` collections,
2126 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2127 `SqlRegistry.findDataset`, and
2128 `DataCoordinateQueryResults.findRelatedDatasets`.
2129 """
2130 # Setup - make a Registry, fill it with some datasets in
2131 # non-calibration collections.
2132 registry = self.makeRegistry()
2133 self.loadData(registry, "base.yaml")
2134 self.loadData(registry, "datasets.yaml")
2135 # Set up some timestamps.
2136 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2137 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2138 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2139 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2140 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2141 allTimespans = [
2142 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2143 ]
2144 # Insert some exposure records with timespans spanning each sequential
2145 # pair of those timestamps.
2146 registry.insertDimensionData(
2147 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2148 )
2149 registry.insertDimensionData(
2150 "group",
2151 {"instrument": "Cam1", "name": "group0"},
2152 {"instrument": "Cam1", "name": "group1"},
2153 {"instrument": "Cam1", "name": "group2"},
2154 {"instrument": "Cam1", "name": "group3"},
2155 )
2156 registry.insertDimensionData(
2157 "exposure",
2158 {
2159 "instrument": "Cam1",
2160 "id": 0,
2161 "group": "group0",
2162 "obs_id": "zero",
2163 "physical_filter": "Cam1-G",
2164 "day_obs": 20200101,
2165 "timespan": Timespan(t1, t2),
2166 },
2167 {
2168 "instrument": "Cam1",
2169 "id": 1,
2170 "group": "group1",
2171 "obs_id": "one",
2172 "physical_filter": "Cam1-G",
2173 "day_obs": 20200101,
2174 "timespan": Timespan(t2, t3),
2175 },
2176 {
2177 "instrument": "Cam1",
2178 "id": 2,
2179 "group": "group2",
2180 "obs_id": "two",
2181 "physical_filter": "Cam1-G",
2182 "day_obs": 20200101,
2183 "timespan": Timespan(t3, t4),
2184 },
2185 {
2186 "instrument": "Cam1",
2187 "id": 3,
2188 "group": "group3",
2189 "obs_id": "three",
2190 "physical_filter": "Cam1-G",
2191 "day_obs": 20200101,
2192 "timespan": Timespan(t4, t5),
2193 },
2194 )
2195 # Get references to some datasets.
2196 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2197 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2198 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2199 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2200 # Register the main calibration collection we'll be working with.
2201 collection = "Cam1/calibs/default"
2202 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2203 # Cannot associate into a calibration collection (no timespan).
2204 with self.assertRaises(CollectionTypeError):
2205 registry.associate(collection, [bias2a])
2206 # Certify 2a dataset with [t2, t4) validity.
2207 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2208 # Test that we can query for this dataset via the new collection, both
2209 # on its own and with a RUN collection.
2210 self.assertEqual(
2211 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2212 {bias2a},
2213 )
2214 self.assertEqual(
2215 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2216 {
2217 bias2a,
2218 bias2b,
2219 bias3b,
2220 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2221 },
2222 )
2223 self.assertEqual(
2224 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2225 {registry.expandDataId(instrument="Cam1", detector=2)},
2226 )
2227 self.assertEqual(
2228 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2229 {
2230 registry.expandDataId(instrument="Cam1", detector=2),
2231 registry.expandDataId(instrument="Cam1", detector=3),
2232 registry.expandDataId(instrument="Cam1", detector=4),
2233 },
2234 )
2235 self.assertEqual(
2236 set(
2237 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2238 "bias", findFirst=True, collections=[collection]
2239 )
2240 ),
2241 {
2242 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2243 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2244 },
2245 )
2246 self.assertEqual(
2247 set(
2248 registry.queryDataIds(
2249 ["exposure", "detector"], instrument="Cam1", detector=2
2250 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2251 ),
2252 {
2253 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2254 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2255 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2256 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2257 },
2258 )
2260 # We should not be able to certify 2b with anything overlapping that
2261 # window.
2262 with self.assertRaises(ConflictingDefinitionError):
2263 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2264 with self.assertRaises(ConflictingDefinitionError):
2265 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2266 with self.assertRaises(ConflictingDefinitionError):
2267 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2268 with self.assertRaises(ConflictingDefinitionError):
2269 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2270 with self.assertRaises(ConflictingDefinitionError):
2271 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2272 with self.assertRaises(ConflictingDefinitionError):
2273 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2274 with self.assertRaises(ConflictingDefinitionError):
2275 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2276 with self.assertRaises(ConflictingDefinitionError):
2277 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2278 # We should be able to certify 3a with a range overlapping that window,
2279 # because it's for a different detector.
2280 # We'll certify 3a over [t1, t3).
2281 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2282 # Now we'll certify 2b and 3b together over [t4, ∞).
2283 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2285 # Fetch all associations and check that they are what we expect.
2286 self.assertCountEqual(
2287 list(
2288 registry.queryDatasetAssociations(
2289 "bias",
2290 collections=[collection, "imported_g", "imported_r"],
2291 )
2292 ),
2293 [
2294 DatasetAssociation(
2295 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2296 collection="imported_g",
2297 timespan=None,
2298 ),
2299 DatasetAssociation(
2300 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2301 collection="imported_r",
2302 timespan=None,
2303 ),
2304 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2305 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2306 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2307 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2308 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2309 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2310 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2311 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2312 ],
2313 )
2315 class Ambiguous:
2316 """Tag class to denote lookups that should be ambiguous."""
2318 pass
2320 def _assertLookup(
2321 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2322 ) -> None:
2323 """Local function that asserts that a bias lookup returns the given
2324 expected result.
2325 """
2326 if expected is Ambiguous:
2327 with self.assertRaises((DatasetTypeError, LookupError)):
2328 registry.findDataset(
2329 "bias",
2330 collections=collection,
2331 instrument="Cam1",
2332 detector=detector,
2333 timespan=timespan,
2334 )
2335 else:
2336 self.assertEqual(
2337 expected,
2338 registry.findDataset(
2339 "bias",
2340 collections=collection,
2341 instrument="Cam1",
2342 detector=detector,
2343 timespan=timespan,
2344 ),
2345 )
2347 # Systematically test lookups against expected results.
2348 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2349 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2350 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2351 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2352 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2353 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2354 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2355 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2356 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2357 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2358 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2359 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2360 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2361 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2362 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2363 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2364 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2365 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2366 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2367 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2368 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2369 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2370 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2371 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2372 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2373 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2374 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2375 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2376 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2377 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2378 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2379 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2380 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2381 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2382 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2383 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2384 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2385 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2386 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2387 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2388 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2389 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2391 # Test lookups via temporal joins to exposures.
2392 self.assertEqual(
2393 set(
2394 registry.queryDataIds(
2395 ["exposure", "detector"], instrument="Cam1", detector=2
2396 ).findRelatedDatasets("bias", collections=[collection])
2397 ),
2398 {
2399 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2400 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2401 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2402 },
2403 )
2404 self.assertEqual(
2405 set(
2406 registry.queryDataIds(
2407 ["exposure", "detector"], instrument="Cam1", detector=3
2408 ).findRelatedDatasets("bias", collections=[collection])
2409 ),
2410 {
2411 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2412 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2413 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2414 },
2415 )
2416 self.assertEqual(
2417 set(
2418 registry.queryDataIds(
2419 ["exposure", "detector"], instrument="Cam1", detector=2
2420 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2421 ),
2422 {
2423 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2424 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2425 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2426 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2427 },
2428 )
2429 self.assertEqual(
2430 set(
2431 registry.queryDataIds(
2432 ["exposure", "detector"], instrument="Cam1", detector=3
2433 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2434 ),
2435 {
2436 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2437 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2438 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2439 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2440 },
2441 )
2443 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2444 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2445 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2446 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2447 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2448 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2449 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2450 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2451 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2452 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2453 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2454 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2455 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2456 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2457 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2458 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2459 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2460 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2461 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2462 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2463 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2464 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2465 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2466 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2467 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2468 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2469 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2470 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2471 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2472 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2473 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2474 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2475 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2476 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2477 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2478 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2479 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2480 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2481 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2482 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2483 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2484 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2485 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2486 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2487 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2488 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2490 # Decertify everything, this time with explicit data IDs, then check
2491 # that no lookups succeed.
2492 registry.decertify(
2493 collection,
2494 "bias",
2495 Timespan(None, None),
2496 dataIds=[
2497 dict(instrument="Cam1", detector=2),
2498 dict(instrument="Cam1", detector=3),
2499 ],
2500 )
2501 for detector in (2, 3):
2502 for timespan in allTimespans:
2503 _assertLookup(detector=detector, timespan=timespan, expected=None)
2504 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2505 # those.
2506 registry.certify(
2507 collection,
2508 [bias2a, bias3a],
2509 Timespan(None, None),
2510 )
2511 for timespan in allTimespans:
2512 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2513 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2514 # Decertify just bias2a over [t2, t4).
2515 # This should split a single certification row into two (and leave the
2516 # other existing row, for bias3a, alone).
2517 registry.decertify(
2518 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2519 )
2520 for timespan in allTimespans:
2521 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2522 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2523 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2524 if overlapsBefore and overlapsAfter:
2525 expected = Ambiguous
2526 elif overlapsBefore or overlapsAfter:
2527 expected = bias2a
2528 else:
2529 expected = None
2530 _assertLookup(detector=2, timespan=timespan, expected=expected)
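    # A pure-Python sketch, over half-open integer intervals, of the
    # decertify behaviour verified above: removing [3, 5) from a validity
    # range [2, 4) truncates it to [2, 3), while removing a window from the
    # middle of a wider range splits one certification row into two.
    @staticmethod
    def _sketch_decertify(
        valid: tuple[int, int], removed: tuple[int, int]
    ) -> list[tuple[int, int]]:
        pieces = []
        if valid[0] < removed[0]:
            pieces.append((valid[0], min(valid[1], removed[0])))
        if valid[1] > removed[1]:
            pieces.append((max(valid[0], removed[1]), valid[1]))
        return [p for p in pieces if p[0] < p[1]]

    # _sketch_decertify((2, 4), (3, 5)) == [(2, 3)]             # truncation
    # _sketch_decertify((0, 10), (4, 6)) == [(0, 4), (6, 10)]   # split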
2532 def testSkipCalibs(self):
2533 """Test how queries handle skipping of calibration collections."""
2534 registry = self.makeRegistry()
2535 self.loadData(registry, "base.yaml")
2536 self.loadData(registry, "datasets.yaml")
2538 coll_calib = "Cam1/calibs/default"
2539 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2541 # Add all biases to the calibration collection.
2542 # Without this, the logic that prunes dataset subqueries based on
2543 # datasetType-collection summary information will fire before the logic
2544 # we want to test below. This is a good thing (it avoids the dreaded
2545 # NotImplementedError a bit more often) everywhere but here.
2546 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2548 coll_list = [coll_calib, "imported_g", "imported_r"]
2549 chain = "Cam1/chain"
2550 registry.registerCollection(chain, type=CollectionType.CHAINED)
2551 registry.setCollectionChain(chain, coll_list)
2553 # explicit list will raise if findFirst=True or there are temporal
2554 # dimensions
2555 with self.assertRaises(NotImplementedError):
2556 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2557 with self.assertRaises(NotImplementedError):
2558 registry.queryDataIds(
2559 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2560 ).count()
2562 # chain will skip
2563 datasets = list(registry.queryDatasets("bias", collections=chain))
2564 self.assertGreater(len(datasets), 0)
2566 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2567 self.assertGreater(len(dataIds), 0)
2569 # glob will skip too
2570 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2571 self.assertGreater(len(datasets), 0)
2573 # regular expression will skip too
2574 pattern = re.compile(".*")
2575 datasets = list(registry.queryDatasets("bias", collections=pattern))
2576 self.assertGreater(len(datasets), 0)
2578 # ellipsis should work as usual
2579 datasets = list(registry.queryDatasets("bias", collections=...))
2580 self.assertGreater(len(datasets), 0)
2582 # a few tests with findFirst
2583 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2584 self.assertGreater(len(datasets), 0)
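    # A pure-Python sketch of the skipping rule exercised above: queries
    # that would need a temporal join (findFirst=True, or temporal
    # dimensions in the result) cannot search CALIBRATION collections, so
    # such collections are skipped silently when they come from a chain or
    # a wildcard, but raise NotImplementedError when listed explicitly.
    @staticmethod
    def _sketch_skip_calibration_collections(
        collections: list[tuple[str, str]], explicit: bool
    ) -> list[str]:
        usable = []
        for name, collection_type in collections:
            if collection_type == "CALIBRATION":
                if explicit:
                    raise NotImplementedError(f"{name} would need a temporal join")
                continue  # skipped silently for chains and wildcards
            usable.append(name)
        return usable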
2586 def testIngestTimeQuery(self):
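        """Test that dataset ingest times can be used in query expressions."""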
2587 registry = self.makeRegistry()
2588 self.loadData(registry, "base.yaml")
2589 dt0 = datetime.datetime.now(datetime.UTC)
2590 self.loadData(registry, "datasets.yaml")
2591 dt1 = datetime.datetime.now(datetime.UTC)
2593 datasets = list(registry.queryDatasets(..., collections=...))
2594 len0 = len(datasets)
2595 self.assertGreater(len0, 0)
2597 where = "ingest_date > T'2000-01-01'"
2598 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2599 len1 = len(datasets)
2600 self.assertEqual(len0, len1)
2602 # no one will ever use this piece of software in 30 years
2603 where = "ingest_date > T'2050-01-01'"
2604 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2605 len2 = len(datasets)
2606 self.assertEqual(len2, 0)
2608 # Check more exact timing to make sure there is no 37-second offset
2609 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2610 # sure that we don't test with higher precision.
2611 tests = [
2612 # format: (timestamp, operator, expected_len)
2613 (dt0 - timedelta(seconds=1), ">", len0),
2614 (dt0 - timedelta(seconds=1), "<", 0),
2615 (dt1 + timedelta(seconds=1), "<", len0),
2616 (dt1 + timedelta(seconds=1), ">", 0),
2617 ]
2618 for dt, op, expect_len in tests:
2619 dt_str = dt.isoformat(sep=" ")
2621 where = f"ingest_date {op} T'{dt_str}'"
2622 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2623 self.assertEqual(len(datasets), expect_len)
2625 # same with bind using datetime or astropy Time
2626 where = f"ingest_date {op} ingest_time"
2627 datasets = list(
2628 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2629 )
2630 self.assertEqual(len(datasets), expect_len)
2632 dt_astropy = astropy.time.Time(dt, format="datetime")
2633 datasets = list(
2634 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2635 )
2636 self.assertEqual(len(datasets), expect_len)
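    # A short sketch of the two equivalent spellings used above for a time
    # comparison: an inline T'...' literal built from isoformat(), and a
    # bind parameter holding the datetime (or astropy Time) object itself.
    @staticmethod
    def _sketch_ingest_date_where() -> tuple[str, str, dict]:
        dt = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
        literal_where = f"ingest_date > T'{dt.isoformat(sep=' ')}'"
        bind_where = "ingest_date > ingest_time"
        return literal_where, bind_where, {"ingest_time": dt}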
2638 def testTimespanQueries(self):
2639 """Test query expressions involving timespans."""
2640 registry = self.makeRegistry()
2641 self.loadData(registry, "hsc-rc2-subset.yaml")
2642 # All visits in the database; mapping from ID to timespan.
2643 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2644 # Just those IDs, sorted (which is also temporal sorting, because HSC
2645 # visit IDs are monotonically increasing).
2646 ids = sorted(visits.keys())
2647 self.assertGreater(len(ids), 20)
2648 # Pick some quasi-random indexes into `ids` to play with.
2649 i1 = int(len(ids) * 0.1)
2650 i2 = int(len(ids) * 0.3)
2651 i3 = int(len(ids) * 0.6)
2652 i4 = int(len(ids) * 0.8)
2653 # Extract some times from those: just before the beginning of i1 (which
2654 # should be after the end of the previous visit), exactly the
2655 # beginning of i2, just after the beginning of i3 (and before its end),
2656 # and the exact end of i4.
2657 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2658 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2659 t2 = visits[ids[i2]].begin
2660 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2661 self.assertLess(t3, visits[ids[i3]].end)
2662 t4 = visits[ids[i4]].end
2663 # Make sure those are actually in order.
2664 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2666 bind = {
2667 "t1": t1,
2668 "t2": t2,
2669 "t3": t3,
2670 "t4": t4,
2671 "ts23": Timespan(t2, t3),
2672 }
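
        # Reminder of the semantics exercised below: Timespan bounds are
        # half-open, [begin, end), so OVERLAPS requires a nonempty
        # intersection of half-open intervals, while "<" and ">" mean
        # entirely before or after. A quick sanity check using plain
        # Timespan arithmetic (no registry involved): a timespan ending at
        # t3 does not overlap one beginning exactly at t3.
        self.assertFalse(Timespan(t2, t3).overlaps(Timespan(t3, t4)))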

        def query(where):
            """Return results as a sorted, deduplicated list of visit IDs.

            Parameters
            ----------
            where : `str`
                The WHERE clause for the query.
            """
            return sorted(
                {
                    dataId["visit"]
                    for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
                }
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries."""
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary()
        expected1.dataset_types.add(registry.getDatasetType("bias"))
        expected1.add_data_ids(
            flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(
            calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
        )
        expected2 = expected1.copy()
        expected2.dataset_types.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling SqlRegistry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )
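
        # The bind mapping substitutes a value for an identifier without any
        # string interpolation, so the bound form above should be equivalent
        # to the literal expression where="band='r'".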

    def testQueryIntRangeExpressions(self):
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )
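
        # For reference: "START..STOP:STEP" behaves like Python's
        # range(START, STOP + 1, STEP), because STOP is inclusive here. A
        # plain-Python sanity check of the expected values above:
        self.assertEqual(set(range(1, 4 + 1, 2)), {1, 3})
        self.assertEqual(set(range(2, 4 + 1, 2)), {2, 4})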

    def testQueryResultSummaries(self):
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")
        # Default test dataset has two collections, each with both flats and
        # biases. Add a new collection with only biases.
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
        # First query yields two results, and involves no postprocessing.
        query1 = registry.queryDataIds(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
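        # (In these checks, execute=False permits an answer based only on
        # analyzing the query, and exact=False permits an approximate answer
        # that ignores postprocess filtering; exact=True must reflect the
        # true result set.)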
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = registry.queryDataIds(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = registry.queryDataIds(["detector", "physical_filter"])
        queries_and_snippets = [
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDatasets("nonexistent", collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                registry.queryDatasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=registry.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                registry.queryDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.findDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                registry.queryDatasets("flat", collections=re.compile("potato.+")),
                ["potato"],
            ),
        ]
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # Wildcards on dataset types are not permitted in queryDataIds.
        with self.assertRaises(DatasetTypeExpressionError):
            registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful
        # diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                registry.queryDataIds(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                registry.queryDimensionRecords("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is
        # filtered out in postprocessing. The count queries aren't accurate
        # because they don't account for duplication that happens due to an
        # internal join against commonSkyPix.
        query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=registry.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = registry.queryDataIds(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = query4.explain_no_results()
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the
        # potential to remove rows.
        query6 = registry.queryDimensionRecords(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = query6.explain_no_results()
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))

    def testQueryDataIdsExpressionError(self):
        """Test error checking of 'where' expressions in queryDataIds."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            registry.queryDataIds(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)

    def testQueryDataIdsOrderBy(self):
        """Test order_by and limit on result returned by queryDataIds()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
            return registry.queryDataIds(
                dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
            )

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-visit.timespan.begin,visit.timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )
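
        # For reference: order_by takes comma-separated keys, where a leading
        # "-" means descending; a key may be a dimension name ("tract"), a
        # qualified field ("visit.exposure_time"), or a timespan bound
        # ("visit.timespan.begin"), and limit takes (limit[, offset]).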

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # ... and materialize.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass

        # Errors in a name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def testQueryDataIdsGovernorExceptions(self):
        """Test exceptions raised by queryDataIds() for incorrect governors."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
            return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)

        Test = namedtuple(
            "testQueryDataIdExceptionsTest",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )
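
        # (Governor dimensions such as instrument and skymap have a known set
        # of values, so naming a value that does not exist raises
        # DataIdValueError up front instead of just returning no rows.)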

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                self.assertEqual(query.count(discard=True), test.count)

            # ... and materialize.
            if test.exception:
                with self.assertRaises(test.exception):
                    query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                    with query.materialize() as materialized:
                        materialized.count(discard=True)
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def testQueryDimensionRecordsOrderBy(self):
        """Test order_by and limit on result returned by
        queryDimensionRecords().
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(element, datasets=None, collections=None):
            return registry.queryDimensionRecords(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "testQueryDimensionRecordsOrderByTest",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # Errors in a name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def testQueryDimensionRecordsExceptions(self):
        """Test exceptions raised by queryDimensionRecords()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        result = registry.queryDimensionRecords("detector")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = registry.queryDimensionRecords("detector", instrument="NotCam1")
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = registry.queryDimensionRecords(
                "detector", where="instrument=instr", bind={"instr": "NotCam1"}
            )
            result.count()

    def testDatasetConstrainedDimensionRecordQueries(self):
        """Test that queryDimensionRecords works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Query for physical_filter dimension records, using a dataset type
        # whose dimensions include physical_filter as well as others.
        records = registry.queryDimensionRecords(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))

    def testSkyPixDatasetQueries(self):
        """Test that we can build queries involving skypix dimensions as long
        as a dataset type that uses those dimensions is included.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dataset_type = DatasetType(
            "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
        )
        registry.registerDatasetType(dataset_type)
        run = "r"
        registry.registerRun(run)
        # First try queries where there are no datasets; the concern is
        # whether we can even build and execute these queries without
        # raising, even when "doomed" query shortcuts are in play.
        self.assertFalse(
            list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
        )
        self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
        # Now add a dataset and see that we can get it back.
        htm7 = registry.dimensions.skypix["htm"][7].pixelization
        data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
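        # (htm7.universe() is a RangeSet covering all valid pixel indices for
        # this pixelization, so universe()[0][0] is simply the first valid
        # htm7 ID.)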
        (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
        self.assertEqual(
            set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
            {data_id},
        )
        self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})

    def testDatasetIdFactory(self):
        """Simple test for DatasetIdFactory, mostly to catch potential
        changes in its API.
        """
        registry = self.makeRegistry()
        factory = DatasetIdFactory()
        dataset_type = DatasetType(
            "datasetType",
            dimensions=["detector", "instrument"],
            universe=registry.dimensions,
            storageClass="int",
        )
        run = "run"
        data_id = DataCoordinate.standardize(
            instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
        )

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 4)

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)
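
        # UNIQUE produces a random (version 4) UUID, while the DATAID_TYPE*
        # modes produce name-based (version 5) UUIDs, which should be
        # deterministic: generating again with identical inputs yields the
        # same ID.
        self.assertEqual(
            datasetId, factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        )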

    def testExposureQueries(self):
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler
        tests is the hsc-rc2-subset.yaml export (which is unfortunately
        distinct from the lsst/rc2_subset GitHub repo), but that does not
        have 'exposure' dimension records, as it was focused on providing
        nontrivial spatial overlaps between visit+detector and tract+patch.
        So in this test we need to translate queries that originally used the
        exposure dimension to use the (very similar) visit dimension instead.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("visit", instrument="HSC")
                .order_by("id")
                .limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in registry.queryDataIds(["detector"], instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_long_query_names(self) -> None:
        """Test that queries involving very long names are handled correctly.

        This is especially important for PostgreSQL, which truncates symbols
        longer than 64 chars, but it's worth testing for all DBs.
        """
        registry = self.makeRegistry()
        name = "abcd" * 17
        registry.registerDatasetType(
            DatasetType(
                name,
                dimensions=(),
                storageClass="Exposure",
                universe=registry.dimensions,
            )
        )
        # Need to search more than one collection actually containing a
        # matching dataset to avoid optimizations that sidestep bugs due to
        # truncation by making findFirst=True a no-op.
        run1 = "run1"
        registry.registerRun(run1)
        run2 = "run2"
        registry.registerRun(run2)
        (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
        registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
        self.assertEqual(
            set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
            {ref1},
        )

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
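        # (Iterating over an lsst.sphgeom.RangeSet yields (begin, end) index
        # pairs with an exclusive end, which is why the search above flattens
        # it with range(begin, end).)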
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system
        # in the dimensions doesn't generate redundant join terms in the
        # query.
        full_data_ids = set(
            registry.queryDataIds(
                ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
            ).expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
            self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))

    def test_spatial_constraint_queries(self) -> None:
        """Test queries in which one spatial dimension in the constraint
        (data ID or ``where`` string) constrains a different spatial
        dimension in the query result columns.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        observation_regions = {
            (data_id["visit"], data_id["detector"]): data_id.region
            for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
        }
        all_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
        }
        overlapping_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in all_combos
            if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
        }
        # Check a direct spatial join with no constraint first.
        self.assertEqual(
            {
                ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
                for data_id in registry.queryDataIds(["patch", "visit", "detector"])
            },
            overlapping_combos,
        )
        overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
        overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
        for patch_key, observation_key in overlapping_combos:
            overlaps_by_patch[patch_key].add(observation_key)
            overlaps_by_observation[observation_key].add(patch_key)
        # Find patches and observations that overlap at least one of the
        # other but not all of the other.
        nontrivial_patch = next(
            iter(
                patch_key
                for patch_key, observation_keys in overlaps_by_patch.items()
                if observation_keys and observation_keys != observation_regions.keys()
            )
        )
        nontrivial_observation = next(
            iter(
                observation_key
                for observation_key, patch_keys in overlaps_by_observation.items()
                if patch_keys and patch_keys != patch_regions.keys()
            )
        )
        # Use the nontrivial patches and observations as constraints on the
        # other dimensions in various ways, first via a 'where' expression.
        # It's better in general to use 'bind' instead of f-strings, but
        # these are all integers so there are no quoting concerns.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )
        # ... and then via the dataId argument.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    dataId={
                        "tract": nontrivial_patch[0],
                        "patch": nontrivial_patch[1],
                    },
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={
                        "visit": nontrivial_observation[0],
                        "detector": nontrivial_observation[1],
                    },
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )

    def test_query_projection_drop_postprocessing(self) -> None:
        """Test that projections and deduplications on query objects can
        drop post-query region filtering to ensure the query remains in
        the SQL engine.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")

        def pop_transfer(tree: Relation) -> Relation:
            """If a relation tree terminates with a transfer to a new engine,
            return the relation prior to that transfer. If not, return the
            original relation.

            Parameters
            ----------
            tree : `Relation`
                The relation tree to inspect.
            """
            match tree:
                case Transfer(target=target):
                    return target
                case _:
                    return tree
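        # (A Transfer relation holds its upstream tree as `target`, so the
        # match above just unwraps one trailing Transfer when present and is
        # otherwise a no-op.)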

        # There's no public way to get a Query object yet, so we get one from
        # a DataCoordinateQueryResults private attribute. When a public API
        # is available this test should use it.
        query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
        # We expect this query to terminate in the iteration engine
        # originally, because region-filtering is necessary.
        self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
        # If we deduplicate, we usually have to do that downstream of the
        # filtering. That means the deduplication has to happen in the
        # iteration engine.
        self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
        # If we pass drop_postprocessing, we instead drop the region
        # filtering so the deduplication can happen in SQL (though there
        # might still be a transfer to iteration at the tail of the tree
        # that we can ignore; that's what the pop_transfer takes care of
        # here).
        self.assertIsInstance(
            pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
            sql.Engine,
        )

    def test_query_find_datasets_drop_postprocessing(self) -> None:
        """Test that DataCoordinateQueryResults.findDatasets avoids
        commutator problems with the FindFirstDataset relation operation.
        """
        # Setup: load some visit, tract, and patch records, and insert two
        # datasets with dimensions {visit, patch}, with one in each of two
        # RUN collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        storage_class = StorageClass("Warpy")
        registry.storageClasses.registerStorageClass(storage_class)
        dataset_type = DatasetType(
            "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
        )
        registry.registerDatasetType(dataset_type)
        (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
        registry.registerRun("run1")
        registry.registerRun("run2")
        (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
        (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against only one of the two collections. This should work even
        # though the relation returned by queryDataIds ends with
        # iteration-engine region-filtering, because we can recognize before
        # running the query that there is only one collection to search and
        # hence the (default) findFirst=True is irrelevant, and joining in
        # the dataset query commutes past the iteration-engine
        # postprocessing.
        query1 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query1.findDatasets(dataset_type.name, collections=["run1"])),
            {ref1},
        )
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against both collections. This can only work if the
        # FindFirstDataset operation can be commuted past the
        # iteration-engine operations into SQL.
        query2 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
            {ref2},
        )

    def test_query_empty_collections(self) -> None:
        """Test for registry query methods with empty collections. The
        methods should return an empty result set (or None when applicable)
        and provide "doomed" diagnostics.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        # Tests for registry.findDataset()
        with self.assertRaises(NoDefaultCollectionError):
            registry.findDataset("bias", instrument="Cam1", detector=1)
        self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
        self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))

        # Tests for registry.queryDatasets()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDatasets("bias")
        self.assertTrue(list(registry.queryDatasets("bias", collections=...)))

        result = registry.queryDatasets("bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDataIds()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDataIds("detector", datasets="bias")
        self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))

        result = registry.queryDataIds("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDimensionRecords()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDimensionRecords("detector", datasets="bias")
        self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))

        result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

    def test_dataset_followup_spatial_joins(self) -> None:
        """Test queryDataIds(...).findRelatedDatasets(...) where a spatial
        join is involved.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        pvi_dataset_type = DatasetType(
            "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
        )
        registry.registerDatasetType(pvi_dataset_type)
        collection = "datasets"
        registry.registerRun(collection)
        (pvi1,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
        )
        (pvi2,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
        )
        (pvi3,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
        )
        self.assertEqual(
            set(
                registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
                .expanded()
                .findRelatedDatasets("pvi", [collection])
            ),
            {
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
            },
        )